From b7cc8bf2850c5650a9e47416af8bd488f9be9161 Mon Sep 17 00:00:00 2001 From: Derrick Brashear Date: Thu, 1 Oct 2009 16:09:17 -0400 Subject: [PATCH] create LINUX24 directory shadow src/afs/LINUX into .../LINUX24, removing 2.6 code from it same for rx/LINUX cheat and simply modify MKAFS_OSTYPE in MakefileProto.LINUX.in Reviewed-on: http://gerrit.openafs.org/565 Reviewed-by: Matt Benjamin Reviewed-by: Derrick Brashear Tested-by: Derrick Brashear --- src/afs/LINUX24/osi_alloc.c | 422 +++++++ src/afs/LINUX24/osi_cred.c | 109 ++ src/afs/LINUX24/osi_file.c | 619 +++++++++ src/afs/LINUX24/osi_flush.s | 41 + src/afs/LINUX24/osi_groups.c | 338 +++++ src/afs/LINUX24/osi_inode.c | 45 + src/afs/LINUX24/osi_inode.h | 0 src/afs/LINUX24/osi_ioctl.c | 134 ++ src/afs/LINUX24/osi_machdep.h | 334 +++++ src/afs/LINUX24/osi_misc.c | 144 +++ src/afs/LINUX24/osi_module.c | 197 +++ src/afs/LINUX24/osi_pag_module.c | 132 ++ src/afs/LINUX24/osi_probe.c | 1241 ++++++++++++++++++ src/afs/LINUX24/osi_proc.c | 369 ++++++ src/afs/LINUX24/osi_prototypes.h | 101 ++ src/afs/LINUX24/osi_sleep.c | 303 +++++ src/afs/LINUX24/osi_syscall.c | 458 +++++++ src/afs/LINUX24/osi_sysctl.c | 246 ++++ src/afs/LINUX24/osi_vfs.hin | 86 ++ src/afs/LINUX24/osi_vfsops.c | 483 +++++++ src/afs/LINUX24/osi_vm.c | 156 +++ src/afs/LINUX24/osi_vnodeops.c | 2507 +++++++++++++++++++++++++++++++++++++ src/libafs/MakefileProto.LINUX.in | 8 + src/rx/LINUX24/rx_kmutex.c | 164 +++ src/rx/LINUX24/rx_kmutex.h | 94 ++ src/rx/LINUX24/rx_knet.c | 308 +++++ 26 files changed, 9039 insertions(+) create mode 100644 src/afs/LINUX24/osi_alloc.c create mode 100644 src/afs/LINUX24/osi_cred.c create mode 100644 src/afs/LINUX24/osi_file.c create mode 100644 src/afs/LINUX24/osi_flush.s create mode 100644 src/afs/LINUX24/osi_groups.c create mode 100644 src/afs/LINUX24/osi_inode.c create mode 100644 src/afs/LINUX24/osi_inode.h create mode 100644 src/afs/LINUX24/osi_ioctl.c create mode 100644 src/afs/LINUX24/osi_machdep.h create mode 100644 
src/afs/LINUX24/osi_misc.c create mode 100644 src/afs/LINUX24/osi_module.c create mode 100644 src/afs/LINUX24/osi_pag_module.c create mode 100644 src/afs/LINUX24/osi_probe.c create mode 100644 src/afs/LINUX24/osi_proc.c create mode 100644 src/afs/LINUX24/osi_prototypes.h create mode 100644 src/afs/LINUX24/osi_sleep.c create mode 100644 src/afs/LINUX24/osi_syscall.c create mode 100644 src/afs/LINUX24/osi_sysctl.c create mode 100644 src/afs/LINUX24/osi_vfs.hin create mode 100644 src/afs/LINUX24/osi_vfsops.c create mode 100644 src/afs/LINUX24/osi_vm.c create mode 100644 src/afs/LINUX24/osi_vnodeops.c create mode 100644 src/rx/LINUX24/rx_kmutex.c create mode 100644 src/rx/LINUX24/rx_kmutex.h create mode 100644 src/rx/LINUX24/rx_knet.c diff --git a/src/afs/LINUX24/osi_alloc.c b/src/afs/LINUX24/osi_alloc.c new file mode 100644 index 0000000..30a69a5 --- /dev/null +++ b/src/afs/LINUX24/osi_alloc.c @@ -0,0 +1,422 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * osi_alloc.c - Linux memory allocation routines. + * + */ +#include +#include "afs/param.h" + + +#include "afs/sysincludes.h" +#include "afsincludes.h" +#include "h/mm.h" +#include "h/slab.h" + +#include "afs_atomlist.h" +#include "afs_lhash.h" + +#define MAX_KMALLOC_SIZE PAGE_SIZE /* Max we should alloc with kmalloc */ +#define MAX_BUCKET_LEN 30 /* max. no. 
of entries per buckets we expect to see */ +#define STAT_INTERVAL 8192 /* we collect stats once every STAT_INTERVAL allocs */ + +/* types of alloc */ +#define KM_TYPE 1 /* kmalloc */ +#define VM_TYPE 2 /* vmalloc */ + +struct osi_linux_mem { + void *chunk; +}; + +/* These assume 32-bit pointers */ +#define MEMTYPE(A) (((unsigned long)A) & 0x3) +#define MEMADDR(A) (void *)((unsigned long)(A) & (~0x3)) + +/* globals */ +afs_atomlist *al_mem_pool; /* pool of osi_linux_mem structures */ +afs_lhash *lh_mem_htab; /* mem hash table */ +unsigned int allocator_init = 0; /* has the allocator been initialized? */ +unsigned int afs_linux_cur_allocs = 0; +unsigned int afs_linux_total_allocs = 0; +unsigned int afs_linux_hash_verify_count = 0; /* used by hash_verify */ +struct afs_lhash_stat afs_linux_lsb; /* hash table statistics */ +unsigned int afs_linux_hash_bucket_dist[MAX_BUCKET_LEN]; /* bucket population distribution in our hash table */ + +#if defined(AFS_LINUX24_ENV) +#include "h/vmalloc.h" +#else +/* externs : can we do this in a better way. Including vmalloc.h causes other + * problems.*/ +extern void vfree(void *addr); +extern void *vmalloc(unsigned long size); +#endif + +/* Allocator support functions (static) */ + +static int +hash_equal(const void *a, const void *b) +{ + return (MEMADDR(((struct osi_linux_mem *)a)->chunk) == + MEMADDR(((struct osi_linux_mem *)b)->chunk)); + +} + +/* linux_alloc : Allocates memory from the linux kernel. It uses + * kmalloc if possible. Otherwise, we use vmalloc. + * Input: + * asize - size of memory required in bytes + * Return Values: + * returns NULL if we failed to allocate memory. + * or pointer to memory if we succeeded. + */ +static void * +linux_alloc(unsigned int asize, int drop_glock) +{ + void *new = NULL; + int max_retry = 10; + int haveGlock = ISAFS_GLOCK(); + + /* if we can use kmalloc use it to allocate the required memory. 
*/ + while (!new && max_retry) { + if (asize <= MAX_KMALLOC_SIZE) { + new = (void *)(unsigned long)kmalloc(asize, +#ifdef GFP_NOFS + GFP_NOFS +#else + GFP_KERNEL +#endif + ); + if (new) /* piggy back alloc type */ + new = (void *)(KM_TYPE | (unsigned long)new); + } else { + osi_Assert(drop_glock || !haveGlock); + if (drop_glock && haveGlock) + AFS_GUNLOCK(); + new = (void *)vmalloc(asize); + if (drop_glock && haveGlock) + AFS_GLOCK(); + if (new) /* piggy back alloc type */ + new = (void *)(VM_TYPE | (unsigned long)new); + } + + if (!new) { +#ifdef set_current_state + set_current_state(TASK_INTERRUPTIBLE); +#else + current->state = TASK_INTERRUPTIBLE; +#endif + if (drop_glock && haveGlock) + AFS_GUNLOCK(); + schedule_timeout(HZ); + if (drop_glock && haveGlock) + AFS_GLOCK(); +#ifdef set_current_state + set_current_state(TASK_RUNNING); +#else + current->state = TASK_RUNNING; +#endif + --max_retry; + } + } + if (new) + memset(MEMADDR(new), 0, asize); + + return new; +} + +static void +linux_free(void *p) +{ + + /* mask out the type information from the pointer and + * use the appropriate free routine to free the chunk. + */ + switch (MEMTYPE(p)) { + case KM_TYPE: + kfree(MEMADDR(p)); + break; + case VM_TYPE: + vfree(MEMADDR(p)); + break; + default: + printf("afs_osi_Free: Asked to free unknown type %d at 0x%lx\n", + (int)MEMTYPE(p), (unsigned long)MEMADDR(p)); + break; + } + +} + +/* hash_chunk() receives a pointer to a chunk and hashes it to produce a + * key that the hashtable can use. The key is obtained by + * right shifting out the 2 LSBs and then multiplying the + * result by a constant no. and dividing it with a large prime. 
+ */ +#define HASH_CONST 32786 +#define HASH_PRIME 79367 +static unsigned +hash_chunk(void *p) +{ + unsigned int key; + + key = (unsigned int)(long)p >> 2; + key = (key * HASH_CONST) % HASH_PRIME; + + return key; +} + +/* hash_free() : Invoked by osi_linux_free_afs_memory(), thru + * afs_lhash_iter(), this function is called by the lhash + * module for every entry in the hash table. hash_free + * frees the memory associated with the entry as well + * as returning the osi_linux_mem struct to its pool. + */ +static void +hash_free(size_t index, unsigned key, void *data) +{ + linux_free(((struct osi_linux_mem *)data)->chunk); + afs_atomlist_put(al_mem_pool, data); +} + +/* hash_verify() is invoked by osi_linux_verify_alloced_memory() thru + * afs_lhash_iter() and is called by the lhash module for every element + * in the hash table. + * hash_verify() verifies (within limits) that the memory passed to it is + * valid. + */ +static void +hash_verify(size_t index, unsigned key, void *data) +{ + struct osi_linux_mem *lmp = (struct osi_linux_mem *)data; + int memtype; + + memtype = MEMTYPE(lmp->chunk); + if (memtype != KM_TYPE && memtype != VM_TYPE) { + printf + ("osi_linux_verify_alloced_memory: unknown type %d at 0x%lx, index=%lu\n", + (int)memtype, (unsigned long)lmp->chunk, (unsigned long)index); + } + afs_linux_hash_verify_count++; +} + + +/* local_free() : wrapper for vfree(), to deal with incompatible protoypes */ +static void +local_free(void *p, size_t n) +{ + vfree(p); +} + +/* linux_alloc_init(): Initializes the kernel memory allocator. As part + * of this process, it also initializes a pool of osi_linux_mem + * structures as well as the hash table itself. 
+ * Return values: + * 0 - failure + * 1 - success + */ +static int +linux_alloc_init(void) +{ + /* initiate our pool of osi_linux_mem structs */ + al_mem_pool = + afs_atomlist_create(sizeof(struct osi_linux_mem), sizeof(long) * 1024, + (void *)vmalloc, local_free); + if (!al_mem_pool) { + printf("afs_osi_Alloc: Error in initialization(atomlist_create)\n"); + return 0; + } + + /* initialize the hash table to hold references to alloc'ed chunks */ + lh_mem_htab = afs_lhash_create(hash_equal, (void *)vmalloc, local_free); + if (!lh_mem_htab) { + printf("afs_osi_Alloc: Error in initialization(lhash_create)\n"); + return 0; + } + + return 1; + +} + +/* hash_bucket_stat() : Counts the no. of elements in each bucket and + * stores results in our bucket stats vector. + */ +static unsigned int cur_bucket, cur_bucket_len; +static void +hash_bucket_stat(size_t index, unsigned key, void *data) +{ + if (index == cur_bucket) { + /* while still on the same bucket, inc len & return */ + cur_bucket_len++; + return; + } else { /* if we're on the next bucket, store the distribution */ + if (cur_bucket_len < MAX_BUCKET_LEN) + afs_linux_hash_bucket_dist[cur_bucket_len]++; + else + printf + ("afs_get_hash_stats: Warning! 
exceeded max bucket len %d\n", + cur_bucket_len); + cur_bucket = index; + cur_bucket_len = 1; + } +} + +/* get_hash_stats() : get hash table statistics */ +static void +get_hash_stats(void) +{ + int i; + + afs_lhash_stat(lh_mem_htab, &afs_linux_lsb); + + /* clear out the bucket stat vector */ + for (i = 0; i < MAX_BUCKET_LEN; i++, afs_linux_hash_bucket_dist[i] = 0); + cur_bucket = cur_bucket_len = 00; + + /* populate the bucket stat vector */ + afs_lhash_iter(lh_mem_htab, hash_bucket_stat); +} + +/************** Linux memory allocator interface functions **********/ + +#if defined(AFS_LINUX24_ENV) +DECLARE_MUTEX(afs_linux_alloc_sem); +#else +struct semaphore afs_linux_alloc_sem = MUTEX; +#endif + +void * +osi_linux_alloc(unsigned int asize, int drop_glock) +{ + void *new = NULL; + struct osi_linux_mem *lmem; + + new = linux_alloc(asize, drop_glock); /* get a chunk of memory of size asize */ + + if (!new) { + printf("afs_osi_Alloc: Can't vmalloc %d bytes.\n", asize); + return new; + } + + down(&afs_linux_alloc_sem); + + /* allocator hasn't been initialized yet */ + if (allocator_init == 0) { + if (linux_alloc_init() == 0) { + goto error; + } + allocator_init = 1; /* initialization complete */ + } + + /* get an atom to store the pointer to the chunk */ + lmem = (struct osi_linux_mem *)afs_atomlist_get(al_mem_pool); + if (!lmem) { + printf("afs_osi_Alloc: atomlist_get() failed."); + goto free_error; + } + /* store the chunk reference */ + lmem->chunk = new; + + /* hash in the chunk */ + if (afs_lhash_enter(lh_mem_htab, hash_chunk(new), lmem) != 0) { + printf("afs_osi_Alloc: lhash_enter failed\n"); + goto free_error; + } + afs_linux_cur_allocs++; /* no. of current allocations */ + afs_linux_total_allocs++; /* total no. 
of allocations done so far */ + if ((afs_linux_cur_allocs % STAT_INTERVAL) == 0) { + get_hash_stats(); + } + error: + up(&afs_linux_alloc_sem); + return MEMADDR(new); + + free_error: + if (new) { + linux_free(new); + } + new = NULL; + goto error; + + +} + +/* osi_linux_free() - free chunk of memory passed to us. + */ +void +osi_linux_free(void *addr) +{ + struct osi_linux_mem lmem, *lmp; + + down(&afs_linux_alloc_sem); + + lmem.chunk = addr; + /* remove this chunk from our hash table */ + if ((lmp = + (struct osi_linux_mem *)afs_lhash_remove(lh_mem_htab, + hash_chunk(addr), &lmem))) { + linux_free(lmp->chunk); /* this contains the piggybacked type info */ + afs_atomlist_put(al_mem_pool, lmp); /* return osi_linux_mem struct to pool */ + afs_linux_cur_allocs--; + } else { + BUG(); + printf("osi_linux_free: failed to remove chunk from hashtable\n"); + } + + up(&afs_linux_alloc_sem); +} + +/* osi_linux_free_afs_memory() - free all chunks of memory allocated. + */ +void +osi_linux_free_afs_memory(void) +{ + down(&afs_linux_alloc_sem); + + if (allocator_init) { + /* iterate through all elements in the hash table and free both + * the chunk and the atom associated with it. + */ + afs_lhash_iter(lh_mem_htab, hash_free); + + /* free the atomlist. */ + afs_atomlist_destroy(al_mem_pool); + + /* free the hashlist. */ + afs_lhash_destroy(lh_mem_htab); + + /* change the state so that the allocator is now uninitialized. */ + allocator_init = 0; + } + up(&afs_linux_alloc_sem); +} + +/* osi_linux_verify_alloced_memory(): verify all chunks of alloced memory in + * our hash table. + */ +void +osi_linux_verify_alloced_memory() +{ + down(&afs_linux_alloc_sem); + + /* count of times hash_verify was called. reset it to 0 before iteration */ + afs_linux_hash_verify_count = 0; + + /* iterate thru elements in the hash table */ + afs_lhash_iter(lh_mem_htab, hash_verify); + + if (afs_linux_hash_verify_count != afs_linux_cur_allocs) { + /* hmm, some pieces of memory are missing. 
*/ + printf + ("osi_linux_verify_alloced_memory: %d chunks of memory are not accounted for during verify!\n", + afs_linux_hash_verify_count - afs_linux_cur_allocs); + } + + up(&afs_linux_alloc_sem); + return; +} diff --git a/src/afs/LINUX24/osi_cred.c b/src/afs/LINUX24/osi_cred.c new file mode 100644 index 0000000..9a52a2a --- /dev/null +++ b/src/afs/LINUX24/osi_cred.c @@ -0,0 +1,109 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * osi_cred.c - Linux cred handling routines. + * + */ +#include +#include "afs/param.h" + + +#include "afs/sysincludes.h" +#include "afsincludes.h" + +cred_t * +crget(void) +{ + cred_t *tmp; + +#if !defined(GFP_NOFS) +#define GFP_NOFS GFP_KERNEL +#endif + tmp = kmalloc(sizeof(cred_t), GFP_NOFS); + if (!tmp) + osi_Panic("crget: No more memory for creds!\n"); + + tmp->cr_ref = 1; + return tmp; +} + +void +crfree(cred_t * cr) +{ + if (cr->cr_ref > 1) { + cr->cr_ref--; + return; + } + + kfree(cr); +} + + +/* Return a duplicate of the cred. 
*/ +cred_t * +crdup(cred_t * cr) +{ + cred_t *tmp = crget(); + + tmp->cr_uid = cr->cr_uid; + tmp->cr_ruid = cr->cr_ruid; + tmp->cr_gid = cr->cr_gid; + tmp->cr_rgid = cr->cr_rgid; + + memcpy(tmp->cr_groups, cr->cr_groups, NGROUPS * sizeof(gid_t)); + tmp->cr_ngroups = cr->cr_ngroups; + + return tmp; +} + +cred_t * +crref(void) +{ + cred_t *cr = crget(); + + cr->cr_uid = current_fsuid(); + cr->cr_ruid = current_uid(); + cr->cr_gid = current_fsgid(); + cr->cr_rgid = current_gid(); + + memcpy(cr->cr_groups, current->groups, NGROUPS * sizeof(gid_t)); + cr->cr_ngroups = current->ngroups; + + return cr; +} + + +/* Set the cred info into the current task */ +void +crset(cred_t * cr) +{ +#if defined(STRUCT_TASK_HAS_CRED) + struct cred *new_creds; + + /* If our current task doesn't have identical real and effective + * credentials, commit_cred won't let us change them, so we just + * bail here. + */ + if (current->cred != current->real_cred) + return; + new_creds = prepare_creds(); + new_creds->fsuid = cr->cr_uid; + new_creds->uid = cr->cr_ruid; + new_creds->fsgid = cr->cr_gid; + new_creds->gid = cr->cr_rgid; +#else + current->fsuid = cr->cr_uid; + current->uid = cr->cr_ruid; + current->fsgid = cr->cr_gid; + current->gid = cr->cr_rgid; +#endif + memcpy(current->groups, cr->cr_groups, NGROUPS * sizeof(gid_t)); + current->ngroups = cr->cr_ngroups; +} diff --git a/src/afs/LINUX24/osi_file.c b/src/afs/LINUX24/osi_file.c new file mode 100644 index 0000000..c2c7cc7 --- /dev/null +++ b/src/afs/LINUX24/osi_file.c @@ -0,0 +1,619 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. 
For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +#include +#include "afs/param.h" + + +#ifdef AFS_LINUX24_ENV +#include "h/module.h" /* early to avoid printf->printk mapping */ +#endif +#include "afs/sysincludes.h" /* Standard vendor system headers */ +#include "afsincludes.h" /* Afs-based standard headers */ +#include "afs/afs_stats.h" /* afs statistics */ +#include "h/smp_lock.h" +#if defined(AFS_LINUX26_ENV) +#include "h/namei.h" +#endif +#if defined(LINUX_USE_FH) +#include "h/exportfs.h" +int cache_fh_type = -1; +int cache_fh_len = -1; +#endif + +afs_lock_t afs_xosi; /* lock is for tvattr */ +extern struct osi_dev cacheDev; +#if defined(AFS_LINUX24_ENV) +extern struct vfsmount *afs_cacheMnt; +#endif +extern struct super_block *afs_cacheSBp; + +#if defined(AFS_LINUX26_ENV) +struct file * +afs_linux_raw_open(afs_dcache_id_t *ainode, ino_t *hint) +{ + struct inode *tip = NULL; + struct dentry *dp = NULL; + struct file* filp; + +#if !defined(LINUX_USE_FH) + tip = iget(afs_cacheSBp, ainode->ufs); + if (!tip) + osi_Panic("Can't get inode %d\n", (int) ainode->ufs); + + dp = d_alloc_anon(tip); +#else + dp = afs_cacheSBp->s_export_op->fh_to_dentry(afs_cacheSBp, &ainode->ufs.fh, + cache_fh_len, cache_fh_type); + if (!dp) + osi_Panic("Can't get dentry\n"); + tip = dp->d_inode; +#endif + tip->i_flags |= MS_NOATIME; /* Disable updating access times. 
*/ + +#if defined(STRUCT_TASK_HAS_CRED) + filp = dentry_open(dp, mntget(afs_cacheMnt), O_RDWR, current_cred()); +#else + filp = dentry_open(dp, mntget(afs_cacheMnt), O_RDWR); +#endif + if (IS_ERR(filp)) +#if defined(LINUX_USE_FH) + osi_Panic("Can't open file\n"); +#else + osi_Panic("Can't open inode %d\n", (int) ainode->ufs); +#endif + if (hint) + *hint = tip->i_ino; + return filp; +} + +void * +osi_UFSOpen(afs_dcache_id_t *ainode) +{ + struct osi_file *afile = NULL; + extern int cacheDiskType; + struct file *filp = NULL; + AFS_STATCNT(osi_UFSOpen); + if (cacheDiskType != AFS_FCACHE_TYPE_UFS) { + osi_Panic("UFSOpen called for non-UFS cache\n"); + } + if (!afs_osicred_initialized) { + /* valid for alpha_osf, SunOS, Ultrix */ + memset((char *)&afs_osi_cred, 0, sizeof(AFS_UCRED)); + crhold(&afs_osi_cred); /* don't let it evaporate, since it is static */ + afs_osicred_initialized = 1; + } + afile = (struct osi_file *)osi_AllocLargeSpace(sizeof(struct osi_file)); + AFS_GUNLOCK(); + if (!afile) { + osi_Panic("osi_UFSOpen: Failed to allocate %d bytes for osi_file.\n", + (int)sizeof(struct osi_file)); + } + memset(afile, 0, sizeof(struct osi_file)); + + afile->filp = afs_linux_raw_open(ainode, &afile->inum); + afile->size = i_size_read(FILE_INODE(afile->filp)); + AFS_GLOCK(); + afile->offset = 0; + afile->proc = (int (*)())0; + return (void *)afile; +} +#else +void * +osi_UFSOpen(afs_dcache_id_t *ainode) +{ + register struct osi_file *afile = NULL; + extern int cacheDiskType; + afs_int32 code = 0; + struct inode *tip = NULL; + struct file *filp = NULL; + AFS_STATCNT(osi_UFSOpen); + if (cacheDiskType != AFS_FCACHE_TYPE_UFS) { + osi_Panic("UFSOpen called for non-UFS cache\n"); + } + if (!afs_osicred_initialized) { + /* valid for alpha_osf, SunOS, Ultrix */ + memset((char *)&afs_osi_cred, 0, sizeof(AFS_UCRED)); + crhold(&afs_osi_cred); /* don't let it evaporate, since it is static */ + afs_osicred_initialized = 1; + } + afile = (struct osi_file 
*)osi_AllocLargeSpace(sizeof(struct osi_file)); + AFS_GUNLOCK(); + if (!afile) { + osi_Panic("osi_UFSOpen: Failed to allocate %d bytes for osi_file.\n", + sizeof(struct osi_file)); + } + memset(afile, 0, sizeof(struct osi_file)); + filp = &afile->file; + filp->f_dentry = &afile->dentry; + tip = iget(afs_cacheSBp, ainode->ufs); + if (!tip) + osi_Panic("Can't get inode %d\n", ainode->ufs); + FILE_INODE(filp) = tip; + tip->i_flags |= MS_NOATIME; /* Disable updating access times. */ + filp->f_flags = O_RDWR; +#if defined(AFS_LINUX24_ENV) + filp->f_mode = FMODE_READ|FMODE_WRITE; + filp->f_op = fops_get(tip->i_fop); +#else + filp->f_op = tip->i_op->default_file_ops; +#endif + if (filp->f_op && filp->f_op->open) + code = filp->f_op->open(tip, filp); + if (code) + osi_Panic("Can't open inode %d\n", ainode->ufs); + afile->size = i_size_read(tip); + AFS_GLOCK(); + afile->offset = 0; + afile->proc = (int (*)())0; + afile->inum = ainode->ufs; /* for hint validity checking */ + return (void *)afile; +} +#endif + +#if defined(LINUX_USE_FH) +/* + * Given a dentry, return the file handle as encoded by the filesystem. + * We can't assume anything about the length (words, not bytes). + * The cache has to live on a single filesystem with uniform file + * handles, otherwise we panic. 
+ */ +void osi_get_fh(struct dentry *dp, afs_ufs_dcache_id_t *ainode) { + int max_len; + int type; + + if (cache_fh_len > 0) + max_len = cache_fh_len; + else + max_len = MAX_FH_LEN; + if (dp->d_sb->s_export_op->encode_fh) { + type = dp->d_sb->s_export_op->encode_fh(dp, &ainode->raw[0], &max_len, 0); + if (type == 255) { + osi_Panic("File handle encoding failed\n"); + } + if (cache_fh_type < 0) + cache_fh_type = type; + if (cache_fh_len < 0) { + cache_fh_len = max_len; + } + if (type != cache_fh_type || max_len != cache_fh_len) { + osi_Panic("Inconsistent file handles within cache\n"); + } + } else { + /* If fs doesn't provide an encode_fh method, assume the default INO32 type */ + if (cache_fh_type < 0) + cache_fh_type = FILEID_INO32_GEN; + if (cache_fh_len < 0) + cache_fh_len = sizeof(struct fid)/4; + ainode->fh.i32.ino = dp->d_inode->i_ino; + ainode->fh.i32.gen = dp->d_inode->i_generation; + } +} +#else +void osi_get_fh(struct dentry *dp, afs_ufs_dcache_id_t *ainode) { + *ainode = dp->d_inode->i_ino; +} +#endif + +int +afs_osi_Stat(register struct osi_file *afile, register struct osi_stat *astat) +{ + register afs_int32 code; + AFS_STATCNT(osi_Stat); + MObtainWriteLock(&afs_xosi, 320); + astat->size = i_size_read(OSIFILE_INODE(afile)); +#if defined(AFS_LINUX26_ENV) + astat->mtime = OSIFILE_INODE(afile)->i_mtime.tv_sec; + astat->atime = OSIFILE_INODE(afile)->i_atime.tv_sec; +#else + astat->mtime = OSIFILE_INODE(afile)->i_mtime; + astat->atime = OSIFILE_INODE(afile)->i_atime; +#endif + code = 0; + MReleaseWriteLock(&afs_xosi); + return code; +} + +#ifdef AFS_LINUX26_ENV +int +osi_UFSClose(register struct osi_file *afile) +{ + AFS_STATCNT(osi_Close); + if (afile) { + if (OSIFILE_INODE(afile)) { + filp_close(afile->filp, NULL); + } + } + + osi_FreeLargeSpace(afile); + return 0; +} +#else +int +osi_UFSClose(register struct osi_file *afile) +{ + AFS_STATCNT(osi_Close); + if (afile) { + if (FILE_INODE(&afile->file)) { + struct file *filp = &afile->file; + if (filp->f_op 
&& filp->f_op->release) + filp->f_op->release(FILE_INODE(filp), filp); + iput(FILE_INODE(filp)); + } + } + + osi_FreeLargeSpace(afile); + return 0; +} +#endif + +int +osi_UFSTruncate(register struct osi_file *afile, afs_int32 asize) +{ + register afs_int32 code; + struct osi_stat tstat; + struct iattr newattrs; + struct inode *inode = OSIFILE_INODE(afile); + AFS_STATCNT(osi_Truncate); + + /* This routine only shrinks files, and most systems + * have very slow truncates, even when the file is already + * small enough. Check now and save some time. + */ + code = afs_osi_Stat(afile, &tstat); + if (code || tstat.size <= asize) + return code; + MObtainWriteLock(&afs_xosi, 321); + AFS_GUNLOCK(); +#ifdef STRUCT_INODE_HAS_I_ALLOC_SEM + down_write(&inode->i_alloc_sem); +#endif +#ifdef STRUCT_INODE_HAS_I_MUTEX + mutex_lock(&inode->i_mutex); +#else + down(&inode->i_sem); +#endif + newattrs.ia_size = asize; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; +#if defined(AFS_LINUX24_ENV) + newattrs.ia_ctime = CURRENT_TIME; + + /* avoid notify_change() since it wants to update dentry->d_parent */ + lock_kernel(); + code = inode_change_ok(inode, &newattrs); + if (!code) { +#ifdef INODE_SETATTR_NOT_VOID +#if defined(AFS_LINUX26_ENV) + if (inode->i_op && inode->i_op->setattr) + code = inode->i_op->setattr(afile->filp->f_dentry, &newattrs); + else +#endif + code = inode_setattr(inode, &newattrs); +#else + inode_setattr(inode, &newattrs); +#endif + } + unlock_kernel(); + if (!code) + truncate_inode_pages(&inode->i_data, asize); +#else + i_size_write(inode, asize); + if (inode->i_sb->s_op && inode->i_sb->s_op->notify_change) { + code = inode->i_sb->s_op->notify_change(&afile->dentry, &newattrs); + } + if (!code) { + truncate_inode_pages(inode, asize); + if (inode->i_op && inode->i_op->truncate) + inode->i_op->truncate(inode); + } +#endif + code = -code; +#ifdef STRUCT_INODE_HAS_I_MUTEX + mutex_unlock(&inode->i_mutex); +#else + up(&inode->i_sem); +#endif +#ifdef STRUCT_INODE_HAS_I_ALLOC_SEM 
+ up_write(&inode->i_alloc_sem); +#endif + AFS_GLOCK(); + MReleaseWriteLock(&afs_xosi); + return code; +} + + +/* Generic read interface */ +int +afs_osi_Read(register struct osi_file *afile, int offset, void *aptr, + afs_int32 asize) +{ + struct uio auio; + struct iovec iov; + afs_int32 code; + + AFS_STATCNT(osi_Read); + + /* + * If the osi_file passed in is NULL, panic only if AFS is not shutting + * down. No point in crashing when we are already shutting down + */ + if (!afile) { + if (!afs_shuttingdown) + osi_Panic("osi_Read called with null param"); + else + return EIO; + } + + if (offset != -1) + afile->offset = offset; + setup_uio(&auio, &iov, aptr, afile->offset, asize, UIO_READ, AFS_UIOSYS); + AFS_GUNLOCK(); + code = osi_rdwr(afile, &auio, UIO_READ); + AFS_GLOCK(); + if (code == 0) { + code = asize - auio.uio_resid; + afile->offset += code; + } else { + afs_Trace2(afs_iclSetp, CM_TRACE_READFAILED, ICL_TYPE_INT32, auio.uio_resid, + ICL_TYPE_INT32, code); + code = -1; + } + return code; +} + +/* Generic write interface */ +int +afs_osi_Write(register struct osi_file *afile, afs_int32 offset, void *aptr, + afs_int32 asize) +{ + struct uio auio; + struct iovec iov; + afs_int32 code; + + AFS_STATCNT(osi_Write); + + if (!afile) { + if (!afs_shuttingdown) + osi_Panic("afs_osi_Write called with null param"); + else + return EIO; + } + + if (offset != -1) + afile->offset = offset; + setup_uio(&auio, &iov, aptr, afile->offset, asize, UIO_WRITE, AFS_UIOSYS); + AFS_GUNLOCK(); + code = osi_rdwr(afile, &auio, UIO_WRITE); + AFS_GLOCK(); + if (code == 0) { + code = asize - auio.uio_resid; + afile->offset += code; + } else { + if (code == ENOSPC) + afs_warnuser + ("\n\n\n*** Cache partition is FULL - Decrease cachesize!!! ***\n\n"); + code = -1; + } + + if (afile->proc) + (*afile->proc)(afile, code); + + return code; +} + + +/* This work should be handled by physstrat in ca/machdep.c. + This routine written from the RT NFS port strategy routine. 
+ It has been generalized a bit, but should still be pretty clear. */ +int +afs_osi_MapStrategy(int (*aproc) (struct buf * bp), register struct buf *bp) +{ + afs_int32 returnCode; + + AFS_STATCNT(osi_MapStrategy); + returnCode = (*aproc) (bp); + + return returnCode; +} + +void +shutdown_osifile(void) +{ + AFS_STATCNT(shutdown_osifile); + if (afs_cold_shutdown) { + afs_osicred_initialized = 0; + } +} + +/* Intialize cache device info and fragment size for disk cache partition. */ +int +osi_InitCacheInfo(char *aname) +{ + int code; + extern afs_dcache_id_t cacheInode; + struct dentry *dp; + extern struct osi_dev cacheDev; + extern afs_int32 afs_fsfragsize; + extern struct super_block *afs_cacheSBp; + extern struct vfsmount *afs_cacheMnt; + code = osi_lookupname_internal(aname, 1, &afs_cacheMnt, &dp); + if (code) + return ENOENT; + + osi_get_fh(dp, &cacheInode.ufs); + cacheDev.dev = dp->d_inode->i_sb->s_dev; + afs_fsfragsize = dp->d_inode->i_sb->s_blocksize - 1; + afs_cacheSBp = dp->d_inode->i_sb; + + dput(dp); + + return 0; +} + + +#define FOP_READ(F, B, C) (F)->f_op->read(F, B, (size_t)(C), &(F)->f_pos) +#define FOP_WRITE(F, B, C) (F)->f_op->write(F, B, (size_t)(C), &(F)->f_pos) + +/* osi_rdwr + * seek, then read or write to an open inode. addrp points to data in + * kernel space. + */ +int +osi_rdwr(struct osi_file *osifile, uio_t * uiop, int rw) +{ +#ifdef AFS_LINUX26_ENV + struct file *filp = osifile->filp; +#else + struct file *filp = &osifile->file; +#endif + KERNEL_SPACE_DECL; + int code = 0; + struct iovec *iov; + afs_size_t count; + unsigned long savelim; + + savelim = current->TASK_STRUCT_RLIM[RLIMIT_FSIZE].rlim_cur; + current->TASK_STRUCT_RLIM[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; + + if (uiop->uio_seg == AFS_UIOSYS) + TO_USER_SPACE(); + + /* seek to the desired position. Return -1 on error. 
*/ + if (filp->f_op->llseek) { + if (filp->f_op->llseek(filp, (loff_t) uiop->uio_offset, 0) != uiop->uio_offset) + return -1; + } else + filp->f_pos = uiop->uio_offset; + + while (code == 0 && uiop->uio_resid > 0 && uiop->uio_iovcnt > 0) { + iov = uiop->uio_iov; + count = iov->iov_len; + if (count == 0) { + uiop->uio_iov++; + uiop->uio_iovcnt--; + continue; + } + + if (rw == UIO_READ) + code = FOP_READ(filp, iov->iov_base, count); + else + code = FOP_WRITE(filp, iov->iov_base, count); + + if (code < 0) { + code = -code; + break; + } else if (code == 0) { + /* + * This is bad -- we can't read any more data from the + * file, but we have no good way of signaling a partial + * read either. + */ + code = EIO; + break; + } + + iov->iov_base += code; + iov->iov_len -= code; + uiop->uio_resid -= code; + uiop->uio_offset += code; + code = 0; + } + + if (uiop->uio_seg == AFS_UIOSYS) + TO_KERNEL_SPACE(); + + current->TASK_STRUCT_RLIM[RLIMIT_FSIZE].rlim_cur = savelim; + + return code; +} + +/* setup_uio + * Setup a uio struct. 
+ */ +void +setup_uio(uio_t * uiop, struct iovec *iovecp, const char *buf, afs_offs_t pos, + int count, uio_flag_t flag, uio_seg_t seg) +{ + iovecp->iov_base = (char *)buf; + iovecp->iov_len = count; + uiop->uio_iov = iovecp; + uiop->uio_iovcnt = 1; + uiop->uio_offset = pos; + uiop->uio_seg = seg; + uiop->uio_resid = count; + uiop->uio_flag = flag; +} + + +/* uiomove + * UIO_READ : dp -> uio + * UIO_WRITE : uio -> dp + */ +int +uiomove(char *dp, int length, uio_flag_t rw, uio_t * uiop) +{ + int count; + struct iovec *iov; + int code; + + while (length > 0 && uiop->uio_resid > 0 && uiop->uio_iovcnt > 0) { + iov = uiop->uio_iov; + count = iov->iov_len; + + if (!count) { + uiop->uio_iov++; + uiop->uio_iovcnt--; + continue; + } + + if (count > length) + count = length; + + switch (uiop->uio_seg) { + case AFS_UIOSYS: + switch (rw) { + case UIO_READ: + memcpy(iov->iov_base, dp, count); + break; + case UIO_WRITE: + memcpy(dp, iov->iov_base, count); + break; + default: + printf("uiomove: Bad rw = %d\n", rw); + return -EINVAL; + } + break; + case AFS_UIOUSER: + switch (rw) { + case UIO_READ: + AFS_COPYOUT(dp, iov->iov_base, count, code); + break; + case UIO_WRITE: + AFS_COPYIN(iov->iov_base, dp, count, code); + break; + default: + printf("uiomove: Bad rw = %d\n", rw); + return -EINVAL; + } + break; + default: + printf("uiomove: Bad seg = %d\n", uiop->uio_seg); + return -EINVAL; + } + + dp += count; + length -= count; + iov->iov_base += count; + iov->iov_len -= count; + uiop->uio_offset += count; + uiop->uio_resid -= count; + } + return 0; +} + diff --git a/src/afs/LINUX24/osi_flush.s b/src/afs/LINUX24/osi_flush.s new file mode 100644 index 0000000..925b874 --- /dev/null +++ b/src/afs/LINUX24/osi_flush.s @@ -0,0 +1,41 @@ +/* Taken from linux-2.6/arch/ppc64/boot/string.S + * + * Copyright (C) Paul Mackerras 1997. + * + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. 
+ * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + * + * Flush the dcache and invalidate the icache for a range of addresses. + * + * flush_cache(addr, len) + */ + .section ".text" + .align 2 + .globl flush_cache + .section ".opd","aw" + .align 3 +flush_cache: + .quad .flush_cache,.TOC.@tocbase,0 + .previous + .size flush_cache,24 + .globl .flush_cache +.flush_cache: + addi 4,4,0x1f /* len = (len + 0x1f) / 0x20 */ + rlwinm. 4,4,27,5,31 + mtctr 4 + beqlr +1: dcbf 0,3 + icbi 0,3 + addi 3,3,0x20 + bdnz 1b + sync + isync + blr + .long 0 + .byte 0,12,0,0,0,0,0,0 + .type .flush_cache,@function + .size .flush_cache,.-.flush_cache diff --git a/src/afs/LINUX24/osi_groups.c b/src/afs/LINUX24/osi_groups.c new file mode 100644 index 0000000..5910a3c --- /dev/null +++ b/src/afs/LINUX24/osi_groups.c @@ -0,0 +1,338 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. 
For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * Implements: + * setgroups (syscall) + * setpag + * + */ +#include +#include "afs/param.h" +#ifdef LINUX_KEYRING_SUPPORT +#include +#endif + + +#include "afs/sysincludes.h" +#include "afsincludes.h" +#include "afs/afs_stats.h" /* statistics */ +#include "afs/nfsclient.h" +#ifdef AFS_LINUX22_ENV +#include "h/smp_lock.h" +#endif + +#define NUMPAGGROUPS 2 + +static int +afs_setgroups(cred_t **cr, int ngroups, gid_t * gidset, int change_parent) +{ + int ngrps; + int i; + gid_t *gp; + + AFS_STATCNT(afs_setgroups); + + if (ngroups > NGROUPS) + return EINVAL; + + gp = (*cr)->cr_groups; + if (ngroups < NGROUPS) + gp[ngroups] = (gid_t) NOGROUP; + + for (i = ngroups; i > 0; i--) { + *gp++ = *gidset++; + } + + (*cr)->cr_ngroups = ngroups; + crset(*cr); + return (0); +} + +/* Returns number of groups. And we trust groups to be large enough to + * hold all the groups. + */ +static int +afs_getgroups(cred_t *cr, gid_t *groups) +{ + int i; + int n; + gid_t *gp; + + AFS_STATCNT(afs_getgroups); + + gp = cr->cr_groups; + n = cr->cr_ngroups; + + for (i = 0; (i < n) && (*gp != (gid_t) NOGROUP); i++) + *groups++ = *gp++; + return i; +} + +/* Only propogate the PAG to the parent process. Unix's propogate to + * all processes sharing the cred. 
+ */ +int +set_pag_in_parent(int pag, int g0, int g1) +{ + int i; +#ifdef STRUCT_TASK_STRUCT_HAS_PARENT + gid_t *gp = current->parent->groups; + int ngroups = current->parent->ngroups; +#else + gid_t *gp = current->p_pptr->groups; + int ngroups = current->p_pptr->ngroups; +#endif + + if ((ngroups < 2) || (afs_get_pag_from_groups(gp[0], gp[1]) == NOPAG)) { + /* We will have to shift grouplist to make room for pag */ + if (ngroups + 2 > NGROUPS) { + return EINVAL; + } + for (i = ngroups - 1; i >= 0; i--) { + gp[i + 2] = gp[i]; + } + ngroups += 2; + } + gp[0] = g0; + gp[1] = g1; + if (ngroups < NGROUPS) + gp[ngroups] = NOGROUP; + +#ifdef STRUCT_TASK_STRUCT_HAS_PARENT + current->parent->ngroups = ngroups; +#else + current->p_pptr->ngroups = ngroups; +#endif + return 0; +} + +int +__setpag(cred_t **cr, afs_uint32 pagvalue, afs_uint32 *newpag, + int change_parent) +{ + gid_t *gidset; + afs_int32 ngroups, code = 0; + int j; + + gidset = (gid_t *) osi_Alloc(NGROUPS * sizeof(gidset[0])); + ngroups = afs_getgroups(*cr, gidset); + + if (afs_get_pag_from_groups(gidset[0], gidset[1]) == NOPAG) { + /* We will have to shift grouplist to make room for pag */ + if (ngroups + 2 > NGROUPS) { + osi_Free((char *)gidset, NGROUPS * sizeof(int)); + return EINVAL; + } + for (j = ngroups - 1; j >= 0; j--) { + gidset[j + 2] = gidset[j]; + } + ngroups += 2; + } + *newpag = (pagvalue == -1 ? genpag() : pagvalue); + afs_get_groups_from_pag(*newpag, &gidset[0], &gidset[1]); + code = afs_setgroups(cr, ngroups, gidset, change_parent); + + /* If change_parent is set, then we should set the pag in the parent as + * well. 
+ */ + if (change_parent && !code) { + code = set_pag_in_parent(*newpag, gidset[0], gidset[1]); + } + + osi_Free((char *)gidset, NGROUPS * sizeof(int)); + return code; +} + +int +setpag(cred_t **cr, afs_uint32 pagvalue, afs_uint32 *newpag, + int change_parent) +{ + int code; + + AFS_STATCNT(setpag); + + code = __setpag(cr, pagvalue, newpag, change_parent); + + return code; +} + + +/* Intercept the standard system call. */ +extern asmlinkage long (*sys_setgroupsp) (int gidsetsize, gid_t * grouplist); +asmlinkage long +afs_xsetgroups(int gidsetsize, gid_t * grouplist) +{ + long code; + cred_t *cr = crref(); + afs_uint32 junk; + int old_pag; + + lock_kernel(); + old_pag = PagInCred(cr); + crfree(cr); + unlock_kernel(); + + code = (*sys_setgroupsp) (gidsetsize, grouplist); + if (code) { + return code; + } + + lock_kernel(); + cr = crref(); + if (old_pag != NOPAG && PagInCred(cr) == NOPAG) { + /* re-install old pag if there's room. */ + code = __setpag(&cr, old_pag, &junk, 0); + } + crfree(cr); + unlock_kernel(); + + /* Linux syscall ABI returns errno as negative */ + return (-code); +} + +#if defined(AFS_LINUX24_ENV) +/* Intercept the standard uid32 system call. */ +extern asmlinkage long (*sys_setgroups32p) (int gidsetsize, gid_t * grouplist); +asmlinkage long +afs_xsetgroups32(int gidsetsize, gid_t * grouplist) +{ + long code; + cred_t *cr = crref(); + afs_uint32 junk; + int old_pag; + + lock_kernel(); + old_pag = PagInCred(cr); + crfree(cr); + unlock_kernel(); + + code = (*sys_setgroups32p) (gidsetsize, grouplist); + + if (code) { + return code; + } + + lock_kernel(); + cr = crref(); + if (old_pag != NOPAG && PagInCred(cr) == NOPAG) { + /* re-install old pag if there's room. */ + code = __setpag(&cr, old_pag, &junk, 0); + } + crfree(cr); + unlock_kernel(); + + /* Linux syscall ABI returns errno as negative */ + return (-code); +} +#endif + +#if defined(AFS_PPC64_LINUX20_ENV) +/* Intercept the uid16 system call as used by 32bit programs. 
*/ +extern long (*sys32_setgroupsp)(int gidsetsize, gid_t *grouplist); +asmlinkage long afs32_xsetgroups(int gidsetsize, gid_t *grouplist) +{ + long code; + cred_t *cr = crref(); + afs_uint32 junk; + int old_pag; + + lock_kernel(); + old_pag = PagInCred(cr); + crfree(cr); + unlock_kernel(); + + code = (*sys32_setgroupsp)(gidsetsize, grouplist); + if (code) { + return code; + } + + lock_kernel(); + cr = crref(); + if (old_pag != NOPAG && PagInCred(cr) == NOPAG) { + /* re-install old pag if there's room. */ + code = __setpag(&cr, old_pag, &junk, 0); + } + crfree(cr); + unlock_kernel(); + + /* Linux syscall ABI returns errno as negative */ + return (-code); +} +#endif + +#if defined(AFS_SPARC64_LINUX20_ENV) || defined(AFS_AMD64_LINUX20_ENV) +/* Intercept the uid16 system call as used by 32bit programs. */ +extern long (*sys32_setgroupsp) (int gidsetsize, u16 * grouplist); +asmlinkage long +afs32_xsetgroups(int gidsetsize, u16 * grouplist) +{ + long code; + cred_t *cr = crref(); + afs_uint32 junk; + int old_pag; + + lock_kernel(); + old_pag = PagInCred(cr); + crfree(cr); + unlock_kernel(); + + code = (*sys32_setgroupsp) (gidsetsize, grouplist); + if (code) { + return code; + } + + lock_kernel(); + cr = crref(); + if (old_pag != NOPAG && PagInCred(cr) == NOPAG) { + /* re-install old pag if there's room. */ + code = __setpag(&cr, old_pag, &junk, 0); + } + crfree(cr); + unlock_kernel(); + + /* Linux syscall ABI returns errno as negative */ + return (-code); +} + +#ifdef AFS_LINUX24_ENV +/* Intercept the uid32 system call as used by 32bit programs. 
*/ +extern long (*sys32_setgroups32p) (int gidsetsize, gid_t * grouplist); +asmlinkage long +afs32_xsetgroups32(int gidsetsize, gid_t * grouplist) +{ + long code; + cred_t *cr = crref(); + afs_uint32 junk; + int old_pag; + + lock_kernel(); + old_pag = PagInCred(cr); + crfree(cr); + unlock_kernel(); + + code = (*sys32_setgroups32p) (gidsetsize, grouplist); + if (code) { + return code; + } + + lock_kernel(); + cr = crref(); + if (old_pag != NOPAG && PagInCred(cr) == NOPAG) { + /* re-install old pag if there's room. */ + code = __setpag(&cr, old_pag, &junk, 0); + } + crfree(cr); + unlock_kernel(); + + /* Linux syscall ABI returns errno as negative */ + return (-code); +} +#endif +#endif diff --git a/src/afs/LINUX24/osi_inode.c b/src/afs/LINUX24/osi_inode.c new file mode 100644 index 0000000..fe2f5d8 --- /dev/null +++ b/src/afs/LINUX24/osi_inode.c @@ -0,0 +1,45 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. 
For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * LINUX inode operations + * + * Implements: + * afs_syscall_icreate + * afs_syscall_iopen + * afs_syscall_iincdec + * + */ + +#include +#include "afs/param.h" + + +#include "afs/sysincludes.h" /* Standard vendor system headers */ +#include "afsincludes.h" /* Afs-based standard headers */ +#include "afs/osi_inode.h" +#include "afs/afs_stats.h" /* statistics stuff */ + +int +afs_syscall_icreate(long a, long b, long c, long d, long e, long f) +{ + return 0; +} + +int +afs_syscall_iopen(int a, int b, int c) +{ + return 0; +} + +int +afs_syscall_iincdec(int a, int v, int c, int d) +{ + return 0; +} diff --git a/src/afs/LINUX24/osi_inode.h b/src/afs/LINUX24/osi_inode.h new file mode 100644 index 0000000..e69de29 diff --git a/src/afs/LINUX24/osi_ioctl.c b/src/afs/LINUX24/osi_ioctl.c new file mode 100644 index 0000000..b29c1ad --- /dev/null +++ b/src/afs/LINUX24/osi_ioctl.c @@ -0,0 +1,134 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * Linux module support routines. + * + */ +#include +#include "afs/param.h" + + +#include /* early to avoid printf->printk mapping */ +#include "afs/sysincludes.h" +#include "afsincludes.h" +#include "h/unistd.h" /* For syscall numbers. 
*/ +#include "h/mm.h" + +#ifdef AFS_AMD64_LINUX20_ENV +#include +#endif +#ifdef AFS_SPARC64_LINUX20_ENV +#include +#endif + +#include +#include +#include +#include +#include + +extern struct proc_dir_entry *openafs_procfs; +#if defined(NEED_IOCTL32) && !defined(HAVE_COMPAT_IOCTL) +static int ioctl32_done; +#endif + +extern asmlinkage long +afs_syscall(long syscall, long parm1, long parm2, long parm3, long parm4); + +static int +afs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg) +{ + + struct afsprocdata sysargs; +#ifdef NEED_IOCTL32 + struct afsprocdata32 sysargs32; +#endif + + if (cmd != VIOC_SYSCALL && cmd != VIOC_SYSCALL32) return -EINVAL; + +#ifdef NEED_IOCTL32 +#ifdef AFS_SPARC64_LINUX24_ENV + if (current->thread.flags & SPARC_FLAG_32BIT) +#elif defined(AFS_SPARC64_LINUX20_ENV) + if (current->tss.flags & SPARC_FLAG_32BIT) +#elif defined(AFS_AMD64_LINUX20_ENV) + if (current->thread.flags & THREAD_IA32) +#elif defined(AFS_PPC64_LINUX20_ENV) + if (current->thread.flags & PPC_FLAG_32BIT) +#elif defined(AFS_S390X_LINUX20_ENV) + if (current->thread.flags & S390_FLAG_31BIT) +#else +#error Not done for this linux type +#endif +#endif /* NEED_IOCTL32 */ + { + if (copy_from_user(&sysargs32, (void *)arg, + sizeof(struct afsprocdata32))) + return -EFAULT; + + return afs_syscall((unsigned long)sysargs32.syscall, + (unsigned long)sysargs32.param1, + (unsigned long)sysargs32.param2, + (unsigned long)sysargs32.param3, + (unsigned long)sysargs32.param4); + } else { + if (copy_from_user(&sysargs, (void *)arg, sizeof(struct afsprocdata))) + return -EFAULT; + + return afs_syscall(sysargs.syscall, sysargs.param1, + sysargs.param2, sysargs.param3, sysargs.param4); + } +} + +#if defined(HAVE_UNLOCKED_IOCTL) || defined(HAVE_COMPAT_IOCTL) +static long afs_unlocked_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) { + return afs_ioctl(FILE_INODE(file), file, cmd, arg); +} +#endif + +static struct file_operations afs_syscall_fops = { 
+#ifdef HAVE_UNLOCKED_IOCTL + .unlocked_ioctl = afs_unlocked_ioctl, +#else + .ioctl = afs_ioctl, +#endif +#ifdef HAVE_COMPAT_IOCTL + .compat_ioctl = afs_unlocked_ioctl, +#endif +}; + +void +osi_ioctl_init(void) +{ + struct proc_dir_entry *entry; + + entry = create_proc_entry(PROC_SYSCALL_NAME, 0666, openafs_procfs); + entry->proc_fops = &afs_syscall_fops; +#if defined(STRUCT_PROC_DIR_ENTRY_HAS_OWNER) + entry->owner = THIS_MODULE; +#endif + +#if defined(NEED_IOCTL32) && !defined(HAVE_COMPAT_IOCTL) + if (register_ioctl32_conversion(VIOC_SYSCALL32, NULL) == 0) + ioctl32_done = 1; +#endif +} + +void +osi_ioctl_clean(void) +{ + remove_proc_entry(PROC_SYSCALL_NAME, openafs_procfs); +#if defined(NEED_IOCTL32) && !defined(HAVE_COMPAT_IOCTL) + if (ioctl32_done) + unregister_ioctl32_conversion(VIOC_SYSCALL32); +#endif +} diff --git a/src/afs/LINUX24/osi_machdep.h b/src/afs/LINUX24/osi_machdep.h new file mode 100644 index 0000000..02e0898 --- /dev/null +++ b/src/afs/LINUX24/osi_machdep.h @@ -0,0 +1,334 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * Linux implementation. 
+ * + */ + +#ifndef OSI_MACHDEP_H_ +#define OSI_MACHDEP_H_ + +#include + +/* Only needed for xdr.h in glibc 2.1.x */ +#ifndef quad_t +#define quad_t __quad_t +#define u_quad_t __u_quad_t +#endif + +#undef getuerror + +#ifdef STRUCT_TASK_STRUCT_HAS_TGID +#define getpid() current->tgid +#ifdef STRUCT_TASK_STRUCT_HAS_REAL_PARENT +#define getppid() current->real_parent->tgid +#elif defined(STRUCT_TASK_STRUCT_HAS_PARENT) +#define getppid() current->parent->tgid +#else +#define getppid() current->p_opptr->tgid +#endif +#else /* !STRUCT_TASK_STRUCT_HAS_TGID */ +#define getpid() current->pid +#ifdef STRUCT_TASK_STRUCT_HAS_REAL_PARENT +#define getppid() current->real_parent->pid +#elif defined(STRUCT_TASK_STRUCT_HAS_PARENT) +#define getppid() current->parent->pid +#else +#define getppid() current->p_opptr->pid +#endif +#endif /* STRUCT_TASK_STRUCT_HAS_TGID */ + +#ifdef RECALC_SIGPENDING_TAKES_VOID +#define RECALC_SIGPENDING(X) recalc_sigpending() +#else +#define RECALC_SIGPENDING(X) recalc_sigpending(X) +#endif + +#if defined (STRUCT_TASK_STRUCT_HAS_SIGMASK_LOCK) +#define SIG_LOCK(X) spin_lock_irq(&X->sigmask_lock) +#define SIG_UNLOCK(X) spin_unlock_irq(&X->sigmask_lock) +#elif defined (STRUCT_TASK_STRUCT_HAS_SIGHAND) +#define SIG_LOCK(X) spin_lock_irq(&X->sighand->siglock) +#define SIG_UNLOCK(X) spin_unlock_irq(&X->sighand->siglock) +#else +#define SIG_LOCK(X) spin_lock_irq(&X->sig->siglock) +#define SIG_UNLOCK(X) spin_unlock_irq(&X->sig->siglock) +#endif + +#if defined (STRUCT_TASK_STRUCT_HAS_RLIM) +#define TASK_STRUCT_RLIM rlim +#elif defined (STRUCT_TASK_STRUCT_HAS_SIGNAL_RLIM) +#define TASK_STRUCT_RLIM signal->rlim +#else +#error Not sure what to do about rlim (should be in the Linux task struct somewhere....) 
+#endif + + +#define afs_hz HZ +#include "h/sched.h" +#if defined(HAVE_CURRENT_KERNEL_TIME) +static inline time_t osi_Time(void) { + struct timespec xtime; + xtime = current_kernel_time(); + return xtime.tv_sec; +} +#else +#define osi_Time() (xtime.tv_sec) +#endif + + + +#ifdef AFS_LINUX_64BIT_KERNEL +#define osi_GetTime(V) \ + do { \ + struct timeval __afs_tv; \ + do_gettimeofday(&__afs_tv); \ + (V)->tv_sec = (afs_int32)__afs_tv.tv_sec; \ + (V)->tv_usec = (afs_int32)__afs_tv.tv_usec; \ + } while (0) +#else +#define osi_GetTime(V) do_gettimeofday((V)) +#endif + +#undef gop_lookupname +#define gop_lookupname osi_lookupname + +#undef gop_lookupname_user +#define gop_lookupname_user osi_lookupname + +#define osi_vnhold(V, N) do { VN_HOLD(AFSTOV(V)); } while (0) +#define VN_HOLD(V) osi_Assert(igrab((V)) == (V)) +#define VN_RELE(V) iput((V)) + +#define afs_suser(x) capable(CAP_SYS_ADMIN) +#define wakeup afs_osi_Wakeup + +#undef vType +#define vType(V) ((AFSTOV((V)))->i_mode & S_IFMT) +#undef vSetType +#define vSetType(V, type) AFSTOV((V))->i_mode = ((type) | (AFSTOV((V))->i_mode & ~S_IFMT)) /* preserve mode */ + +#undef IsAfsVnode +#define IsAfsVnode(V) ((V)->i_sb == afs_globalVFS) /* test superblock instead */ +#undef SetAfsVnode +#define SetAfsVnode(V) /* unnecessary */ + +/* We often need to pretend we're in user space to get memory transfers + * right for the kernel calls we use. + */ +#include + +#ifdef KERNEL_SPACE_DECL +#undef KERNEL_SPACE_DECL +#undef TO_USER_SPACE +#undef TO_KERNEL_SPACE +#endif +#define KERNEL_SPACE_DECL mm_segment_t _fs_space_decl={0} +#define TO_USER_SPACE() { _fs_space_decl = get_fs(); set_fs(get_ds()); } +#define TO_KERNEL_SPACE() set_fs(_fs_space_decl) + +#define copyin(F, T, C) (copy_from_user ((char*)(T), (char*)(F), (C)) > 0 ? 
EFAULT : 0) +static inline long copyinstr(char *from, char *to, int count, int *length) { + long tmp; + tmp = strncpy_from_user(to, from, count); + if (tmp < 0) + return EFAULT; + *length = tmp; + return 0; +} +#define copyout(F, T, C) (copy_to_user ((char*)(T), (char*)(F), (C)) > 0 ? EFAULT : 0) + +/* kernel print statements */ +#define printf printk +#define uprintf printk + + +#ifndef NGROUPS +#define NGROUPS NGROUPS_SMALL +#endif + +/* cred struct */ +typedef struct afs_cred { /* maps to task field: */ + int cr_ref; + uid_t cr_uid; /* euid */ + uid_t cr_ruid; /* uid */ + gid_t cr_gid; /* egid */ + gid_t cr_rgid; /* gid */ + gid_t cr_groups[NGROUPS]; /* 32 groups - empty set to NOGROUP */ + int cr_ngroups; + struct afs_cred *cr_next; +} cred_t; +#define AFS_UCRED struct afs_cred +#define AFS_PROC struct task_struct +#if !defined(current_cred) +#define current_gid() (current->gid) +#define current_uid() (current->uid) +#define current_fsgid() (current->fsgid) +#define current_fsuid() (current->fsuid) +#endif +#if defined(STRUCT_TASK_HAS_CRED) +#define current_group_info() (current->cred->group_info) +#define task_gid(task) (task->cred->gid) +#define task_user(task) (task->cred->user) +#define task_session_keyring(task) (task->cred->tgcred->session_keyring) +#define current_session_keyring() (current->cred->tgcred->session_keyring) +#else +#define current_group_info() (current->group_info) +#if !defined(task_gid) +#define task_gid(task) (task->gid) +#endif +#if !defined(task_uid) +#define task_uid(task) (task->uid) +#endif +#define task_user(task) (task->user) +#define task_session_keyring(task) (task->signal->session_keyring) +#define current_session_keyring() (current->signal->session_keyring) +#endif +#define crhold(c) (c)->cr_ref++ + +/* UIO manipulation */ +typedef enum { AFS_UIOSYS, AFS_UIOUSER } uio_seg_t; +typedef enum { UIO_READ, UIO_WRITE } uio_flag_t; +typedef struct uio { + struct iovec *uio_iov; + int uio_iovcnt; + afs_offs_t uio_offset; + uio_seg_t 
uio_seg; + int uio_resid; + uio_flag_t uio_flag; +} uio_t; +#define afsio_iov uio_iov +#define afsio_iovcnt uio_iovcnt +#define afsio_offset uio_offset +#define afsio_seg uio_segflg +#define afsio_fmode uio_fmode +#define afsio_resid uio_resid + +/* Get/set the inode in the osifile struct. */ +#define FILE_INODE(F) (F)->f_dentry->d_inode + +#define OSIFILE_INODE(a) FILE_INODE(&(a)->file) + +#if defined(AFS_LINUX_64BIT_KERNEL) && !defined(AFS_ALPHA_LINUX20_ENV) && !defined(AFS_IA64_LINUX20_ENV) +#define NEED_IOCTL32 +#endif + +/* page offset is obtained and stored here during module initialization + * We need a variable to do this because, the PAGE_OFFSET macro defined in + * include/asm/page.h can change from kernel to kernel and we cannot use + * the hardcoded version. + */ +extern unsigned long afs_linux_page_offset; + +/* function to help with the page offset stuff */ +#define afs_linux_page_address(page) (afs_linux_page_offset + PAGE_SIZE * (page - mem_map)) + +#if defined(__KERNEL__) +#include +#include +#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +extern struct mutex afs_global_lock; +#else +extern struct semaphore afs_global_lock; +#define mutex_lock(lock) down(lock) +#define mutex_unlock(lock) up(lock) +#endif +extern int afs_global_owner; + +#define AFS_GLOCK() \ +do { \ + mutex_lock(&afs_global_lock); \ + if (afs_global_owner) \ + osi_Panic("afs_global_lock already held by pid %d", \ + afs_global_owner); \ + afs_global_owner = current->pid; \ +} while (0) + +#define ISAFS_GLOCK() (afs_global_owner == current->pid) + +#define AFS_GUNLOCK() \ +do { \ + if (!ISAFS_GLOCK()) \ + osi_Panic("afs global lock not held at %s:%d", __FILE__, __LINE__); \ + afs_global_owner = 0; \ + mutex_unlock(&afs_global_lock); \ +} while (0) +#else +#define AFS_GLOCK() +#define AFS_GUNLOCK() +#define ISAFS_GLOCK() 1 +#define AFS_ASSERT_GLOCK() +#endif + +#ifdef AFS_AMD64_LINUX20_ENV +/* RHEL5 beta's kernel doesn't define these. They aren't gonna change, so... 
*/ + +#ifndef __NR_ia32_afs_syscall +#define __NR_ia32_afs_syscall 137 +#endif +#ifndef __NR_ia32_setgroups +#define __NR_ia32_setgroups 81 +#endif +#ifndef __NR_ia32_setgroups32 +#define __NR_ia32_setgroups32 206 +#endif +#ifndef __NR_ia32_close +#define __NR_ia32_close 6 +#endif +#ifndef __NR_ia32_chdir +#define __NR_ia32_chdir 12 +#endif +#ifndef __NR_ia32_break +#define __NR_ia32_break 17 +#endif +#ifndef __NR_ia32_stty +#define __NR_ia32_stty 31 +#endif +#ifndef __NR_ia32_gtty +#define __NR_ia32_gtty 32 +#endif +#ifndef __NR_ia32_ftime +#define __NR_ia32_ftime 35 +#endif +#ifndef __NR_ia32_prof +#define __NR_ia32_prof 44 +#endif +#ifndef __NR_ia32_lock +#define __NR_ia32_lock 53 +#endif +#ifndef __NR_ia32_mpx +#define __NR_ia32_mpx 56 +#endif +#ifndef __NR_ia32_exit +#define __NR_ia32_exit 1 +#endif +#ifndef __NR_ia32_mount +#define __NR_ia32_mount 21 +#endif +#ifndef __NR_ia32_read +#define __NR_ia32_read 3 +#endif +#ifndef __NR_ia32_write +#define __NR_ia32_write 4 +#endif +#ifndef __NR_ia32_open +#define __NR_ia32_open 5 +#endif +#ifndef __NR_ia32_close +#define __NR_ia32_close 6 +#endif +#ifndef __NR_ia32_unlink +#define __NR_ia32_unlink 10 +#endif +#endif + +#endif /* OSI_MACHDEP_H_ */ diff --git a/src/afs/LINUX24/osi_misc.c b/src/afs/LINUX24/osi_misc.c new file mode 100644 index 0000000..d4fec6c --- /dev/null +++ b/src/afs/LINUX24/osi_misc.c @@ -0,0 +1,144 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * Linux support routines. 
+ * + */ +#include +#include "afs/param.h" + + +#include /* early to avoid printf->printk mapping */ +#include "afs/sysincludes.h" +#include "afsincludes.h" +#include "afs/afs_stats.h" +#if defined(AFS_LINUX24_ENV) +#include "h/smp_lock.h" +#endif + +int afs_osicred_initialized = 0; +AFS_UCRED afs_osi_cred; + +void +afs_osi_SetTime(osi_timeval_t * tvp) +{ +#if defined(AFS_LINUX24_ENV) + struct timeval tv; + tv.tv_sec = tvp->tv_sec; + tv.tv_usec = tvp->tv_usec; + + AFS_STATCNT(osi_SetTime); + + do_settimeofday(&tv); +#else + extern int (*sys_settimeofdayp) (struct timeval * tv, + struct timezone * tz); + + KERNEL_SPACE_DECL; + + AFS_STATCNT(osi_SetTime); + + TO_USER_SPACE(); + if (sys_settimeofdayp) + (void)(*sys_settimeofdayp) (tvp, NULL); + TO_KERNEL_SPACE(); +#endif +} + +void +osi_linux_mask(void) +{ + SIG_LOCK(current); + sigfillset(¤t->blocked); + RECALC_SIGPENDING(current); + SIG_UNLOCK(current); +} + +#if defined(AFS_LINUX24_ENV) +/* LOOKUP_POSITIVE is becoming the default */ +#ifndef LOOKUP_POSITIVE +#define LOOKUP_POSITIVE 0 +#endif +/* Lookup name and return vnode for same. 
*/ +int +osi_lookupname_internal(char *aname, int followlink, struct vfsmount **mnt, + struct dentry **dpp) +{ + int code; + struct nameidata nd; + int flags = LOOKUP_POSITIVE; + code = ENOENT; + + if (followlink) + flags |= LOOKUP_FOLLOW; + if (path_init(aname, flags, &nd)) + code = path_walk(aname, &nd); + + if (!code) { +#if defined(STRUCT_NAMEIDATA_HAS_PATH) + *dpp = dget(nd.path.dentry); + if (mnt) + *mnt = mntget(nd.path.mnt); + path_put(&nd.path); +#else + *dpp = dget(nd.dentry); + if (mnt) + *mnt = mntget(nd.mnt); + path_release(&nd); +#endif + } + return code; +} + +int +osi_lookupname(char *aname, uio_seg_t seg, int followlink, + struct dentry **dpp) +{ + int code; + char *tname; + code = ENOENT; + if (seg == AFS_UIOUSER) { + tname = getname(aname); + if (IS_ERR(tname)) + return PTR_ERR(tname); + } else { + tname = aname; + } + code = osi_lookupname_internal(tname, followlink, NULL, dpp); + if (seg == AFS_UIOUSER) { + putname(tname); + } + return code; +} +#else +int +osi_lookupname(char *aname, uio_seg_t seg, int followlink, struct dentry **dpp) +{ + struct dentry *dp = NULL; + int code; + + code = ENOENT; + if (seg == AFS_UIOUSER) { + dp = followlink ? namei(aname) : lnamei(aname); + } else { + dp = lookup_dentry(aname, NULL, followlink ? 1 : 0); + } + + if (dp && !IS_ERR(dp)) { + if (dp->d_inode) { + *dpp = dp; + code = 0; + } else + dput(dp); + } + + return code; +} +#endif diff --git a/src/afs/LINUX24/osi_module.c b/src/afs/LINUX24/osi_module.c new file mode 100644 index 0000000..5727800 --- /dev/null +++ b/src/afs/LINUX24/osi_module.c @@ -0,0 +1,197 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * Linux module support routines. 
+ * + */ +#include +#include "afs/param.h" + + +#include /* early to avoid printf->printk mapping */ +#include "afs/sysincludes.h" +#include "afsincludes.h" +#include "h/unistd.h" /* For syscall numbers. */ +#include "h/mm.h" + +#ifdef AFS_AMD64_LINUX20_ENV +#include +#endif +#ifdef AFS_SPARC64_LINUX20_ENV +#include +#endif + +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +#include +#include +#include +#endif + +#include "osi_pagecopy.h" + +extern struct file_system_type afs_fs_type; + +#if !defined(AFS_LINUX24_ENV) +static long get_page_offset(void); +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +DEFINE_MUTEX(afs_global_lock); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +DECLARE_MUTEX(afs_global_lock); +#else +struct semaphore afs_global_lock = MUTEX; +#endif +int afs_global_owner = 0; +#if !defined(AFS_LINUX24_ENV) +unsigned long afs_linux_page_offset = 0; /* contains the PAGE_OFFSET value */ +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +int __init +afs_init(void) +#else +int +init_module(void) +#endif +{ + int err; + AFS_RWLOCK_INIT(&afs_xosi, "afs_xosi"); + +#if !defined(AFS_LINUX24_ENV) + /* obtain PAGE_OFFSET value */ + afs_linux_page_offset = get_page_offset(); + +#ifndef AFS_S390_LINUX22_ENV + if (afs_linux_page_offset == 0) { + /* couldn't obtain page offset so can't continue */ + printf("afs: Unable to obtain PAGE_OFFSET. 
Exiting.."); + return -EIO; + } +#endif /* AFS_S390_LINUX22_ENV */ +#endif /* !defined(AFS_LINUX24_ENV) */ + + osi_Init(); + +#ifndef LINUX_KEYRING_SUPPORT + err = osi_syscall_init(); + if (err) + return err; +#endif + err = afs_init_inodecache(); + if (err) { +#ifndef LINUX_KEYRING_SUPPORT + osi_syscall_clean(); +#endif + return err; + } + err = register_filesystem(&afs_fs_type); + if (err) { + afs_destroy_inodecache(); +#ifndef LINUX_KEYRING_SUPPORT + osi_syscall_clean(); +#endif + return err; + } + + osi_sysctl_init(); +#ifdef LINUX_KEYRING_SUPPORT + osi_keyring_init(); +#endif +#ifdef AFS_LINUX24_ENV + osi_proc_init(); + osi_ioctl_init(); +#endif +#if defined(AFS_CACHE_BYPASS) + afs_warn("Cache bypass patched libafs module init.\n"); +#endif + afs_init_pagecopy(); + + return 0; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +void __exit +afs_cleanup(void) +#else +void +cleanup_module(void) +#endif +{ +#if defined(AFS_CACHE_BYPASS) + afs_warn("Cache bypass patched libafs module cleaning up.\n"); +#endif + + afs_shutdown_pagecopy(); + +#ifdef LINUX_KEYRING_SUPPORT + osi_keyring_shutdown(); +#endif + osi_sysctl_clean(); +#ifndef LINUX_KEYRING_SUPPORT + osi_syscall_clean(); +#endif + unregister_filesystem(&afs_fs_type); + + afs_destroy_inodecache(); + osi_linux_free_afs_memory(); + +#ifdef AFS_LINUX24_ENV + osi_ioctl_clean(); + osi_proc_clean(); +#endif + + return; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +MODULE_LICENSE("http://www.openafs.org/dl/license10.html"); +module_init(afs_init); +module_exit(afs_cleanup); +#endif + + +#if !defined(AFS_LINUX24_ENV) +static long +get_page_offset(void) +{ +#if defined(AFS_PPC_LINUX22_ENV) || defined(AFS_SPARC64_LINUX20_ENV) || defined(AFS_SPARC_LINUX20_ENV) || defined(AFS_ALPHA_LINUX20_ENV) || defined(AFS_S390_LINUX22_ENV) || defined(AFS_IA64_LINUX20_ENV) || defined(AFS_PARISC_LINUX24_ENV) || defined(AFS_AMD64_LINUX20_ENV) || defined(AFS_PPC64_LINUX20_ENV) + return PAGE_OFFSET; +#else + struct task_struct 
*p, *q; + + /* search backward thru the circular list */ +#if defined(EXPORTED_TASKLIST_LOCK) + read_lock(&tasklist_lock); +#endif + /* search backward thru the circular list */ +#ifdef DEFINED_PREV_TASK + for (q = current; p = q; q = prev_task(p)) { +#else + for (p = current; p; p = p->prev_task) { +#endif + if (p->pid == 1) { +#if defined(EXPORTED_TASKLIST_LOCK) + read_unlock(&tasklist_lock); +#endif + return p->addr_limit.seg; + } + } + +#if defined(EXPORTED_TASKLIST_LOCK) + read_unlock(&tasklist_lock); +#endif + return 0; +#endif +} +#endif /* !AFS_LINUX24_ENV */ diff --git a/src/afs/LINUX24/osi_pag_module.c b/src/afs/LINUX24/osi_pag_module.c new file mode 100644 index 0000000..cf1460d --- /dev/null +++ b/src/afs/LINUX24/osi_pag_module.c @@ -0,0 +1,132 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * Linux module support routines. + * + */ +#include +#include "afs/param.h" + +#include /* early to avoid printf->printk mapping */ +#include "afs/sysincludes.h" +#include "afsincludes.h" +#include "h/unistd.h" /* For syscall numbers. 
*/ +#include "h/mm.h" + +#ifdef AFS_AMD64_LINUX20_ENV +#include +#endif +#ifdef AFS_SPARC64_LINUX20_ENV +#include +#endif + +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +#include +#include +#include +#endif + +static unsigned long nfs_server_addr = 0; +#if defined(module_param) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) +module_param(nfs_server_addr, long, 0); +#else +MODULE_PARM(nfs_server_addr, "l"); +#endif +MODULE_PARM_DESC(nfs_server_addr, "IP Address of NFS Server"); + +static char *this_cell = 0; +#if defined(module_param_array) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) +module_param(this_cell, charp, 0); +#else +MODULE_PARM(this_cell, "s"); +#endif +MODULE_PARM_DESC(this_cell, "Local cell name"); + +#if defined(AFS_LINUX24_ENV) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +DEFINE_MUTEX(afs_global_lock); +#else +DECLARE_MUTEX(afs_global_lock); +#endif +struct proc_dir_entry *openafs_procfs; +#else +struct semaphore afs_global_lock = MUTEX; +#endif +int afs_global_owner = 0; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +int __init +afspag_init(void) +#else +int +init_module(void) +#endif +{ +#if !defined(EXPORTED_PROC_ROOT_FS) && defined(AFS_LINUX24_ENV) + char path[64]; +#endif + int err; + + osi_Init(); + + err = osi_syscall_init(); + if (err) + return err; +#ifdef AFS_LINUX24_ENV +#if defined(EXPORTED_PROC_ROOT_FS) + openafs_procfs = proc_mkdir(PROC_FSDIRNAME, proc_root_fs); +#else + sprintf(path, "fs/%s", PROC_FSDIRNAME); + openafs_procfs = proc_mkdir(path, NULL); +#endif + osi_ioctl_init(); +#endif + + afspag_Init(htonl(nfs_server_addr)); + if (this_cell) + afspag_SetPrimaryCell(this_cell); + + return 0; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +void __exit +afspag_cleanup(void) +#else +void +cleanup_module(void) +#endif +{ +#if !defined(EXPORTED_PROC_ROOT_FS) && defined(AFS_LINUX24_ENV) + char path[64]; +#endif + osi_syscall_clean(); + + osi_linux_free_afs_memory(); + +#ifdef AFS_LINUX24_ENV + 
osi_ioctl_clean(); +#if defined(EXPORTED_PROC_ROOT_FS) + remove_proc_entry(PROC_FSDIRNAME, proc_root_fs); +#else + sprintf(path, "fs/%s", PROC_FSDIRNAME); + remove_proc_entry(path, NULL); +#endif +#endif + return; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +MODULE_LICENSE("http://www.openafs.org/dl/license10.html"); +module_init(afspag_init); +module_exit(afspag_cleanup); +#endif diff --git a/src/afs/LINUX24/osi_probe.c b/src/afs/LINUX24/osi_probe.c new file mode 100644 index 0000000..1b670b3 --- /dev/null +++ b/src/afs/LINUX24/osi_probe.c @@ -0,0 +1,1241 @@ +/* + * vi:set cin noet sw=4 tw=70: + * Copyright 2004, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + * + * Portions of this code borrowed from arla under the following terms: + * Copyright (c) 2003-2004 Kungliga Tekniska Högskolan + * (Royal Institute of Technology, Stockholm, Sweden). + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL"). 
+ * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Code to find the Linux syscall table */ + +#ifdef OSI_PROBE_STANDALONE +#define OSI_PROBE_DEBUG +#endif +#ifndef OSI_PROBE_STANDALONE +#include +#include "afs/param.h" +#endif +#if defined(ENABLE_LINUX_SYSCALL_PROBING) && defined(EXPORTED_INIT_MM) +#ifdef AFS_LINUX24_ENV +#include /* early to avoid printf->printk mapping */ +#ifndef OSI_PROBE_STANDALONE +#include "afs/sysincludes.h" +#include "afsincludes.h" +#endif +#include +#include +#ifdef CONFIG_H_EXISTS +#include +#endif +#include +#include +#include +#include + +#ifdef AFS_AMD64_LINUX20_ENV +#include +#endif + +/* number of syscalls */ +/* NB: on MIPS we care about the 4xxx range */ +#ifndef NR_syscalls +#define NR_syscalls 222 +#endif + +/* lower bound of valid kernel text pointers */ +#ifdef AFS_IA64_LINUX20_ENV +#define ktxt_lower_bound (((unsigned long)&kernel_thread ) & 0xfff00000L) +#elif defined(AFS_PPC64_LINUX20_ENV) +#define ktxt_lower_bound (KERNELBASE) +#else +#define ktxt_lower_bound (((unsigned long)&kernel_thread ) & ~0xfffffL) +#endif + +/* On SPARC64 and S390X, sys_call_table contains 32-bit entries + * even though pointers are 64 bit quantities. 
+ */ +#if defined(AFS_SPARC64_LINUX20_ENV) || defined(AFS_S390X_LINUX24_ENV) +#define SYSCALLTYPE unsigned int +#define PROBETYPE int +#else +#define SYSCALLTYPE void * +#define PROBETYPE long +#endif + +#if defined(AFS_S390X_LINUX20_ENV) +#define _SS(x) ((x) << 1) +#define _SX(x) ((x) &~ 1) +#else +#define _SS(x) (x) +#define _SX(x) (x) +#endif + +/* Older Linux doesn't have __user. The sys_read prototype needs it. */ +#ifndef __user +#define __user +#endif + +/* Allow the user to specify sys_call_table addresses */ +static unsigned long sys_call_table_addr[4] = { 0,0,0,0 }; +#if defined(module_param_array) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) +module_param_array(sys_call_table_addr, long, NULL, 0); +#else +MODULE_PARM(sys_call_table_addr, "1-4l"); +#endif +MODULE_PARM_DESC(sys_call_table_addr, "Location of system call tables"); + +/* If this is set, we are more careful about avoiding duplicate matches */ +static int probe_carefully = 1; +#if defined(module_param) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) +module_param(probe_carefully, int, 0); +#else +MODULE_PARM(probe_carefully, "i"); +#endif +MODULE_PARM_DESC(probe_carefully, "Probe for system call tables carefully"); + +static int probe_ignore_syscalls[8] = { -1, -1, -1, -1, -1, -1, -1, -1 }; +#if defined(module_param_array) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) +module_param_array(probe_ignore_syscalls, int, NULL, 0); +#else +MODULE_PARM(probe_ignore_syscalls, "1-8i"); +#endif +MODULE_PARM_DESC(probe_ignore_syscalls, "Syscalls to ignore in table checks"); + +#ifdef OSI_PROBE_DEBUG +/* + * Debugging flags: + * 0x0001 - General debugging + * 0x0002 - detail - try + * 0x0004 - detail - try_harder + * 0x0008 - detail - check_table + * 0x0010 - detail - check_harder + * 0x0020 - detail - check_harder/zapped + * 0x0040 - automatically ignore setgroups and afs_syscall + * 0x0080 - detail - check_table_readable + */ +static int probe_debug = 0x41; +#if defined(module_param) && LINUX_VERSION_CODE 
> KERNEL_VERSION(2,6,9) +module_param(probe_debug, int, 0); +#else +MODULE_PARM(probe_debug, "i"); +#endif +MODULE_PARM_DESC(probe_debug, "Debugging level"); + +static unsigned long probe_debug_addr[4] = { 0,0,0,0 }; +#if defined(module_param_array) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) +module_param_array(probe_debug_addr, long, NULL, 0); +#else +MODULE_PARM(probe_debug_addr, "1-4l"); +#endif +MODULE_PARM_DESC(probe_debug_addr, "Debug range starting locations"); + +static unsigned long probe_debug_range = 0; +#if defined(module_param) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) +module_param(probe_debug_range, long, 0); +#else +MODULE_PARM(probe_debug_range, "l"); +#endif +MODULE_PARM_DESC(probe_debug_range, "Debug range length"); + +static unsigned long probe_debug_tag = 0; +#if defined(module_param) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) +module_param(probe_debug_tag, long, 0); +#else +MODULE_PARM(probe_debug_tag, "l"); +#endif +MODULE_PARM_DESC(probe_debug_tag, "Debugging output start tag"); +#endif + + +/* Weak references are our friends. They are supported by the in-kernel + * linker in Linux 2.6 and by all versions of modutils back to 2.2pre1. + * A weak reference not satisified by the kernel will have value zero. + * + * Unfortunately, weak references to functions don't work right on + * IA64; specifically, if you actually try to make a call through + * such a reference, and the symbol doesn't exist in the kernel, then + * the module relocation code will oops. A workaround for this is + * probably possible, but the use of kallsyms_* is of limited value, + * so I'm not bothing with the effort for now. 
+ * -- jhutz, 10-Feb-2005 + */ +#ifdef OSI_PROBE_KALLSYMS +extern int kallsyms_symbol_to_address(char *name, unsigned long *token, + char **mod_name, + unsigned long *mod_start, + unsigned long *mod_end, + char **sec_name, + unsigned long *sec_start, + unsigned long *sec_end, + char **sym_name, + unsigned long *sym_start, + unsigned long *sym_end + ) __attribute__((weak)); + +extern int kallsyms_address_to_symbol(unsigned long address, + char **mod_name, + unsigned long *mod_start, + unsigned long *mod_end, + char **sec_name, + unsigned long *sec_start, + unsigned long *sec_end, + char **sym_name, + unsigned long *sym_start, + unsigned long *sym_end + ) __attribute__((weak)); +#endif + +extern SYSCALLTYPE sys_call_table[] __attribute__((weak)); +extern SYSCALLTYPE ia32_sys_call_table[] __attribute__((weak)); +extern SYSCALLTYPE sys_call_table32[] __attribute__((weak)); +extern SYSCALLTYPE sys_call_table_emu[] __attribute__((weak)); + +extern asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) __attribute__((weak)); +extern asmlinkage long sys_close(unsigned int) __attribute__((weak)); +#if defined(EXPORTED_SYS_CHDIR) +extern asmlinkage long sys_chdir(const char *) __attribute__((weak)); +#endif +extern asmlinkage ssize_t sys_write(unsigned int, const char *, size_t) __attribute__((weak)); +extern asmlinkage long sys_wait4(pid_t, unsigned int *, int, struct rusage *) __attribute__((weak)); +extern asmlinkage long sys_exit (int) __attribute__((weak)); +#if defined(EXPORTED_SYS_OPEN) +extern asmlinkage long sys_open (const char *, int, int) __attribute__((weak)); +#endif +extern asmlinkage long sys_ioctl(unsigned int, unsigned int, unsigned long) __attribute__((weak)); + + +/* Structures used to control probing. We put all the details of which + * symbols we're interested in, what syscall functions to look for, etc + * into tables, so we can then have a single copy of the functions that + * actually do the work. 
+ */ +typedef struct { + char *name; + int NR1; + void *fn1; + int NR2; + void *fn2; + int NR3; + void *fn3; +} tryctl; + +typedef struct { + char *symbol; /* symbol name */ + char *desc; /* description for messages */ + int offset; /* first syscall number in table */ + + void *weak_answer; /* weak symbol ref */ + void *parm_answer; /* module parameter answer */ + void *debug_answer; /* module parameter answer */ + unsigned long given_answer; /* compiled-in answer, if any */ + + tryctl *trylist; /* array of combinations to try */ + + unsigned long try_sect_sym; /* symbol in section to try scanning */ + unsigned long try_base; /* default base address for scan */ + unsigned long try_base_mask; /* base address bits to force to zero */ + unsigned long try_length; /* default length for scan */ + + unsigned long alt_try_sect_sym; /* symbol in section to try scanning */ + unsigned long alt_try_base; /* default base address for scan */ + unsigned long alt_try_base_mask; /* base address bits to force to zero */ + unsigned long alt_try_length; /* default length for scan */ + + int n_zapped_syscalls; /* number of unimplemented system calls */ + int *zapped_syscalls; /* list of unimplemented system calls */ + + int n_unique_syscalls; /* number of unique system calls */ + int *unique_syscalls; /* list of unimplemented system calls */ + + int verifyNR; /* syscall number to verify match */ + void *verify_fn; /* syscall pointer to verify match */ + + int debug_ignore_NR[4]; /* syscalls to ignore for debugging */ +} probectl; + +/********** Probing Configuration: sys_call_table **********/ + +/* syscall pairs/triplets to probe */ +/* On PPC64 and SPARC64, we need to omit the ones that might match both tables */ +static tryctl main_try[] = { +#if !defined(AFS_PPC64_LINUX20_ENV) && !defined(AFS_SPARC64_LINUX20_ENV) +#if defined(EXPORTED_SYS_CHDIR) + { "scan: close+chdir+write", __NR_close, &sys_close, __NR_chdir, &sys_chdir, __NR_write, &sys_write }, +#endif +#endif + { "scan: 
close+wait4", __NR_close, &sys_close, __NR_wait4, &sys_wait4, -1, 0 }, +#if !defined(AFS_PPC64_LINUX20_ENV) && !defined(AFS_SPARC64_LINUX20_ENV) +#if defined(EXPORTED_SYS_CHDIR) + { "scan: close+chdir", __NR_close, &sys_close, __NR_chdir, &sys_chdir, -1, 0 }, +#endif +#endif + { "scan: close+ioctl", __NR_close, &sys_close, __NR_ioctl, &sys_ioctl, -1, 0 }, +#if defined(EXPORTED_SYS_OPEN) + { "scan: exit+open", __NR_exit, &sys_exit, __NR_open, &sys_open, -1, 0 }, +#endif + { 0 } +}; + +/* zapped syscalls for try_harder */ +/* this list is based on the table in 'zapped_syscalls' */ + +static int main_zapped_syscalls[] = { +/* + * SPARC-Linux uses syscall number mappings chosen to be compatible + * with SunOS. So, it doesn't have any of the traditional calls or + * the new STREAMS ones. However, there are a number of syscalls + * which are SunOS-specific (not implemented on Linux), i386-specific + * (not implemented on SPARC-Linux), or implemented only on one of + * sparc32 or sparc64. Of course, there are no __NR macros for most + * of these. + * + * Note that the calls we list here are implemented by sys_nis_syscall, + * not by sys_ni_syscall. That means we have to exclude all of the + * other entries, or we might get a sys_ni_syscall into the list and + * the test would no longer work. + */ +#if defined(AFS_SPARC64_LINUX20_ENV) + /* mmap2, fstat64, getmsg, putmsg, modify_ldt */ + 56, 63, 151, 152, 218, +#elif defined(AFS_SPARC_LINUX20_ENV) + /* memory_ordering, getmsg, putmsg, unimplemented, modify_ldt */ + 52, 151, 152, 164, 218, +#else /* !AFS_SPARC_LINUX20_ENV */ + +/* + * These 7 syscalls are present in the syscall table on most "older" + * platforms that use the traditional syscall number mappings. They + * are not implemented on any platform. 
+ */ +#ifdef __NR_break + __NR_break, +#endif +#ifdef __NR_stty + __NR_stty, +#endif +#ifdef __NR_gtty + __NR_gtty, +#endif +#ifdef __NR_ftime + __NR_ftime, +#endif +#ifdef __NR_prof + __NR_prof, +#endif +#ifdef __NR_lock + __NR_lock, +#endif +#ifdef __NR_mpx + __NR_mpx, +#endif +/* + * On s390 and arm (but not arm26), the seven traditional unimplemented + * system calls are indeed present and unimplemented. However, the + * corresponding __NR macros are not defined, so the tests above fail. + * Instead, we just have to know the numbers for these. + */ +#if defined(AFS_S390_LINUX20_ENV) || defined(AFS_S390X_LINUX20_ENV) + /* break, stty, gtty, ftime, prof, lock, mpx */ + 17, 31, 32, 35, 44, 53, 56, +#endif + +/* + * Sadly, some newer platforms like IA64, amd64, and PA-RISC don't have + * the traditional numbers, so the list above are not helpful. They + * do have entries for getpmsg/putpmsg, which are always unimplemented. + */ +#ifdef __NR_getpmsg + __NR_getpmsg, +#endif +#ifdef __NR_putpmsg + __NR_putpmsg, +#endif + +/* + * Alpha-Linux uses syscall number mappings chosen to be compatible + * with OSF/1. So, it doesn't have any of the traditional calls or + * the new STREAMS ones, but it does have several OSF/1-specific + * syscalls which are not implemented on Linux. These don't exist on + * any other platform. + */ +#ifdef __NR_osf_syscall + __NR_osf_syscall, +#endif +#ifdef __NR_osf_profil + __NR_osf_profil, +#endif +#ifdef __NR_osf_reboot + __NR_osf_reboot, +#endif +#ifdef __NR_osf_kmodcall + __NR_osf_kmodcall, +#endif +#ifdef __NR_osf_old_vtrace + __NR_osf_old_vtrace, +#endif + +/* + * On PPC64, we need a couple more entries to distinguish the two + * tables, since the system call numbers are the same and the sets of + * unimplemented calls are very similar. 
+ * mmap2 and fstat64 are implemented only for 32-bit calls + */ +#ifdef AFS_PPC64_LINUX20_ENV + /* _mmap2, _fstat64 */ + 192, 197, +#endif /* AFS_PPC64_LINUX20_ENV */ + +/* Similarly for S390X, with lcown16 and fstat64 */ +#ifdef AFS_S390X_LINUX20_ENV + /* lchown16, fstat64 */ + 16, 197, +#endif +#endif /* !AFS_SPARC_LINUX20_ENV */ + 0 +}; + +/* unique syscalls for try_harder */ +static int main_unique_syscalls[] = { +#if defined(AFS_SPARC64_LINUX24_ENV) || defined(AFS_SPARC_LINUX24_ENV) + /* + * On SPARC, we need some additional unique calls to make sure + * we don't match the SunOS-compatibility table. + */ + __NR_sgetmask, __NR_ssetmask, +#endif + __NR_exit, __NR_mount, __NR_read, __NR_write, + __NR_open, __NR_close, __NR_unlink +}; + +/* probe control structure */ +static probectl main_probe = { + /* symbol name and description */ + "sys_call_table", + "system call table", + + /* syscall number of first entry in table */ +#ifdef AFS_IA64_LINUX20_ENV + 1024, +#else + 0, +#endif + + sys_call_table, /* weak symbol ref */ + 0, 0, /* module parameter answers */ +#ifdef AFS_LINUX_sys_call_table + AFS_LINUX_sys_call_table, /* compiled-in answer, if any */ +#else + 0, +#endif + + main_try, /* array of combinations to try */ + + /* symbol in section to try scanning */ +#if defined(AFS_SPARC64_LINUX20_ENV) || defined(AFS_S390_LINUX20_ENV) || defined(AFS_S390X_LINUX20_ENV) + (unsigned long)&sys_close, +#elif defined(AFS_AMD64_LINUX20_ENV) + /* On this platform, it's in a different section! 
*/ + (unsigned long)&tasklist_lock, +#else + (unsigned long)&init_mm, +#endif + + /* default base address for scan */ + /* base address bits to force to zero */ + /* default length for scan */ +#if defined(AFS_SPARC64_LINUX20_ENV) + (unsigned long)(&sys_close), + 0xfffff, + 0x10000, +#elif defined(AFS_S390_LINUX20_ENV) || defined(AFS_S390X_LINUX20_ENV) + /* bleah; this is so suboptimal */ + (unsigned long)(&sys_close), + 0xfffff, + 0x20000, +#elif defined(AFS_IA64_LINUX20_ENV) + (unsigned long)(&init_mm), + 0x1fffff, + 0x30000, +#elif defined(AFS_AMD64_LINUX20_ENV) + (unsigned long)(&tasklist_lock) - 0x30000, + 0, + 0x6000, +#elif defined(AFS_PPC_LINUX20_ENV) || defined(AFS_PPC_LINUX20_ENV) + (unsigned long)&init_mm, + 0xffff, + 16384, +#else + (unsigned long)&init_mm, + 0, + 16384, +#endif + + 0, 0, 0, 0, + + /* number and list of unimplemented system calls */ + ((sizeof(main_zapped_syscalls)/sizeof(main_zapped_syscalls[0])) - 1), + main_zapped_syscalls, + + /* number and list of unique system calls */ + (sizeof(main_unique_syscalls)/sizeof(main_unique_syscalls[0])), + main_unique_syscalls, + + /* syscall number and pointer to verify match */ + __NR_close, &sys_close, + + /* syscalls to ignore for debugging */ + { +#if defined(AFS_ALPHA_LINUX20_ENV) + 338, +#elif defined(AFS_AMD64_LINUX20_ENV) + 183, +#elif defined(AFS_IA64_LINUX20_ENV) + 1141, +#elif defined(AFS_SPARC_LINUX20_ENV) || defined(AFS_SPARC64_LINUX20_ENV) + 227, +#else + 137, +#endif + __NR_setgroups, +#ifdef __NR_setgroups32 + __NR_setgroups32, +#else + -1, +#endif + -1, + } +}; + + +/********** Probing Configuration: amd64 ia32_sys_call_table **********/ +#if defined(AFS_AMD64_LINUX20_ENV) + +/* syscall pairs/triplets to probe */ +static tryctl ia32_try[] = { +#if defined(EXPORTED_SYS_CHDIR) + { "scan: close+chdir+write", __NR_ia32_close, &sys_close, __NR_ia32_chdir, &sys_chdir, __NR_ia32_write, &sys_write }, + { "scan: close+chdir", __NR_ia32_close, &sys_close, __NR_ia32_chdir, &sys_chdir, -1, 0 }, 
+#endif + { 0 } +}; + +/* zapped syscalls for try_harder */ +static int ia32_zapped_syscalls[] = { + __NR_ia32_break, __NR_ia32_stty, __NR_ia32_gtty, __NR_ia32_ftime, + __NR_ia32_prof, __NR_ia32_lock, __NR_ia32_mpx, + 0 +}; + +/* unique syscalls for try_harder */ +static int ia32_unique_syscalls[] = { + __NR_ia32_exit, __NR_ia32_mount, __NR_ia32_read, __NR_ia32_write, + __NR_ia32_open, __NR_ia32_close, __NR_ia32_unlink +}; + +/* probe control structure */ +static probectl ia32_probe = { + /* symbol name and description */ + "ia32_sys_call_table", + "32-bit system call table", + + /* syscall number of first entry in table */ + 0, + + ia32_sys_call_table, /* weak symbol ref */ + 0, 0, /* module parameter answers */ +#ifdef AFS_LINUX_ia32_sys_call_table + AFS_LINUX_ia32_sys_call_table,/* compiled-in answer, if any */ +#else + 0, +#endif + + ia32_try, /* array of combinations to try */ + + /* symbol in section to try scanning */ + (unsigned long)&init_mm, + + /* default base address for scan */ + /* base address bits to force to zero */ + /* default length for scan */ + (unsigned long)&init_mm, + 0, + (0x180000 / sizeof(unsigned long *)), + + 0, 0, 0, 0, + + + /* number and list of unimplemented system calls */ + ((sizeof(ia32_zapped_syscalls)/sizeof(ia32_zapped_syscalls[0])) - 1), + ia32_zapped_syscalls, + + /* number and list of unique system calls */ + (sizeof(ia32_unique_syscalls)/sizeof(ia32_unique_syscalls[0])), + ia32_unique_syscalls, + + /* syscall number and pointer to verify match */ + __NR_ia32_close, &sys_close, + + /* syscalls to ignore for debugging */ + { + 137, + __NR_ia32_setgroups, + __NR_ia32_setgroups32, + -1, + } +}; + +static probectl *probe_list[] = { + &main_probe, &ia32_probe +}; + + +/********** Probing Configuration: IA64 **********/ +#elif defined(AFS_IA64_LINUX20_ENV) +struct fptr { + void *ip; + unsigned long gp; +}; + +/* no 32-bit support on IA64 for now */ +static probectl *probe_list[] = { + &main_probe +}; + + +/********** Probing 
Configuration: ppc64, sparc64 sys_call_table32 **********/ +#elif defined(AFS_PPC64_LINUX20_ENV) || defined(AFS_SPARC64_LINUX20_ENV) +struct fptr { + void *ip; + unsigned long gp; +}; + +/* + * syscall pairs/triplets to probe + * This has to be empty, because anything that would work will + * also match the main table, and that's no good. + */ +static tryctl sct32_try[] = { + { 0 } +}; + +/* zapped syscalls for try_harder */ +static int sct32_zapped_syscalls[] = { +#ifdef AFS_PPC64_LINUX20_ENV + /* These should be sufficient */ + __NR_break, __NR_stty, __NR_gtty, __NR_ftime, + __NR_prof, __NR_lock, __NR_mpx, +#endif +#ifdef AFS_SPARC64_LINUX20_ENV + /* memory_ordering, getmsg, putmsg, unimplemented, modify_ldt */ + 52, 151, 152, 164, 218, +#endif + 0 +}; + +/* unique syscalls for try_harder */ +/* mmap2 and fstat64 are implemented only for 32-bit calls */ +static int sct32_unique_syscalls[] = { +#ifdef AFS_PPC64_LINUX20_ENV + /* _mmap2, _fstat64 */ + 192, 197, +#endif +#ifdef AFS_SPARC64_LINUX24_ENV + /* + * On SPARC, we need some additional unique calls to make sure + * we don't match the SunOS-compatibility table. 
+ */ + __NR_sgetmask, __NR_ssetmask, +#endif + __NR_exit, __NR_mount, __NR_read, __NR_write, + __NR_open, __NR_close, __NR_unlink +}; + +/* probe control structure */ +static probectl sct32_probe = { + /* symbol name and description */ + "sys_call_table32", + "32-bit system call table", + + /* syscall number of first entry in table */ + 0, + + sys_call_table32, /* weak symbol ref */ + 0, 0, /* module parameter answers */ +#ifdef AFS_LINUX_sys_call_table32 + AFS_LINUX_sys_call_table32, /* compiled-in answer, if any */ +#else + 0, +#endif + + sct32_try, /* array of combinations to try */ + + /* symbol in section to try scanning */ +#if defined(AFS_SPARC64_LINUX20_ENV) + (unsigned long)&sys_close, +#else + (unsigned long)&init_mm, +#endif + + /* default base address for scan */ + /* base address bits to force to zero */ + /* default length for scan */ +#if defined(AFS_SPARC64_LINUX20_ENV) + (unsigned long)(&sys_close), + 0xfffff, + 0x10000, +#else + (unsigned long)&init_mm, + 0, + 16384, +#endif + + 0, 0, 0, 0, + + /* number and list of unimplemented system calls */ + ((sizeof(sct32_zapped_syscalls)/sizeof(sct32_zapped_syscalls[0])) - 1), + sct32_zapped_syscalls, + + /* number and list of unique system calls */ + (sizeof(sct32_unique_syscalls)/sizeof(sct32_unique_syscalls[0])), + sct32_unique_syscalls, + + /* syscall number and pointer to verify match */ + __NR_close, &sys_close, + + /* syscalls to ignore for debugging */ + { +#if defined(AFS_SPARC64_LINUX20_ENV) + 227, +#else + 137, +#endif + __NR_setgroups, + -1, + -1, + } +}; + +static probectl *probe_list[] = { + &main_probe, &sct32_probe +}; + + +/********** End of Probing Configuration **********/ + +#else /* no per-platform probe control, so use the default list */ +static probectl *probe_list[] = { + &main_probe +}; +#endif + +#define N_PROBE_LIST (sizeof(probe_list) / sizeof(*probe_list)) +#define DEBUG_IN_RANGE(P,x) (!probe_debug_range || \ + (P->debug_answer && \ + (unsigned long)(x) >= (unsigned 
long)P->debug_answer && \ + (unsigned long)(x) < (unsigned long)P->debug_answer + probe_debug_range)) + + + +static int check_table(probectl *P, PROBETYPE *ptr) +{ + PROBETYPE *x; + int i, j; + + for (x = ptr, i = 0; i < _SS(NR_syscalls); i++, x++) { +#ifdef OSI_PROBE_DEBUG + if (probe_debug & 0x0040) { + for (j = 0; j < 4; j++) { + if (_SS(P->debug_ignore_NR[j]) == _SX(i + P->offset)) break; + } + if (j < 4) continue; + } +#endif + for (j = 0; j < 8; j++) { + if (_SS(probe_ignore_syscalls[j]) == _SX(i) + P->offset) break; + } + if (j < 8) continue; + if (*x <= ktxt_lower_bound) { +#ifdef OSI_PROBE_DEBUG + if ((probe_debug & 0x0008) && DEBUG_IN_RANGE(P,ptr)) + printk("<7>check 0x%lx -> %d [0x%lx]\n", + (unsigned long)ptr, i, (unsigned long)*x); +#endif + return i; + } + } +#ifdef OSI_PROBE_DEBUG + if ((probe_debug & 0x0008) && DEBUG_IN_RANGE(P,ptr)) + printk("<7>check 0x%lx -> ok\n", (unsigned long)ptr); +#endif + return -1; +} + +static void *try(probectl *P, tryctl *T, PROBETYPE *aptr, + unsigned long datalen) +{ +#ifdef OSI_PROBE_KALLSYMS + char *mod_name, *sec_name, *sym_name; + unsigned long mod_start, mod_end; + unsigned long sec_start, sec_end; + unsigned long sym_start, sym_end; +#endif + unsigned long offset, ip1, ip2, ip3; + int ret; + PROBETYPE *ptr; + +#if defined(AFS_IA64_LINUX20_ENV) || defined(AFS_PPC64_LINUX20_ENV) + ip1 = T->fn1 ? (unsigned long)((struct fptr *)T->fn1)->ip : 0; + ip2 = T->fn2 ? (unsigned long)((struct fptr *)T->fn2)->ip : 0; + ip3 = T->fn3 ? 
(unsigned long)((struct fptr *)T->fn3)->ip : 0; +#else + ip1 = (unsigned long)T->fn1; + ip2 = (unsigned long)T->fn2; + ip3 = (unsigned long)T->fn3; +#endif + +#ifdef OSI_PROBE_DEBUG + if (probe_debug & 0x0001) + printk("<7>osi_probe: %s %s (%d->0x%lx, %d->0x%lx, %d->0x%lx)\n", + P->symbol, T->name, T->NR1, ip1, T->NR2, ip2, T->NR3, ip3); +#endif + + if (!ip1 || !ip2 || (T->NR3 >= 0 && !ip3)) + return 0; + + for (offset = 0; offset < datalen; offset++, aptr++) { +#if defined(AFS_PPC64_LINUX20_ENV) + ptr = (PROBETYPE*)(*aptr); + if ((unsigned long)ptr <= KERNELBASE) { + continue; + } +#else + ptr = aptr; +#endif + if ((unsigned long)ptr < init_mm.start_code || +#if defined(AFS_AMD64_LINUX20_ENV) + (unsigned long)ptr > init_mm.brk) +#else + (unsigned long)ptr > init_mm.end_data) +#endif + { +/* printk("address 0x%lx (from 0x%lx %d) is out of range in check_table. wtf?\n", (unsigned long)x, (unsigned long)ptr, i);*/ + continue; + } + + ret = check_table(P, ptr); + if (ret >= 0) { + /* return value is number of entries to skip */ + aptr += ret; + offset += ret; + continue; + } + +#ifdef OSI_PROBE_DEBUG + if ((probe_debug & 0x0002) && DEBUG_IN_RANGE(P,ptr)) + printk("<7>try 0x%lx\n", (unsigned long)ptr); +#endif + if (ptr[_SS(T->NR1 - P->offset)] != ip1) continue; + if (ptr[_SS(T->NR2 - P->offset)] != ip2) continue; + if (ip3 && ptr[_SS(T->NR3 - P->offset)] != ip3) continue; + +#ifdef OSI_PROBE_DEBUG + if (probe_debug & 0x0002) + printk("<7>try found 0x%lx\n", (unsigned long)ptr); +#endif +#ifdef OSI_PROBE_KALLSYMS + if (kallsyms_address_to_symbol) { + ret = kallsyms_address_to_symbol((unsigned long)ptr, + &mod_name, &mod_start, &mod_end, + &sec_name, &sec_start, &sec_end, + &sym_name, &sym_start, &sym_end); + if (!ret || strcmp(sym_name, P->symbol)) continue; + } +#endif + /* XXX should we make sure there is only one match? 
*/ + return (void *)ptr; + } + return 0; +} + + +static int check_harder(probectl *P, PROBETYPE *p) +{ + unsigned long ip1; + int i, s; + + /* Check zapped syscalls */ + for (i = 1; i < P->n_zapped_syscalls; i++) { + if (p[_SS(P->zapped_syscalls[i])] != p[_SS(P->zapped_syscalls[0])]) { +#ifdef OSI_PROBE_DEBUG + if ((probe_debug & 0x0020) && DEBUG_IN_RANGE(P,p)) + printk("<7>check_harder 0x%lx zapped failed i=%d\n", (unsigned long)p, i); +#endif + return 0; + } + } + + /* Check unique syscalls */ + for (i = 0; i < P->n_unique_syscalls; i++) { + for (s = 0; s < NR_syscalls; s++) { + if (p[_SS(s)] == p[_SS(P->unique_syscalls[i])] + && s != P->unique_syscalls[i]) { +#ifdef OSI_PROBE_DEBUG + if ((probe_debug & 0x0010) && DEBUG_IN_RANGE(P,p)) + printk("<7>check_harder 0x%lx unique failed i=%d s=%d\n", (unsigned long)p, i, s); +#endif + return 0; + } + } + } + +#if defined(AFS_IA64_LINUX20_ENV) || defined(AFS_PPC64_LINUX20_ENV) + ip1 = P->verify_fn ? (unsigned long)((struct fptr *)(P->verify_fn))->ip : 0; +#else + ip1 = (unsigned long)(P->verify_fn); +#endif + + if (ip1 && p[_SS(P->verifyNR - P->offset)] != ip1) { +#ifdef OSI_PROBE_DEBUG + if ((probe_debug & 0x0010) && DEBUG_IN_RANGE(P,p)) + printk("<7>check_harder 0x%lx verify failed\n", (unsigned long)p); +#endif + return 0; + } + +#ifdef OSI_PROBE_DEBUG + if ((probe_debug & 0x0010) && DEBUG_IN_RANGE(P,p)) + printk("<7>check_harder 0x%lx success!\n", (unsigned long)p); +#endif + return 1; +} + +static void *try_harder(probectl *P, PROBETYPE *ptr, unsigned long datalen) +{ +#ifdef OSI_PROBE_KALLSYMS + char *mod_name, *sec_name, *sym_name; + unsigned long mod_start, mod_end; + unsigned long sec_start, sec_end; + unsigned long sym_start, sym_end; +#endif + unsigned long offset; + void *match = 0; + int ret; + +#ifdef OSI_PROBE_DEBUG + if (probe_debug & 0x0001) + printk("<7>osi_probe: %s try_harder\n", P->symbol); +#endif + for (offset = 0; offset < datalen; offset++, ptr++) { + if ((unsigned long)ptr < init_mm.start_code 
|| +#if defined(AFS_AMD64_LINUX20_ENV) + (unsigned long)ptr > init_mm.brk) +#else + (unsigned long)ptr > init_mm.end_data) +#endif + { +/* printk("address 0x%lx (from 0x%lx %d) is out of range in check_table. wtf?\n", (unsigned long)x, (unsigned long)ptr, i);*/ + continue; + } + ret = check_table(P, ptr); + if (ret >= 0) { + /* return value is number of entries to skip */ + ptr += ret; + offset += ret; + continue; + } + +#ifdef OSI_PROBE_DEBUG + if ((probe_debug & 0x0004) && DEBUG_IN_RANGE(P,ptr)) + printk("<7>try_harder 0x%lx\n", (unsigned long)ptr); +#endif + if (!check_harder(P, ptr)) + continue; + +#ifdef OSI_PROBE_DEBUG + if (probe_debug & 0x0004) + printk("<7>try_harder found 0x%lx\n", (unsigned long)ptr); +#endif + +#ifdef OSI_PROBE_KALLSYMS + if (kallsyms_address_to_symbol) { + ret = kallsyms_address_to_symbol((unsigned long)ptr, + &mod_name, &mod_start, &mod_end, + &sec_name, &sec_start, &sec_end, + &sym_name, &sym_start, &sym_end); + if (!ret || strcmp(sym_name, P->symbol)) continue; + } +#endif + + if (match) { +#ifdef OSI_PROBE_DEBUG + if (probe_debug & 0x0005) + printk("<7>%s: try_harder found multiple matches!\n", P->symbol); +#endif + return 0; + } + + match = (void *)ptr; + if (!probe_carefully) + break; + } + return match; +} + + +#ifdef OSI_PROBE_DEBUG +#define check_result(x,m) do { \ + if (probe_debug & 0x0001) { \ + printk("<7>osi_probe: %s = 0x%016lx %s\n", P->symbol, (unsigned long)(x), (m)); \ + } \ + if ((x) && ((int)(x)) != -ENOENT) { \ + *method = (m); \ + final_answer = (void *)(x); \ + } \ +} while (0) +#else +#define check_result(x,m) do { \ + if ((x) && ((int)(x)) != -ENOENT) { \ + *method = (m); \ + return (void *)(x); \ + } \ +} while (0) +#endif +static void *scan_for_syscall_table(probectl *P, PROBETYPE *B, unsigned long L) +{ + tryctl *T; + void *answer; +#if defined(AFS_S390_LINUX20_ENV) || defined(AFS_S390X_LINUX20_ENV) + void *answer2; +#endif +#ifdef OSI_PROBE_DEBUG + void *final_answer = 0; +#endif +#ifdef OSI_PROBE_DEBUG + 
/*
 * do_find_syscall_table - locate one kernel symbol (usually a syscall table)
 * described by probe descriptor P, trying progressively less reliable methods.
 *
 * P      - probe descriptor: symbol name, candidate answers, and scan ranges
 * method - out: set to a human-readable name of the method that succeeded
 *          (left as "not found" on failure)
 *
 * Returns the symbol's address, or 0/NULL if every method failed.
 *
 * NOTE(review): check_result() is a macro defined earlier in this file (not
 * visible here); it presumably returns from this function when its first
 * argument is a usable address — confirm before relying on fall-through
 * behavior below.
 */
static void *do_find_syscall_table(probectl *P, char **method)
{
#ifdef OSI_PROBE_KALLSYMS
    char *mod_name, *sec_name, *sym_name;
    unsigned long mod_start, mod_end;
    unsigned long sec_start, sec_end;
    unsigned long sym_start, sym_end;
    unsigned long token;
    int ret;
#endif
    PROBETYPE *B;
    unsigned long L;
    void *answer;
#ifdef OSI_PROBE_DEBUG
    void *final_answer = 0;
#endif

    *method = "not found";

    /* if it's exported, there's nothing to do */
    check_result(P->weak_answer, "exported");

    /* ask the kernel to do the name lookup, if it's willing */
#ifdef OSI_PROBE_KALLSYMS
    if (kallsyms_symbol_to_address) {
	token = 0;
	sym_start = 0;
	do {
	    ret = kallsyms_symbol_to_address(P->symbol, &token,
					     &mod_name, &mod_start, &mod_end,
					     &sec_name, &sec_start, &sec_end,
					     &sym_name, &sym_start, &sym_end);
	    /* only accept a hit in the core kernel image, not a module */
	    if (ret && !strcmp(mod_name, "kernel") && sym_start)
		break;
	    sym_start = 0;
	} while (ret);
	check_result(sym_start, "kallsyms_symbol_to_address");
    }
#endif

    /* Maybe a little birdie told us */
    check_result(P->parm_answer, "module parameter");
    check_result(P->given_answer, "compiled-in");

    /* OK, so we have to scan.  Start from the descriptor's default range,
     * aligned down by the descriptor's base mask. */
    B = (PROBETYPE *)((P->try_base) & ~(P->try_base_mask));
    L = P->try_length;
    /* Now, see if the kernel will tell us something better than the default */
#ifdef OSI_PROBE_KALLSYMS
    if (kallsyms_address_to_symbol) {
	ret = kallsyms_address_to_symbol(P->try_sect_sym,
					 &mod_name, &mod_start, &mod_end,
					 &sec_name, &sec_start, &sec_end,
					 &sym_name, &sym_start, &sym_end);
	if (ret) {
	    /* scan the whole section containing the anchor symbol */
	    B = (PROBETYPE *)sec_start;
	    L = (sec_end - sec_start) / sizeof(unsigned long);
	}
    }
#endif

    answer = scan_for_syscall_table(P, B, L);
    check_result(answer, "pattern scan");

    /* Primary range failed; fall back to the alternate range, if any. */
    B = (PROBETYPE *)((P->alt_try_base) & ~(P->alt_try_base_mask));
    L = P->alt_try_length;
    /* Now, see if the kernel will tell us something better than the default */
#ifdef OSI_PROBE_KALLSYMS
    if (kallsyms_address_to_symbol && P->alt_try_sect_sym) {
	ret = kallsyms_address_to_symbol(P->alt_try_sect_sym,
					 &mod_name, &mod_start, &mod_end,
					 &sec_name, &sec_start, &sec_end,
					 &sym_name, &sym_start, &sym_end);
	if (ret) {
	    B = (PROBETYPE *)sec_start;
	    L = (sec_end - sec_start) / sizeof(unsigned long);
	}
    }
#endif
    if (B && L) {
	answer = scan_for_syscall_table(P, B, L);
	check_result(answer, "pattern scan");
    }
#ifdef OSI_PROBE_DEBUG
    /* in debug builds all methods run to completion; final_answer is only
     * ever its initial 0 here — the real result is reported via check_result */
    return final_answer;
#else
    return 0;
#endif
}
/*
 * osi_probe_init - standalone-module entry point (OSI_PROBE_STANDALONE only).
 * Runs every probe in probe_list once, logging results via printk; the
 * return values are deliberately discarded — this build exists purely to
 * exercise/debug the probing machinery.
 */
int __init osi_probe_init(void)
{
    int i;

    /* default the debug tag to something unique-ish per load */
    if (!probe_debug_tag) probe_debug_tag = jiffies;
    printk("*** osi_probe %ld debug = 0x%04x ***\n",
	   probe_debug_tag, probe_debug);
    for (i = 0; i < N_PROBE_LIST; i++)
	(void)osi_find_syscall_table(i);
    return 0;
}
*/ +#include "h/mm.h" + +#ifdef AFS_AMD64_LINUX20_ENV +#include +#endif + +#include +#include +#include +#include +#include + +struct proc_dir_entry *openafs_procfs; + +#ifdef HAVE_KERNEL_LINUX_SEQ_FILE_H +static void *c_start(struct seq_file *m, loff_t *pos) +{ + struct afs_q *cq, *tq; + loff_t n = 0; + + AFS_GLOCK(); + ObtainReadLock(&afs_xcell); + for (cq = CellLRU.next; cq != &CellLRU; cq = tq) { + tq = QNext(cq); + + if (n++ == *pos) + break; + } + if (cq == &CellLRU) + cq = NULL; + + AFS_GUNLOCK(); + return cq; +} + +static void *c_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct afs_q *cq = p, *tq; + + AFS_GLOCK(); + (*pos)++; + tq = QNext(cq); + + if (tq == &CellLRU) + return NULL; + + AFS_GUNLOCK(); + return tq; +} + +static void c_stop(struct seq_file *m, void *p) +{ + AFS_GLOCK(); + ReleaseReadLock(&afs_xcell); + AFS_GUNLOCK(); +} + +static int c_show(struct seq_file *m, void *p) +{ + struct afs_q *cq = p; + struct cell *tc = QTOC(cq); + int j; + + seq_printf(m, ">%s #(%d/%d)\n", tc->cellName, + tc->cellNum, tc->cellIndex); + + for (j = 0; j < MAXCELLHOSTS; j++) { + afs_uint32 addr; + + if (!tc->cellHosts[j]) break; + + addr = tc->cellHosts[j]->addr->sa_ip; + seq_printf(m, "%u.%u.%u.%u #%u.%u.%u.%u\n", + NIPQUAD(addr), NIPQUAD(addr)); + } + + return 0; +} + +static struct seq_operations afs_csdb_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show, +}; + +static int afs_csdb_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &afs_csdb_op); +} + +static struct file_operations afs_csdb_operations = { + .open = afs_csdb_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + + +static void *uu_start(struct seq_file *m, loff_t *pos) +{ + struct unixuser *tu; + loff_t n = 0; + afs_int32 i; + + ObtainReadLock(&afs_xuser); + if (!*pos) + return (void *)(1); + + for (i = 0; i < NUSERS; i++) { + for (tu = afs_users[i]; tu; tu = tu->next) { + if (++n == *pos) + return tu; + } + } + 
+ return NULL; +} + +static void *uu_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct unixuser *tu = p; + afs_int32 i = 0; + + (*pos)++; + if (!p) return NULL; + + if (p != (void *)1) { + if (tu->next) return tu->next; + i = UHash(tu->uid) + 1; + } + + for (; i < NUSERS; i++) + if (afs_users[i]) return afs_users[i]; + return NULL; +} + +static void uu_stop(struct seq_file *m, void *p) +{ + ReleaseReadLock(&afs_xuser); +} + +static int uu_show(struct seq_file *m, void *p) +{ + struct cell *tc = 0; + struct unixuser *tu = p; + char *cellname; + + if (p == (void *)1) { + seq_printf(m, "%10s %4s %-6s %-25s %10s", + "UID/PAG", "Refs", "States", "Cell", "ViceID"); + seq_printf(m, " %10s %10s %10s %3s", + "Tok Set", "Tok Begin", "Tok Expire", "vno"); + seq_printf(m, " %-15s %10s %10s %s\n", + "NFS Client", "UID/PAG", "Client UID", "Sysname(s)"); + + return 0; + } + + if (tu->cell == -1) { + cellname = ""; + } else { + tc = afs_GetCellStale(tu->cell, READ_LOCK); + if (tc) cellname = tc->cellName; + else cellname = ""; + } + + seq_printf(m, "%10d %4d %04x %-25s %10d", + tu->uid, tu->refCount, tu->states, cellname, tu->vid); + + if (tc) afs_PutCell(tc, READ_LOCK); + + if (tu->states & UHasTokens) { + seq_printf(m, " %10d %10d %10d %3d", + tu->tokenTime, tu->ct.BeginTimestamp, tu->ct.EndTimestamp, + tu->ct.AuthHandle); + } else { + seq_printf(m, " %-36s", "Tokens Not Set"); + } + + if (tu->exporter && tu->exporter->exp_type == EXP_NFS) { + struct nfsclientpag *np = (struct nfsclientpag *)(tu->exporter); + char ipaddr[16]; + int i; + + sprintf(ipaddr, "%u.%u.%u.%u", NIPQUAD(np->host)); + seq_printf(m, " %-15s %10d %10d", ipaddr, np->uid, np->client_uid); + if (np->sysnamecount) { + for (i = 0; i < np->sysnamecount; i++) + seq_printf(m, " %s", np->sysname[i]); + } else { + seq_printf(m, " "); + } + + } else if (tu->exporter) { + seq_printf(m, " Unknown exporter type %d", tu->exporter->exp_type); + } + seq_printf(m, "\n"); + + return 0; +} + +static struct 
seq_operations afs_unixuser_seqop = { + .start = uu_start, + .next = uu_next, + .stop = uu_stop, + .show = uu_show, +}; + +static int afs_unixuser_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &afs_unixuser_seqop); +} + +static struct file_operations afs_unixuser_fops = { + .open = afs_unixuser_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + + +#else /* HAVE_KERNEL_LINUX_SEQ_FILE_H */ + +static int +csdbproc_info(char *buffer, char **start, off_t offset, int +length) +{ + int len = 0; + off_t pos = 0; + int cnt; + struct afs_q *cq, *tq; + struct cell *tc; + char tbuffer[16]; + /* 90 - 64 cellname, 10 for 32 bit num and index, plus + decor */ + char temp[91]; + afs_uint32 addr; + + ObtainReadLock(&afs_xcell); + + for (cq = CellLRU.next; cq != &CellLRU; cq = tq) { + tc = QTOC(cq); tq = QNext(cq); + + pos += 90; + + if (pos <= offset) { + len = 0; + } else { + sprintf(temp, ">%s #(%d/%d)\n", tc->cellName, + tc->cellNum, tc->cellIndex); + sprintf(buffer + len, "%-89s\n", temp); + len += 90; + if (pos >= offset+length) { + ReleaseReadLock(&afs_xcell); + goto done; + } + } + + for (cnt = 0; cnt < MAXCELLHOSTS; cnt++) { + if (!tc->cellHosts[cnt]) break; + pos += 90; + if (pos <= offset) { + len = 0; + } else { + addr = ntohl(tc->cellHosts[cnt]->addr->sa_ip); + sprintf(tbuffer, "%d.%d.%d.%d", + (int)((addr>>24) & 0xff), +(int)((addr>>16) & 0xff), + (int)((addr>>8) & 0xff), (int)( addr & 0xff)); + sprintf(temp, "%s #%s\n", tbuffer, tbuffer); + sprintf(buffer + len, "%-89s\n", temp); + len += 90; + if (pos >= offset+length) { + ReleaseReadLock(&afs_xcell); + goto done; + } + } + } + } + + ReleaseReadLock(&afs_xcell); + +done: + *start = buffer + len - (pos - offset); + len = pos - offset; + if (len > length) + len = length; + return len; +} + +#endif /* HAVE_KERNEL_LINUX_SEQ_FILE_H */ + +void +osi_proc_init(void) +{ + struct proc_dir_entry *entry; +#if !defined(EXPORTED_PROC_ROOT_FS) + char path[64]; +#endif + +#if 
defined(EXPORTED_PROC_ROOT_FS) + openafs_procfs = proc_mkdir(PROC_FSDIRNAME, proc_root_fs); +#else + sprintf(path, "fs/%s", PROC_FSDIRNAME); + openafs_procfs = proc_mkdir(path, NULL); +#endif +#ifdef HAVE_KERNEL_LINUX_SEQ_FILE_H + entry = create_proc_entry("unixusers", 0, openafs_procfs); + if (entry) { + entry->proc_fops = &afs_unixuser_fops; +#if defined(STRUCT_PROC_DIR_ENTRY_HAS_OWNER) + entry->owner = THIS_MODULE; +#endif + } + entry = create_proc_entry(PROC_CELLSERVDB_NAME, 0, openafs_procfs); + if (entry) + entry->proc_fops = &afs_csdb_operations; +#else + entry = create_proc_info_entry(PROC_CELLSERVDB_NAME, (S_IFREG|S_IRUGO), openafs_procfs, csdbproc_info); +#endif +#if defined(STRUCT_PROC_DIR_ENTRY_HAS_OWNER) + entry->owner = THIS_MODULE; +#endif +} + +void +osi_proc_clean(void) +{ +#if !defined(EXPORTED_PROC_ROOT_FS) + char path[64]; +#endif + + remove_proc_entry(PROC_CELLSERVDB_NAME, openafs_procfs); +#ifdef HAVE_KERNEL_LINUX_SEQ_FILE_H + remove_proc_entry("unixusers", openafs_procfs); +#endif +#if defined(EXPORTED_PROC_ROOT_FS) + remove_proc_entry(PROC_FSDIRNAME, proc_root_fs); +#else + sprintf(path, "fs/%s", PROC_FSDIRNAME); + remove_proc_entry(path, NULL); +#endif +} diff --git a/src/afs/LINUX24/osi_prototypes.h b/src/afs/LINUX24/osi_prototypes.h new file mode 100644 index 0000000..9bc8bca --- /dev/null +++ b/src/afs/LINUX24/osi_prototypes.h @@ -0,0 +1,101 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * Exported linux support routines. 
+ */ +#ifndef _OSI_PROTO_H_ +#define _OSI_PROTO_H_ + +/* osi_alloc.c */ +extern void *osi_linux_alloc(unsigned int size, int drop_glock); +extern void osi_linux_free(void *addr); +extern void osi_linux_free_afs_memory(void); +/* Debugging aid */ +extern void osi_linux_verify_alloced_memory(void); + +/* osi_cred.c */ +extern cred_t *crget(void); +extern void crfree(cred_t * cr); +extern cred_t *crdup(cred_t * cr); +extern cred_t *crref(void); +extern void crset(cred_t * cr); + +/* osi_nfssrv.c */ +extern int osi_linux_nfs_initreq(struct vrequest *av, AFS_UCRED *cr, + int *code); +extern void osi_linux_nfssrv_init(void); +extern void osi_linux_nfssrv_shutdown(void); +extern afs_rwlock_t afs_xnfssrv; + +/* osi_file.c */ +extern afs_rwlock_t afs_xosi; +extern int osi_InitCacheInfo(char *aname); +extern int osi_rdwr(struct osi_file *osifile, uio_t * uiop, int rw); +extern struct file *afs_linux_raw_open(afs_dcache_id_t *ainode, ino_t *hint); + +/* osi_ioctl.c */ +extern void osi_ioctl_init(void); +extern void osi_ioctl_clean(void); + +/* osi_misc.c */ +extern void afs_osi_SetTime(osi_timeval_t * tvp); +extern int osi_lookupname_internal(char *aname, int followlink, + struct vfsmount **mnt, struct dentry **dpp); +extern int osi_lookupname(char *aname, uio_seg_t seg, int followlink, + struct dentry **dpp); +extern int osi_abspath(char *aname, char *buf, int buflen, + int followlink, char **pathp); +extern void afs_start_thread(void (*proc)(void), char *name); + +/* osi_probe.c */ +extern void *osi_find_syscall_table(int which); + +/* osi_proc.c */ +extern void osi_proc_init(void); +extern void osi_proc_clean(void); + +/* osi_syscall.c */ +extern int osi_syscall_init(void); +extern void osi_syscall_clean(void); + +/* osi_sysctl.c */ +extern int osi_sysctl_init(void); +extern void osi_sysctl_clean(void); + +/* osi_vm.c */ +extern int osi_VM_FlushVCache(struct vcache *avc, int *slept); +extern void osi_VM_TryToSmush(struct vcache *avc, AFS_UCRED *acred, + int sync); +extern 
void osi_VM_FSyncInval(struct vcache *avc); +extern void osi_VM_StoreAllSegments(struct vcache *avc); +extern void osi_VM_FlushPages(struct vcache *avc, AFS_UCRED *credp); +extern void osi_VM_Truncate(struct vcache *avc, int alen, + AFS_UCRED *acred); + +/* osi_vfsops.c */ +extern void vattr2inode(struct inode *ip, struct vattr *vp); +extern int afs_init_inodecache(void); +extern void afs_destroy_inodecache(void); +extern void osi_linux_free_inode_pages(void); + +/* osi_vnodeops.c */ +extern void afs_fill_inode(struct inode *ip, struct vattr *vattr); + +/* osi_groups.c */ +extern void osi_keyring_init(void); +extern void osi_keyring_shutdown(void); +extern int __setpag(cred_t **cr, afs_uint32 pagvalue, afs_uint32 *newpag, + int change_parent); +#ifdef LINUX_KEYRING_SUPPORT +extern struct key_type key_type_afs_pag; +#endif /* LINUX_KEYRING_SUPPORT */ + + +#endif /* _OSI_PROTO_H_ */ diff --git a/src/afs/LINUX24/osi_sleep.c b/src/afs/LINUX24/osi_sleep.c new file mode 100644 index 0000000..93ba264 --- /dev/null +++ b/src/afs/LINUX24/osi_sleep.c @@ -0,0 +1,303 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. 
/* afs_osi_Wait
 * Waits for data on ahandle, or ams ms later.  ahandle may be null.
 * Returns 0 if timeout and EINTR if signalled.
 *
 * NOTE(review): the aintok parameter is ignored — osi_TimedSleep is always
 * called with aintok=1 (interruptible); confirm callers expect that.
 * NOTE(review): each loop iteration sleeps up to the full 'ams' again, so
 * a wakeup-without-signal can extend the total wait well past 'ams';
 * endTime is tracked in whole seconds (osi_Time()), so sub-second waits
 * round down to a single iteration.
 */
int
afs_osi_Wait(afs_int32 ams, struct afs_osi_WaitHandle *ahandle, int aintok)
{
    afs_int32 endTime;
    int code;

    AFS_STATCNT(osi_Wait);
    endTime = osi_Time() + (ams / 1000);
    if (ahandle)
	ahandle->proc = (caddr_t) current;	/* so CancelWait can find us */

    do {
	AFS_ASSERT_GLOCK();
	code = osi_TimedSleep(&waitV, ams, 1);
	if (code)
	    break;
	if (ahandle && (ahandle->proc == (caddr_t) 0)) {
	    /* we've been signalled */
	    break;
	}
    } while (osi_Time() < endTime);
    return code;
}
*/ + int seq; /* Sequence number: this is incremented + * by wakeup calls; wait will not return until + * it changes */ +#if defined(AFS_LINUX24_ENV) + wait_queue_head_t cond; +#else + struct wait_queue *cond; +#endif +} afs_event_t; + +#define HASHSIZE 128 +afs_event_t *afs_evhasht[HASHSIZE]; /* Hash table for events */ +#define afs_evhash(event) (afs_uint32) ((((long)event)>>2) & (HASHSIZE-1)); +int afs_evhashcnt = 0; + +/* Get and initialize event structure corresponding to lwp event (i.e. address) + * */ +static afs_event_t * +afs_getevent(char *event) +{ + afs_event_t *evp, *newp = 0; + int hashcode; + + AFS_ASSERT_GLOCK(); + hashcode = afs_evhash(event); + evp = afs_evhasht[hashcode]; + while (evp) { + if (evp->event == event) { + evp->refcount++; + return evp; + } + if (evp->refcount == 0) + newp = evp; + evp = evp->next; + } + if (!newp) + return NULL; + + newp->event = event; + newp->refcount = 1; + return newp; +} + +/* afs_addevent -- allocates a new event for the address. It isn't returned; + * instead, afs_getevent should be called again. Thus, the real effect of + * this routine is to add another event to the hash bucket for this + * address. + * + * Locks: + * Called with GLOCK held. However the function might drop + * GLOCK when it calls osi_AllocSmallSpace for allocating + * a new event (In Linux, the allocator drops GLOCK to avoid + * a deadlock). 
/* afs_osi_SleepSig
 *
 * Waits for an event to be notified, returning early if a signal
 * is received.  Returns EINTR if signaled, and 0 otherwise.
 *
 * Protocol: the event's sequence number is sampled before sleeping; a
 * wakeup (afs_osi_Wakeup) increments it, so "seq changed" is the wake
 * condition and lost-wakeup races are avoided.  GLOCK is held on entry
 * and on exit, but is dropped across schedule().
 */
int
afs_osi_SleepSig(void *event)
{
    struct afs_event *evp;
    int seq, retval;
#ifdef DECLARE_WAITQUEUE
    DECLARE_WAITQUEUE(wait, current);
#else
    struct wait_queue wait = { current, NULL };
#endif

    /* find (or create on the retry) the record for this event address */
    evp = afs_getevent(event);
    if (!evp) {
	afs_addevent(event);
	evp = afs_getevent(event);
    }

    seq = evp->seq;
    retval = 0;

    add_wait_queue(&evp->cond, &wait);
    while (seq == evp->seq) {
	/* must set state before checking the condition/schedule() */
	set_current_state(TASK_INTERRUPTIBLE);
	AFS_ASSERT_GLOCK();
	AFS_GUNLOCK();
	schedule();
	AFS_GLOCK();
	if (signal_pending(current)) {
	    retval = EINTR;
	    break;
	}
    }
    remove_wait_queue(&evp->cond, &wait);
    set_current_state(TASK_RUNNING);

    relevent(evp);		/* drop the reference taken by afs_getevent */
    return retval;
}
/* afs_osi_Sleep -- waits for an event to be notified, ignoring signals.
 * - NOTE: that on Linux, there are circumstances in which TASK_INTERRUPTIBLE
 *   can wake up, even if all signals are blocked
 * - TODO: handle signals correctly by passing an indication back to the
 *   caller that the wait has been interrupted and the stack should be cleaned
 *   up preparatory to signal delivery
 *
 * Implementation: blocks every signal around afs_osi_SleepSig so its EINTR
 * path is (in principle) never taken, then restores the saved mask.  The
 * SIG_LOCK/RECALC_SIGPENDING/SIG_UNLOCK ordering mirrors the kernel's own
 * mask-update protocol — do not reorder.
 */
void
afs_osi_Sleep(void *event)
{
    sigset_t saved_set;

    SIG_LOCK(current);
    saved_set = current->blocked;
    sigfillset(&current->blocked);
    RECALC_SIGPENDING(current);
    SIG_UNLOCK(current);

    afs_osi_SleepSig(event);

    /* restore the caller's signal mask */
    SIG_LOCK(current);
    current->blocked = saved_set;
    RECALC_SIGPENDING(current);
    SIG_UNLOCK(current);
}
*/ + AFS_GUNLOCK(); + + if (aintok) { + if (schedule_timeout(ticks)) + code = EINTR; + } else + schedule_timeout(ticks); + + AFS_GLOCK(); + remove_wait_queue(&evp->cond, &wait); + set_current_state(TASK_RUNNING); + + relevent(evp); + + return code; +} + + +int +afs_osi_Wakeup(void *event) +{ + int ret = 2; + struct afs_event *evp; + + evp = afs_getevent(event); + if (!evp) /* No sleepers */ + return 1; + + if (evp->refcount > 1) { + evp->seq++; + wake_up(&evp->cond); + ret = 0; + } + relevent(evp); + return ret; +} diff --git a/src/afs/LINUX24/osi_syscall.c b/src/afs/LINUX24/osi_syscall.c new file mode 100644 index 0000000..ac548ca --- /dev/null +++ b/src/afs/LINUX24/osi_syscall.c @@ -0,0 +1,458 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * Linux module support routines. + * + */ +#include +#include "afs/param.h" + + +#ifdef AFS_LINUX24_ENV +#include /* early to avoid printf->printk mapping */ +#endif +#include "afs/sysincludes.h" +#include "afsincludes.h" +#include "h/unistd.h" /* For syscall numbers. */ +#include "h/mm.h" + +#ifdef AFS_AMD64_LINUX20_ENV +#include +#endif + +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +#include +#include +#endif + +#ifndef NR_syscalls +#define NR_syscalls 222 +#endif + +/* On SPARC64 and S390X, sys_call_table contains 32-bit entries + * even though pointers are 64 bit quantities. 
+ * XXX unify this with osi_probe.c + */ +#if defined(AFS_SPARC64_LINUX20_ENV) || defined(AFS_S390X_LINUX24_ENV) +#define SYSCALLTYPE unsigned int +#define POINTER2SYSCALL (unsigned int)(unsigned long) +#define SYSCALL2POINTER (void *)(long) +#else +#define SYSCALLTYPE void * +#define POINTER2SYSCALL (void *) +#define SYSCALL2POINTER (void *) +#endif + +#if defined(AFS_S390X_LINUX24_ENV) +#define INSERT_SYSCALL(SLOT, TMPPAGE, FUNC) \ + if (SYSCALL2POINTER FUNC > 0x7fffffff) { \ + TMPPAGE = kmalloc ( PAGE_SIZE, GFP_DMA|GFP_KERNEL ); \ + if (SYSCALL2POINTER TMPPAGE > 0x7fffffff) { \ + printf("Cannot allocate page for FUNC syscall jump vector\n"); \ + return EINVAL; \ + } \ + memcpy(TMPPAGE, syscall_jump_code, sizeof(syscall_jump_code)); \ + *(void **)(TMPPAGE + 0x0c) = &FUNC; \ + afs_sys_call_table[_S(SLOT)] = POINTER2SYSCALL TMPPAGE; \ + } else \ + afs_sys_call_table[_S(SLOT)] = POINTER2SYSCALL FUNC; +#else +#define INSERT_SYSCALL(SLOT, TMPPAGE, FUNC) \ + afs_sys_call_table[_S(SLOT)] = POINTER2SYSCALL FUNC; +#endif + +#if defined(AFS_S390X_LINUX24_ENV) +#define _S(x) ((x)<<1) +#elif defined(AFS_IA64_LINUX20_ENV) +#define _S(x) ((x)-1024) +#else +#define _S(x) x +#endif + + +/***** ALL PLATFORMS *****/ +extern asmlinkage long +afs_syscall(long syscall, long parm1, long parm2, long parm3, long parm4); + +static SYSCALLTYPE *afs_sys_call_table; +static SYSCALLTYPE afs_ni_syscall = 0; + +#ifdef AFS_S390X_LINUX24_ENV +static void *afs_sys_setgroups_page = 0; +static void *afs_sys_setgroups32_page = 0; +static void *afs_syscall_page = 0; + +/* Because of how the syscall table is handled, we need to ensure our + syscalls are within the first 2gb of address space. This means we need + self-modifying code we can inject to call our handlers if the module + is loaded high. If keyrings had advanced as fast as false protection + this would be unnecessary. 
*/ + +uint32_t syscall_jump_code[] = { + 0xe3d0f030, 0x00240dd0, 0xa7f40006, 0xffffffff, 0xffffffff, 0xe310d004, + 0x0004e3d0, 0xf0300004, 0x07f10000, +}; +#endif + +extern long afs_xsetgroups(int gidsetsize, gid_t * grouplist); +asmlinkage long (*sys_setgroupsp) (int gidsetsize, gid_t * grouplist); + +#ifdef AFS_LINUX24_ENV +extern int afs_xsetgroups32(int gidsetsize, gid_t * grouplist); +asmlinkage int (*sys_setgroups32p) (int gidsetsize, + __kernel_gid32_t * grouplist); +#endif + +#if !defined(AFS_LINUX24_ENV) +asmlinkage int (*sys_settimeofdayp) (struct timeval * tv, struct timezone * tz); +#endif + + +/***** AMD64 *****/ +#ifdef AFS_AMD64_LINUX20_ENV +static SYSCALLTYPE *afs_ia32_sys_call_table; +static SYSCALLTYPE ia32_ni_syscall = 0; + +extern int afs32_xsetgroups(); +asmlinkage long (*sys32_setgroupsp) (int gidsetsize, u16 * grouplist); +#ifdef AFS_LINUX24_ENV +extern int afs32_xsetgroups32(); +asmlinkage long (*sys32_setgroups32p) (int gidsetsize, gid_t * grouplist); +#endif /* __NR_ia32_setgroups32 */ +#endif /* AFS_AMD64_LINUX20_ENV */ + + +/***** SPARC64 *****/ +#ifdef AFS_SPARC64_LINUX20_ENV +extern SYSCALLTYPE *afs_sys_call_table32; +static SYSCALLTYPE afs_ni_syscall32 = 0; + +extern int afs32_xsetgroups(); +asmlinkage int (*sys32_setgroupsp) (int gidsetsize, + __kernel_gid_t32 * grouplist); +#ifdef AFS_LINUX24_ENV +/* This number is not exported for some bizarre reason. 
*/ +#define __NR_setgroups32 82 +extern int afs32_xsetgroups32(); +asmlinkage int (*sys32_setgroups32p) (int gidsetsize, + __kernel_gid_t32 * grouplist); +#endif + +asmlinkage int +afs_syscall32(long syscall, long parm1, long parm2, long parm3, long parm4, + long parm5) +{ + __asm__ __volatile__("srl %o4, 0, %o4\n\t" + "mov %o7, %i7\n\t" + "call afs_syscall\n\t" + "srl %o5, 0, %o5\n\t" + "ret\n\t" + "nop"); +} +#endif /* AFS_SPARC64_LINUX20_ENV */ + + +/***** IA64 *****/ +#ifdef AFS_IA64_LINUX20_ENV + +asmlinkage long +afs_syscall_stub(int r0, int r1, long r2, long r3, long r4, long gp) +{ + __asm__ __volatile__("alloc r42 = ar.pfs, 8, 3, 6, 0\n\t" + "mov r41 = b0\n\t" /* save rp */ + "mov out0 = in0\n\t" + "mov out1 = in1\n\t" + "mov out2 = in2\n\t" + "mov out3 = in3\n\t" + "mov out4 = in4\n\t" + "mov out5 = gp\n\t" /* save gp */ + ";;\n" + ".L1:\n\t" + "mov r3 = ip\n\t" + ";;\n\t" + "addl r15=.fptr_afs_syscall-.L1,r3\n\t" + ";;\n\t" + "ld8 r15=[r15]\n\t" + ";;\n\t" + "ld8 r16=[r15],8\n\t" + ";;\n\t" + "ld8 gp=[r15]\n\t" + "mov b6=r16\n\t" + "br.call.sptk.many b0 = b6\n\t" + ";;\n\t" + "mov ar.pfs = r42\n\t" + "mov b0 = r41\n\t" + "mov gp = r48\n\t" /* restore gp */ + "br.ret.sptk.many b0\n" + ".fptr_afs_syscall:\n\t" + "data8 @fptr(afs_syscall)\n\t" + ".skip 8"); +} + +asmlinkage long +afs_xsetgroups_stub(int r0, int r1, long r2, long r3, long r4, long gp) +{ + __asm__ __volatile__("alloc r42 = ar.pfs, 8, 3, 6, 0\n\t" + "mov r41 = b0\n\t" /* save rp */ + "mov out0 = in0\n\t" + "mov out1 = in1\n\t" + "mov out2 = in2\n\t" + "mov out3 = in3\n\t" + "mov out4 = in4\n\t" + "mov out5 = gp\n\t" /* save gp */ + ";;\n" + ".L2:\n\t" + "mov r3 = ip\n\t" + ";;\n\t" + "addl r15=.fptr_afs_xsetgroups - .L2,r3\n\t" + ";;\n\t" + "ld8 r15=[r15]\n\t" + ";;\n\t" + "ld8 r16=[r15],8\n\t" + ";;\n\t" + "ld8 gp=[r15]\n\t" + "mov b6=r16\n\t" + "br.call.sptk.many b0 = b6\n\t" + ";;\n\t" + "mov ar.pfs = r42\n\t" + "mov b0 = r41\n\t" + "mov gp = r48\n\t" /* restore gp */ + "br.ret.sptk.many 
b0\n" + ".fptr_afs_xsetgroups:\n\t" + "data8 @fptr(afs_xsetgroups)\n\t" + ".skip 8"); +} + +struct fptr { + void *ip; + unsigned long gp; +}; + +#endif /* AFS_IA64_LINUX20_ENV */ + +/**********************************************************************/ +/********************* System Call Initialization *********************/ +/**********************************************************************/ + +int osi_syscall_init(void) +{ +/***** IA64 *****/ +#ifdef AFS_IA64_LINUX20_ENV + /* This needs to be first because we are declaring variables, and + * also because the handling of syscall pointers is bizarre enough + * that we want to special-case even the "common" part. + */ + unsigned long kernel_gp = 0; + static struct fptr sys_setgroups; + + afs_sys_call_table = osi_find_syscall_table(0); + if (afs_sys_call_table) { + +#if !defined(AFS_LINUX24_ENV) + /* XXX no sys_settimeofday on IA64? */ +#endif + + /* check we aren't already loaded */ + /* XXX this can't be right */ + if (SYSCALL2POINTER afs_sys_call_table[_S(__NR_afs_syscall)] + == afs_syscall) { + printf("AFS syscall entry point already in use!\n"); + return -EBUSY; + } + + /* setup AFS entry point */ + afs_ni_syscall = afs_sys_call_table[_S(__NR_afs_syscall)]; + afs_sys_call_table[_S(__NR_afs_syscall)] = + POINTER2SYSCALL((struct fptr *)afs_syscall_stub)->ip; + + /* setup setgroups */ + sys_setgroupsp = (void *)&sys_setgroups; + + ((struct fptr *)sys_setgroupsp)->ip = + SYSCALL2POINTER afs_sys_call_table[_S(__NR_setgroups)]; + ((struct fptr *)sys_setgroupsp)->gp = kernel_gp; + + afs_sys_call_table[_S(__NR_setgroups)] = + POINTER2SYSCALL((struct fptr *)afs_xsetgroups_stub)->ip; + } + + /* XXX no 32-bit syscalls on IA64? 
*/ + + +/***** COMMON (except IA64 or PPC64) *****/ +#else /* !AFS_IA64_LINUX20_ENV */ + + afs_sys_call_table = osi_find_syscall_table(0); + if (afs_sys_call_table) { +#if !defined(AFS_LINUX24_ENV) + sys_settimeofdayp = + SYSCALL2POINTER afs_sys_call_table[_S(__NR_settimeofday)]; +#endif /* AFS_LINUX24_ENV */ + + /* check we aren't already loaded */ + if (SYSCALL2POINTER afs_sys_call_table[_S(__NR_afs_syscall)] + == afs_syscall) { + printf("AFS syscall entry point already in use!\n"); + return -EBUSY; + } + + /* setup AFS entry point */ + afs_ni_syscall = afs_sys_call_table[_S(__NR_afs_syscall)]; + + INSERT_SYSCALL(__NR_afs_syscall, afs_syscall_page, afs_syscall) + + /* setup setgroups */ + sys_setgroupsp = SYSCALL2POINTER afs_sys_call_table[_S(__NR_setgroups)]; + INSERT_SYSCALL(__NR_setgroups, afs_sys_setgroups_page, afs_xsetgroups) + +#if defined(__NR_setgroups32) + /* setup setgroups32 */ + sys_setgroups32p = SYSCALL2POINTER afs_sys_call_table[__NR_setgroups32]; + INSERT_SYSCALL(__NR_setgroups32, afs_sys_setgroups32_page, afs_xsetgroups32) +#endif + } +#endif /* !AFS_IA64_LINUX20_ENV */ + + +/***** AMD64 *****/ +#ifdef AFS_AMD64_LINUX20_ENV + afs_ia32_sys_call_table = osi_find_syscall_table(1); + if (afs_ia32_sys_call_table) { + /* setup AFS entry point for IA32 */ + ia32_ni_syscall = afs_ia32_sys_call_table[__NR_ia32_afs_syscall]; + afs_ia32_sys_call_table[__NR_ia32_afs_syscall] = + POINTER2SYSCALL afs_syscall; + + /* setup setgroups for IA32 */ + sys32_setgroupsp = + SYSCALL2POINTER afs_ia32_sys_call_table[__NR_ia32_setgroups]; + afs_ia32_sys_call_table[__NR_ia32_setgroups] = + POINTER2SYSCALL afs32_xsetgroups; + +#if AFS_LINUX24_ENV + /* setup setgroups32 for IA32 */ + sys32_setgroups32p = + SYSCALL2POINTER afs_ia32_sys_call_table[__NR_ia32_setgroups32]; + afs_ia32_sys_call_table[__NR_ia32_setgroups32] = + POINTER2SYSCALL afs32_xsetgroups32; +#endif /* __NR_ia32_setgroups32 */ + } +#endif /* AFS_AMD64_LINUX20_ENV */ + + +/***** SPARC64 *****/ +#ifdef 
/*
 * osi_syscall_clean - undo osi_syscall_init: restore every syscall-table
 * slot we hooked (native table plus any 32-bit compatibility table) and,
 * on S390X, free the low-memory trampoline pages allocated for the hooks.
 */
void osi_syscall_clean(void)
{
/***** COMMON *****/
    if (afs_sys_call_table) {
	/* put back the AFS entry point */
	afs_sys_call_table[_S(__NR_afs_syscall)] = afs_ni_syscall;

	/* put back setgroups */
#if defined(AFS_IA64_LINUX20_ENV)
	/* IA64 function pointers are (ip, gp) descriptors: restore the ip */
	afs_sys_call_table[_S(__NR_setgroups)] =
	    POINTER2SYSCALL((struct fptr *)sys_setgroupsp)->ip;
#else /* AFS_IA64_LINUX20_ENV */
	afs_sys_call_table[_S(__NR_setgroups)] =
	    POINTER2SYSCALL sys_setgroupsp;
#endif

#if defined(__NR_setgroups32) && !defined(AFS_IA64_LINUX20_ENV)
	/* put back setgroups32
	 * NOTE(review): this index omits the _S() wrapper used everywhere
	 * else; it matches the un-wrapped save in osi_syscall_init, but on
	 * S390X (_S(x) = x<<1) the slot actually hooked by INSERT_SYSCALL is
	 * _S(__NR_setgroups32) and is never restored here — verify on s390x.
	 */
	afs_sys_call_table[__NR_setgroups32] = POINTER2SYSCALL sys_setgroups32p;
#endif
#if defined(AFS_S390X_LINUX24_ENV)
	/* free any trampoline pages INSERT_SYSCALL allocated */
#if defined(__NR_setgroups32) && !defined(AFS_IA64_LINUX20_ENV)
	if (afs_sys_setgroups32_page)
	    kfree(afs_sys_setgroups32_page);
#endif
	if (afs_sys_setgroups_page)
	    kfree(afs_sys_setgroups_page);
	if (afs_syscall_page)
	    kfree(afs_syscall_page);
#endif
    }


/***** IA64 *****/
#ifdef AFS_IA64_LINUX20_ENV
    /* XXX no 32-bit syscalls on IA64? */
#endif


/***** AMD64 *****/
#ifdef AFS_AMD64_LINUX20_ENV
    if (afs_ia32_sys_call_table) {
	/* put back AFS entry point for IA32 */
	afs_ia32_sys_call_table[__NR_ia32_afs_syscall] =
	    POINTER2SYSCALL ia32_ni_syscall;

	/* put back setgroups for IA32 */
	afs_ia32_sys_call_table[__NR_ia32_setgroups] =
	    POINTER2SYSCALL sys32_setgroupsp;

#ifdef AFS_LINUX24_ENV
	/* put back setgroups32 for IA32 */
	afs_ia32_sys_call_table[__NR_ia32_setgroups32] =
	    POINTER2SYSCALL sys32_setgroups32p;
#endif
    }
#endif


/***** SPARC64 *****/
#ifdef AFS_SPARC64_LINUX20_ENV
    if (afs_sys_call_table32) {
	/* put back AFS entry point for 32-bit SPARC */
	afs_sys_call_table32[__NR_afs_syscall] = afs_ni_syscall32;

	/* put back setgroups for IA32 */
	afs_sys_call_table32[__NR_setgroups] =
	    POINTER2SYSCALL sys32_setgroupsp;

#ifdef AFS_LINUX24_ENV
	/* put back setgroups32 for IA32 */
	afs_sys_call_table32[__NR_setgroups32] =
	    POINTER2SYSCALL sys32_setgroups32p;
#endif
    }
#endif
}
afs_pct2; + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *afs_sysctl = NULL; + +static ctl_table afs_sysctl_table[] = { + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 1, +#endif + .procname = "hm_retry_RO", + .data = &hm_retry_RO, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 2, +#endif + .procname = "hm_retry_RW", + .data = &hm_retry_RW, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 3, +#endif + .procname = "hm_retry_int", + .data = &hm_retry_int, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 4, +#endif + .procname = "GCPAGs", + .data = &afs_gcpags, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 5, +#endif + .procname = "rx_deadtime", + .data = &afs_rx_deadtime, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 6, +#endif + .procname = "bkVolPref", + .data = &afs_bkvolpref, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 7, +#endif + .procname = "afs_blocksUsed", + .data = &afs_blocksUsed, + .maxlen = sizeof(afs_int32), + .mode = 0444, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 8, +#endif + .procname = "afs_blocksUsed_0", + .data = &afs_blocksUsed_0, + .maxlen = 
sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 9, +#endif + .procname = "afs_blocksUsed_1", + .data = &afs_blocksUsed_1, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 10, +#endif + .procname = "afs_blocksUsed_2", + .data = &afs_blocksUsed_2, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 11, +#endif + .procname = "afs_pct1", + .data = &afs_pct1, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 12, +#endif + .procname = "afs_pct2", + .data = &afs_pct2, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 13, +#endif + .procname = "afs_cacheBlocks", + .data = &afs_cacheBlocks, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 14, +#endif + .procname = "md5inum", + .data = &afs_new_inum, + .maxlen = sizeof(afs_int32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + {0} +}; + +static ctl_table fs_sysctl_table[] = { + { +#if defined(SYSCTL_TABLE_CHECKING) + .ctl_name = CTL_UNNUMBERED, +#else + .ctl_name = 1, +#endif + .procname = "afs", + .mode = 0555, + .child = afs_sysctl_table + }, + {0} +}; + +int +osi_sysctl_init() +{ +#if defined(REGISTER_SYSCTL_TABLE_NOFLAG) + afs_sysctl = register_sysctl_table(fs_sysctl_table); +#else + afs_sysctl = register_sysctl_table(fs_sysctl_table, 0); +#endif + if (!afs_sysctl) + return -1; + 
+ return 0; +} + +void +osi_sysctl_clean() +{ + if (afs_sysctl) { + unregister_sysctl_table(afs_sysctl); + afs_sysctl = NULL; + } +} + +#endif /* CONFIG_SYSCTL */ diff --git a/src/afs/LINUX24/osi_vfs.hin b/src/afs/LINUX24/osi_vfs.hin new file mode 100644 index 0000000..bd6de53 --- /dev/null +++ b/src/afs/LINUX24/osi_vfs.hin @@ -0,0 +1,86 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * Linux interpretations of vnode and vfs structs. + */ + +#ifndef OSI_VFS_H_ +#define OSI_VFS_H_ + +typedef struct inode vnode_t; +#define vnode inode + +/* Map vnode fields to inode fields */ +#define i_number i_ino +#define v_count i_count +#define v_op i_op +#define v_fop i_fop +#define v_type i_mode +#define v_vfsp i_sb +#define v_data u.generic_ip + +/* v_type bits map to mode bits */ +#define VNON 0 +#define VREG S_IFREG +#define VDIR S_IFDIR +#define VBLK S_IFBLK +#define VCHR S_IFCHR +#define VLNK S_IFLNK +#define VSOCK S_IFSOCK +#define VFIFO S_IFIFO + +/* vcexcl - used only by afs_create */ +enum vcexcl { EXCL, NONEXCL } ; + +/* afs_open and afs_close needs to distinguish these cases */ +#define FWRITE O_WRONLY|O_RDWR|O_APPEND +#define FTRUNC O_TRUNC + +#define IO_APPEND O_APPEND +#define FSYNC O_SYNC + +#define VTOI(V) (V) +#define VFS_STATFS(V, S) ((V)->s_op->statfs)((V), (S), sizeof(*(S))) + +/* Various mode bits */ +#define VWRITE S_IWUSR +#define VREAD S_IRUSR +#define VEXEC S_IXUSR +#define VSUID S_ISUID +#define VSGID S_ISGID + +#define vfs super_block + +typedef struct vattr { + int va_type; /* One of v_types above. */ + afs_size_t va_size; + unsigned long va_blocks; + unsigned long va_blocksize; + int va_mask; /* AT_xxx operation to perform. */ + umode_t va_mode; /* mode bits. 
*/ + uid_t va_uid; + gid_t va_gid; + int va_fsid; /* Not used? */ + dev_t va_rdev; + ino_t va_nodeid; /* Inode number */ + nlink_t va_nlink; /* link count for file. */ + struct timeval va_atime; + struct timeval va_mtime; + struct timeval va_ctime; +} vattr_t; + +#define VATTR_NULL(A) memset(A, 0, sizeof(struct vattr)) + +#ifndef HAVE_LINUX_I_SIZE_READ +#define i_size_read(X) ((X)->i_size) +#define i_size_write(X,Y) (X)->i_size = Y +#endif + +#endif /* OSI_VFS_H_ */ diff --git a/src/afs/LINUX24/osi_vfsops.c b/src/afs/LINUX24/osi_vfsops.c new file mode 100644 index 0000000..078035f --- /dev/null +++ b/src/afs/LINUX24/osi_vfsops.c @@ -0,0 +1,483 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * VFS operations for Linux + * + * super_block operations should return negated errno to Linux. 
+ */ +#include +#include "afs/param.h" + + +#define __NO_VERSION__ /* don't define kernel_version in module.h */ +#include /* early to avoid printf->printk mapping */ +#include "afs/sysincludes.h" +#include "afsincludes.h" +#include "afs/afs_stats.h" +#include "h/locks.h" +#if defined(AFS_LINUX24_ENV) +#include "h/smp_lock.h" +#endif + + +struct vcache *afs_globalVp = 0; +struct vfs *afs_globalVFS = 0; +#if defined(AFS_LINUX24_ENV) +struct vfsmount *afs_cacheMnt; +#endif +int afs_was_mounted = 0; /* Used to force reload if mount/unmount/mount */ + +extern struct super_operations afs_sops; +extern afs_rwlock_t afs_xvcache; +extern struct afs_q VLRU; + +extern struct dentry_operations afs_dentry_operations; + +/* Forward declarations */ +static void iattr2vattr(struct vattr *vattrp, struct iattr *iattrp); +static int afs_root(struct super_block *afsp); +struct super_block *afs_read_super(struct super_block *sb, void *data, int silent); +int afs_fill_super(struct super_block *sb, void *data, int silent); + +/* afs_file_system + * VFS entry for Linux - installed in init_module + * Linux mounts file systems by: + * 1) register_filesystem(&afs_file_system) - done in init_module + * 2) Mount call comes to us via do_mount -> read_super -> afs_read_super. + * We are expected to setup the super_block. See afs_read_super. + */ + + +/* afs_read_super + * read the "super block" for AFS - roughly eguivalent to struct vfs. + * dev, covered, s_rd_only, s_dirt, and s_type will be set by read_super. + */ +#if defined(AFS_LINUX24_ENV) +DECLARE_FSTYPE(afs_fs_type, "afs", afs_read_super, 0); +#else +struct file_system_type afs_fs_type = { + "afs", /* name - used by mount operation. */ + 0, /* requires_dev - no for network filesystems. mount() will + * pass us an "unnamed" device. */ + afs_read_super, /* wrapper to afs_mount */ + NULL /* pointer to next file_system_type once registered. 
*/ +}; +#endif + +struct super_block * +afs_read_super(struct super_block *sb, void *data, int silent) +{ + int code = 0; + + AFS_GLOCK(); + if (afs_was_mounted) { + printf + ("You must reload the AFS kernel extensions before remounting AFS.\n"); + AFS_GUNLOCK(); + return NULL; + } + afs_was_mounted = 1; + + /* Set basics of super_block */ +#if !defined(AFS_LINUX24_ENV) + lock_super(sb); +#endif + MOD_INC_USE_COUNT; + + afs_globalVFS = sb; + sb->s_blocksize = 1024; + sb->s_blocksize_bits = 10; + sb->s_magic = AFS_VFSMAGIC; + sb->s_op = &afs_sops; /* Super block (vfs) ops */ +#if defined(HAVE_BDI_INIT) + bdi_init(&afs_backing_dev_info); +#endif +#if defined(MAX_NON_LFS) +#ifdef AFS_64BIT_CLIENT +#if !defined(MAX_LFS_FILESIZE) +#if BITS_PER_LONG==32 +#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) +#elif BITS_PER_LONG==64 +#define MAX_LFS_FILESIZE 0x7fffffffffffffff +#endif +#endif + sb->s_maxbytes = MAX_LFS_FILESIZE; +#else + sb->s_maxbytes = MAX_NON_LFS; +#endif +#endif + code = afs_root(sb); + if (code) { + afs_globalVFS = NULL; + MOD_DEC_USE_COUNT; + } + +#if !defined(AFS_LINUX24_ENV) + unlock_super(sb); +#endif + + AFS_GUNLOCK(); + return code ? NULL : sb; +} + + +/* afs_root - stat the root of the file system. AFS global held on entry. */ +static int +afs_root(struct super_block *afsp) +{ + register afs_int32 code = 0; + struct vrequest treq; + register struct vcache *tvp = 0; + + AFS_STATCNT(afs_root); + if (afs_globalVp && (afs_globalVp->f.states & CStatd)) { + tvp = afs_globalVp; + } else { + cred_t *credp = crref(); + + if (afs_globalVp) { + afs_PutVCache(afs_globalVp); + afs_globalVp = NULL; + } + + if (!(code = afs_InitReq(&treq, credp)) && !(code = afs_CheckInit())) { + tvp = afs_GetVCache(&afs_rootFid, &treq, NULL, NULL); + if (tvp) { + struct inode *ip = AFSTOV(tvp); + struct vattr vattr; + + afs_getattr(tvp, &vattr, credp); + afs_fill_inode(ip, &vattr); + + /* setup super_block and mount point inode. 
*/ + afs_globalVp = tvp; +#if defined(AFS_LINUX24_ENV) + afsp->s_root = d_alloc_root(ip); +#else + afsp->s_root = d_alloc_root(ip, NULL); +#endif + afsp->s_root->d_op = &afs_dentry_operations; + } else + code = ENOENT; + } + crfree(credp); + } + + afs_Trace2(afs_iclSetp, CM_TRACE_VFSROOT, ICL_TYPE_POINTER, afs_globalVp, + ICL_TYPE_INT32, code); + return code; +} + +/* super_operations */ + +/* afs_notify_change + * Linux version of setattr call. What to change is in the iattr struct. + * We need to set bits in both the Linux inode as well as the vcache. + */ +int +afs_notify_change(struct dentry *dp, struct iattr *iattrp) +{ + struct vattr vattr; + cred_t *credp = crref(); + struct inode *ip = dp->d_inode; + int code; + + VATTR_NULL(&vattr); + iattr2vattr(&vattr, iattrp); /* Convert for AFS vnodeops call. */ + + AFS_GLOCK(); + code = afs_setattr(VTOAFS(ip), &vattr, credp); + if (!code) { + afs_getattr(VTOAFS(ip), &vattr, credp); + vattr2inode(ip, &vattr); + } + AFS_GUNLOCK(); + crfree(credp); + return -code; +} + + +#if defined(STRUCT_SUPER_HAS_ALLOC_INODE) +#if defined(HAVE_KMEM_CACHE_T) +static kmem_cache_t *afs_inode_cachep; +#else +struct kmem_cache *afs_inode_cachep; +#endif + +static struct inode * +afs_alloc_inode(struct super_block *sb) +{ + struct vcache *vcp; + +#if defined(SLAB_KERNEL) + vcp = (struct vcache *) kmem_cache_alloc(afs_inode_cachep, SLAB_KERNEL); +#else + vcp = (struct vcache *) kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL); +#endif + if (!vcp) + return NULL; + + return AFSTOV(vcp); +} + +static void +afs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(afs_inode_cachep, inode); +} + +static void +#if defined(HAVE_KMEM_CACHE_T) +init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +#else +#if defined(KMEM_CACHE_INIT) +init_once(struct kmem_cache * cachep, void * foo) +#else +init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) +#endif +#endif +{ + struct vcache *vcp = (struct vcache *) foo; + +#if 
defined(SLAB_CTOR_VERIFY) + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) +#endif + inode_init_once(AFSTOV(vcp)); +} + +int +afs_init_inodecache(void) +{ +#ifndef SLAB_RECLAIM_ACCOUNT +#define SLAB_RECLAIM_ACCOUNT 0 +#endif + +#if defined(KMEM_CACHE_TAKES_DTOR) + afs_inode_cachep = kmem_cache_create("afs_inode_cache", + sizeof(struct vcache), + 0, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT, + init_once, NULL); +#else + afs_inode_cachep = kmem_cache_create("afs_inode_cache", + sizeof(struct vcache), + 0, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT, + init_once); +#endif + if (afs_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +void +afs_destroy_inodecache(void) +{ + if (afs_inode_cachep) + (void) kmem_cache_destroy(afs_inode_cachep); +} +#else +int +afs_init_inodecache(void) +{ + return 0; +} + +void +afs_destroy_inodecache(void) +{ + return; +} +#endif + +static void +afs_clear_inode(struct inode *ip) +{ + struct vcache *vcp = VTOAFS(ip); + + if (vcp->vlruq.prev || vcp->vlruq.next) + osi_Panic("inode freed while on LRU"); + if (vcp->hnext) + osi_Panic("inode freed while still hashed"); + +#if !defined(STRUCT_SUPER_HAS_ALLOC_INODE) + afs_osi_Free(ip->u.generic_ip, sizeof(struct vcache)); +#endif +} + +/* afs_put_super + * Called from unmount to release super_block. */ +static void +afs_put_super(struct super_block *sbp) +{ + AFS_GLOCK(); + AFS_STATCNT(afs_unmount); + + if (!suser()) { + AFS_GUNLOCK(); + return; + } + + afs_globalVFS = 0; + afs_globalVp = 0; + + osi_linux_free_inode_pages(); /* invalidate and release remaining AFS inodes. */ + afs_shutdown(); +#if defined(AFS_LINUX24_ENV) + mntput(afs_cacheMnt); +#endif + + osi_linux_verify_alloced_memory(); +#if defined(HAVE_BDI_INIT) + bdi_destroy(&afs_backing_dev_info); +#endif + AFS_GUNLOCK(); + + sbp->s_dev = 0; + MOD_DEC_USE_COUNT; +} + + +/* afs_statfs + * statp is in user space, so we need to cobble together a statfs, then + * copy it. 
+ */ +#if defined(AFS_LINUX24_ENV) +int +afs_statfs(struct super_block *sbp, struct statfs *statp) +#else +int +afs_statfs(struct super_block *sbp, struct statfs *__statp, int size) +#endif +{ +#if !defined(AFS_LINUX24_ENV) + struct statfs stat, *statp; + + if (size < sizeof(struct statfs)) + return; + + memset(&stat, 0, size); + statp = &stat; +#else + memset(statp, 0, sizeof(*statp)); +#endif + + AFS_STATCNT(afs_statfs); + + /* hardcode in case that which is giveth is taken away */ + statp->f_type = 0x5346414F; +#if defined(STATFS_TAKES_DENTRY) + statp->f_bsize = dentry->d_sb->s_blocksize; +#else + statp->f_bsize = sbp->s_blocksize; +#endif + statp->f_blocks = statp->f_bfree = statp->f_bavail = statp->f_files = + statp->f_ffree = 9000000; + statp->f_fsid.val[0] = AFS_VFSMAGIC; + statp->f_fsid.val[1] = AFS_VFSFSID; + statp->f_namelen = 256; + +#if !defined(AFS_LINUX24_ENV) + memcpy_tofs(__statp, &stat, size); +#endif + return 0; +} + +struct super_operations afs_sops = { +#if defined(STRUCT_SUPER_HAS_ALLOC_INODE) + .alloc_inode = afs_alloc_inode, + .destroy_inode = afs_destroy_inode, +#endif + .clear_inode = afs_clear_inode, + .put_super = afs_put_super, + .statfs = afs_statfs, +#if !defined(AFS_LINUX24_ENV) + .notify_change = afs_notify_change, +#endif +}; + +/************** Support routines ************************/ + +/* vattr_setattr + * Set iattr data into vattr. Assume vattr cleared before call. 
+ */ +static void +iattr2vattr(struct vattr *vattrp, struct iattr *iattrp) +{ + vattrp->va_mask = iattrp->ia_valid; + if (iattrp->ia_valid & ATTR_MODE) + vattrp->va_mode = iattrp->ia_mode; + if (iattrp->ia_valid & ATTR_UID) + vattrp->va_uid = iattrp->ia_uid; + if (iattrp->ia_valid & ATTR_GID) + vattrp->va_gid = iattrp->ia_gid; + if (iattrp->ia_valid & ATTR_SIZE) + vattrp->va_size = iattrp->ia_size; + if (iattrp->ia_valid & ATTR_ATIME) { + vattrp->va_atime.tv_sec = iattrp->ia_atime; + vattrp->va_atime.tv_usec = 0; + } + if (iattrp->ia_valid & ATTR_MTIME) { + vattrp->va_mtime.tv_sec = iattrp->ia_mtime; + vattrp->va_mtime.tv_usec = 0; + } + if (iattrp->ia_valid & ATTR_CTIME) { + vattrp->va_ctime.tv_sec = iattrp->ia_ctime; + vattrp->va_ctime.tv_usec = 0; + } +} + +/* vattr2inode + * Rewrite the inode cache from the attr. Assumes all vattr fields are valid. + */ +void +vattr2inode(struct inode *ip, struct vattr *vp) +{ + ip->i_ino = vp->va_nodeid; + ip->i_nlink = vp->va_nlink; + ip->i_blocks = vp->va_blocks; +#ifdef STRUCT_INODE_HAS_I_BLKBITS + ip->i_blkbits = AFS_BLKBITS; +#endif +#ifdef STRUCT_INODE_HAS_I_BLKSIZE + ip->i_blksize = vp->va_blocksize; +#endif + ip->i_rdev = vp->va_rdev; + ip->i_mode = vp->va_mode; + ip->i_uid = vp->va_uid; + ip->i_gid = vp->va_gid; + i_size_write(ip, vp->va_size); + ip->i_atime = vp->va_atime.tv_sec; + ip->i_mtime = vp->va_mtime.tv_sec; + ip->i_ctime = vp->va_ctime.tv_sec; +} + +/* osi_linux_free_inode_pages + * + * Free all vnodes remaining in the afs hash. Must be done before + * shutting down afs and freeing all memory. + */ +void +osi_linux_free_inode_pages(void) +{ + int i; + struct vcache *tvc, *nvc; + extern struct vcache *afs_vhashT[VCSIZE]; + + for (i = 0; i < VCSIZE; i++) { + for (tvc = afs_vhashT[i]; tvc; ) { + int slept; + + nvc = tvc->hnext; + if (afs_FlushVCache(tvc, &slept)) /* slept always 0 for linux? 
*/ + printf("Failed to invalidate all pages on inode 0x%p\n", tvc); + tvc = nvc; + } + } +} diff --git a/src/afs/LINUX24/osi_vm.c b/src/afs/LINUX24/osi_vm.c new file mode 100644 index 0000000..6c602d6 --- /dev/null +++ b/src/afs/LINUX24/osi_vm.c @@ -0,0 +1,156 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +#include +#include "afs/param.h" + + +#include "afs/sysincludes.h" /* Standard vendor system headers */ +#include "afsincludes.h" /* Afs-based standard headers */ +#include "afs/afs_stats.h" /* statistics */ + +/* Linux VM operations + * + * The general model for Linux is to treat vm as a cache that's: + * 1) explicitly updated by AFS when AFS writes the data to the cache file. + * 2) reads go through the cache. A cache miss is satisfied by the filesystem. + * + * This means flushing VM is not required on this OS. + */ + +/* Try to discard pages, in order to recycle a vcache entry. + * + * We also make some sanity checks: ref count, open count, held locks. + * + * We also do some non-VM-related chores, such as releasing the cred pointer + * (for AIX and Solaris) and releasing the gnode (for AIX). + * + * Locking: afs_xvcache lock is held. If it is dropped and re-acquired, + * *slept should be set to warn the caller. + * + * Formerly, afs_xvcache was dropped and re-acquired for Solaris, but now it + * is not dropped and re-acquired for any platform. It may be that *slept is + * therefore obsolescent. 
+ */ +int +osi_VM_FlushVCache(struct vcache *avc, int *slept) +{ + struct inode *ip = AFSTOV(avc); + + if (VREFCOUNT(avc) > 1) + return EBUSY; + + if (avc->opens != 0) + return EBUSY; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) + return vmtruncate(ip, 0); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,15) + truncate_inode_pages(ip, 0); +#else + invalidate_inode_pages(ip); +#endif + return 0; +} + +/* Try to invalidate pages, for "fs flush" or "fs flushv"; or + * try to free pages, when deleting a file. + * + * Locking: the vcache entry's lock is held. It may be dropped and + * re-obtained. + * + * Since we drop and re-obtain the lock, we can't guarantee that there won't + * be some pages around when we return, newly created by concurrent activity. + */ +void +osi_VM_TryToSmush(struct vcache *avc, AFS_UCRED *acred, int sync) +{ + struct inode *ip = AFSTOV(avc); + + invalidate_inode_pages(ip); +} + +/* Flush and invalidate pages, for fsync() with INVAL flag + * + * Locking: only the global lock is held. + */ +void +osi_VM_FSyncInval(struct vcache *avc) +{ + +} + +/* Try to store pages to cache, in order to store a file back to the server. + * + * Locking: the vcache entry's lock is held. It will usually be dropped and + * re-obtained. + */ +void +osi_VM_StoreAllSegments(struct vcache *avc) +{ + struct inode *ip = AFSTOV(avc); + + if (avc->f.states & CPageWrite) + return; /* someone already writing */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,5) + /* filemap_fdatasync() only exported in 2.4.5 and above */ + ReleaseWriteLock(&avc->lock); + AFS_GUNLOCK(); + filemap_fdatasync(ip->i_mapping); + filemap_fdatawait(ip->i_mapping); + AFS_GLOCK(); + ObtainWriteLock(&avc->lock, 121); +#endif +} + +/* Purge VM for a file when its callback is revoked. + * + * Locking: No lock is held, not even the global lock. + */ + +/* Note that for speed some of our Linux vnodeops do not initialise credp + * before calling osi_FlushPages(). 
If credp is ever required on Linux, + * then these callers should be updated. + */ +void +osi_VM_FlushPages(struct vcache *avc, AFS_UCRED *credp) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) + struct inode *ip = AFSTOV(avc); + + truncate_inode_pages(&ip->i_data, 0); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,15) + struct inode *ip = AFSTOV(avc); + + truncate_inode_pages(ip, 0); +#else + invalidate_inode_pages(AFSTOV(avc)); +#endif +} + +/* Purge pages beyond end-of-file, when truncating a file. + * + * Locking: no lock is held, not even the global lock. + * activeV is raised. This is supposed to block pageins, but at present + * it only works on Solaris. + */ +void +osi_VM_Truncate(struct vcache *avc, int alen, AFS_UCRED *acred) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) + vmtruncate(AFSTOV(avc), alen); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,15) + struct inode *ip = AFSTOV(avc); + + truncate_inode_pages(ip, alen); +#else + invalidate_inode_pages(AFSTOV(avc)); +#endif +} diff --git a/src/afs/LINUX24/osi_vnodeops.c b/src/afs/LINUX24/osi_vnodeops.c new file mode 100644 index 0000000..2945554 --- /dev/null +++ b/src/afs/LINUX24/osi_vnodeops.c @@ -0,0 +1,2507 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * Linux specific vnodeops. Also includes the glue routines required to call + * AFS vnodeops. + * + * So far the only truly scary part is that Linux relies on the inode cache + * to be up to date. Don't you dare break a callback and expect an fstat + * to give you meaningful information. This appears to be fixed in the 2.1 + * development kernels. As it is we can fix this now by intercepting the + * stat calls. 
+ */ + +#include +#include "afs/param.h" + + +#include "afs/sysincludes.h" +#include "afsincludes.h" +#include "afs/afs_stats.h" +#include "h/mm.h" +#ifdef HAVE_MM_INLINE_H +#include "h/mm_inline.h" +#endif +#include "h/pagemap.h" +#if defined(AFS_LINUX24_ENV) +#include "h/smp_lock.h" +#endif +#if defined(AFS_CACHE_BYPASS) +#include "afs/lock.h" +#include "afs/afs_bypasscache.h" +#endif + +#include "osi_pagecopy.h" + +#ifdef pgoff2loff +#define pageoff(pp) pgoff2loff((pp)->index) +#else +#define pageoff(pp) pp->offset +#endif + +#ifndef HAVE_PAGEVEC_LRU_ADD_FILE +#define __pagevec_lru_add_file __pagevec_lru_add +#endif + +#ifndef MAX_ERRNO +#define MAX_ERRNO 1000L +#endif + +extern struct vcache *afs_globalVp; +extern int afs_notify_change(struct dentry *dp, struct iattr *iattrp); +#if defined(AFS_LINUX24_ENV) +/* Some uses of BKL are perhaps not needed for bypass or memcache-- + * why don't we try it out? */ +extern struct afs_cacheOps afs_UfsCacheOps; +#define maybe_lock_kernel() \ + do { \ + if(afs_cacheType == &afs_UfsCacheOps) \ + lock_kernel(); \ + } while(0); + + +#define maybe_unlock_kernel() \ + do { \ + if(afs_cacheType == &afs_UfsCacheOps) \ + unlock_kernel(); \ + } while(0); +#endif /* AFS_LINUX24_ENV */ + + +/* This function converts a positive error code from AFS into a negative + * code suitable for passing into the Linux VFS layer. It checks that the + * error code is within the permissable bounds for the ERR_PTR mechanism. + * + * _All_ error codes which come from the AFS layer should be passed through + * this function before being returned to the kernel. + */ + +static inline int afs_convert_code(int code) { + if ((code >= 0) && (code <= MAX_ERRNO)) + return -code; + else + return -EIO; +} + +/* Linux doesn't require a credp for many functions, and crref is an expensive + * operation. 
This helper function avoids obtaining it for VerifyVCache calls + */ + +static inline int afs_linux_VerifyVCache(struct vcache *avc, cred_t **retcred) { + cred_t *credp = NULL; + struct vrequest treq; + int code; + + if (avc->f.states & CStatd) { + if (retcred) + *retcred = NULL; + return 0; + } + + credp = crref(); + + code = afs_InitReq(&treq, credp); + if (code == 0) + code = afs_VerifyVCache2(avc, &treq); + + if (retcred != NULL) + *retcred = credp; + else + crfree(credp); + + return afs_convert_code(code); +} + +static ssize_t +afs_linux_read(struct file *fp, char *buf, size_t count, loff_t * offp) +{ + ssize_t code = 0; + struct vcache *vcp = VTOAFS(fp->f_dentry->d_inode); +#if defined(AFS_CACHE_BYPASS) && LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + afs_size_t isize, offindex; +#endif + + AFS_GLOCK(); + afs_Trace4(afs_iclSetp, CM_TRACE_READOP, ICL_TYPE_POINTER, vcp, + ICL_TYPE_OFFSET, offp, ICL_TYPE_INT32, count, ICL_TYPE_INT32, + 99999); + code = afs_linux_VerifyVCache(vcp, NULL); + + if (code == 0) { +#if defined(AFS_CACHE_BYPASS) && LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + isize = (i_size_read(fp->f_mapping->host) - 1) >> PAGE_CACHE_SHIFT; + offindex = *offp >> PAGE_CACHE_SHIFT; + if(offindex > isize) { + code=0; + goto done; + } +#endif + /* Linux's FlushPages implementation doesn't ever use credp, + * so we optimise by not using it */ + osi_FlushPages(vcp, NULL); /* ensure stale pages are gone */ + AFS_GUNLOCK(); +#ifdef DO_SYNC_READ + code = do_sync_read(fp, buf, count, offp); +#else + code = generic_file_read(fp, buf, count, offp); +#endif + AFS_GLOCK(); + } + + afs_Trace4(afs_iclSetp, CM_TRACE_READOP, ICL_TYPE_POINTER, vcp, + ICL_TYPE_OFFSET, offp, ICL_TYPE_INT32, count, ICL_TYPE_INT32, + code); +#if defined(AFS_CACHE_BYPASS) && LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +done: +#endif + AFS_GUNLOCK(); + return code; +} + + +/* Now we have integrated VM for writes as well as reads. 
generic_file_write + * also takes care of re-positioning the pointer if file is open in append + * mode. Call fake open/close to ensure we do writes of core dumps. + */ +static ssize_t +afs_linux_write(struct file *fp, const char *buf, size_t count, loff_t * offp) +{ + ssize_t code = 0; + struct vcache *vcp = VTOAFS(fp->f_dentry->d_inode); + cred_t *credp; + + AFS_GLOCK(); + + afs_Trace4(afs_iclSetp, CM_TRACE_WRITEOP, ICL_TYPE_POINTER, vcp, + ICL_TYPE_OFFSET, offp, ICL_TYPE_INT32, count, ICL_TYPE_INT32, + (fp->f_flags & O_APPEND) ? 99998 : 99999); + + code = afs_linux_VerifyVCache(vcp, &credp); + + ObtainWriteLock(&vcp->lock, 529); + afs_FakeOpen(vcp); + ReleaseWriteLock(&vcp->lock); + if (code == 0) { + AFS_GUNLOCK(); +#ifdef DO_SYNC_READ + code = do_sync_write(fp, buf, count, offp); +#else + code = generic_file_write(fp, buf, count, offp); +#endif + AFS_GLOCK(); + } + + ObtainWriteLock(&vcp->lock, 530); + + if (vcp->execsOrWriters == 1 && !credp) + credp = crref(); + + afs_FakeClose(vcp, credp); + ReleaseWriteLock(&vcp->lock); + + afs_Trace4(afs_iclSetp, CM_TRACE_WRITEOP, ICL_TYPE_POINTER, vcp, + ICL_TYPE_OFFSET, offp, ICL_TYPE_INT32, count, ICL_TYPE_INT32, + code); + + if (credp) + crfree(credp); + AFS_GUNLOCK(); + return code; +} + +extern int BlobScan(struct dcache * afile, afs_int32 ablob); + +/* This is a complete rewrite of afs_readdir, since we can make use of + * filldir instead of afs_readdir_move. Note that changes to vcache/dcache + * handling and use of bulkstats will need to be reflected here as well. 
+ */ +static int +afs_linux_readdir(struct file *fp, void *dirbuf, filldir_t filldir) +{ + struct vcache *avc = VTOAFS(FILE_INODE(fp)); + struct vrequest treq; + register struct dcache *tdc; + int code; + int offset; + int dirpos; + struct DirEntry *de; + ino_t ino; + int len; + afs_size_t origOffset, tlen; + cred_t *credp = crref(); + struct afs_fakestat_state fakestat; + + AFS_GLOCK(); + AFS_STATCNT(afs_readdir); + + code = afs_convert_code(afs_InitReq(&treq, credp)); + crfree(credp); + if (code) + goto out1; + + afs_InitFakeStat(&fakestat); + code = afs_convert_code(afs_EvalFakeStat(&avc, &fakestat, &treq)); + if (code) + goto out; + + /* update the cache entry */ + tagain: + code = afs_convert_code(afs_VerifyVCache2(avc, &treq)); + if (code) + goto out; + + /* get a reference to the entire directory */ + tdc = afs_GetDCache(avc, (afs_size_t) 0, &treq, &origOffset, &tlen, 1); + len = tlen; + if (!tdc) { + code = -ENOENT; + goto out; + } + ObtainSharedLock(&avc->lock, 810); + UpgradeSToWLock(&avc->lock, 811); + ObtainReadLock(&tdc->lock); + /* + * Make sure that the data in the cache is current. There are two + * cases we need to worry about: + * 1. The cache data is being fetched by another process. + * 2. The cache data is no longer valid + */ + while ((avc->f.states & CStatd) + && (tdc->dflags & DFFetching) + && hsame(avc->f.m.DataVersion, tdc->f.versionNo)) { + ReleaseReadLock(&tdc->lock); + ReleaseSharedLock(&avc->lock); + afs_osi_Sleep(&tdc->validPos); + ObtainSharedLock(&avc->lock, 812); + ObtainReadLock(&tdc->lock); + } + if (!(avc->f.states & CStatd) + || !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) { + ReleaseReadLock(&tdc->lock); + ReleaseSharedLock(&avc->lock); + afs_PutDCache(tdc); + goto tagain; + } + + /* Set the readdir-in-progress flag, and downgrade the lock + * to shared so others will be able to acquire a read lock. 
+ */ + avc->f.states |= CReadDir; + avc->dcreaddir = tdc; + avc->readdir_pid = MyPidxx2Pid(MyPidxx); + ConvertWToSLock(&avc->lock); + + /* Fill in until we get an error or we're done. This implementation + * takes an offset in units of blobs, rather than bytes. + */ + code = 0; + offset = (int) fp->f_pos; + while (1) { + dirpos = BlobScan(tdc, offset); + if (!dirpos) + break; + + de = afs_dir_GetBlob(tdc, dirpos); + if (!de) + break; + + ino = afs_calc_inum (avc->f.fid.Fid.Volume, ntohl(de->fid.vnode)); + + if (de->name) + len = strlen(de->name); + else { + printf("afs_linux_readdir: afs_dir_GetBlob failed, null name (inode %lx, dirpos %d)\n", + (unsigned long)&tdc->f.inode, dirpos); + DRelease(de, 0); + ReleaseSharedLock(&avc->lock); + afs_PutDCache(tdc); + code = -ENOENT; + goto out; + } + + /* filldir returns -EINVAL when the buffer is full. */ +#if (defined(AFS_LINUX24_ENV) || defined(pgoff2loff)) && defined(DECLARE_FSTYPE) + { + unsigned int type = DT_UNKNOWN; + struct VenusFid afid; + struct vcache *tvc; + int vtype; + afid.Cell = avc->f.fid.Cell; + afid.Fid.Volume = avc->f.fid.Fid.Volume; + afid.Fid.Vnode = ntohl(de->fid.vnode); + afid.Fid.Unique = ntohl(de->fid.vunique); + if ((avc->f.states & CForeign) == 0 && (ntohl(de->fid.vnode) & 1)) { + type = DT_DIR; + } else if ((tvc = afs_FindVCache(&afid, 0, 0))) { + if (tvc->mvstat) { + type = DT_DIR; + } else if (((tvc->f.states) & (CStatd | CTruth))) { + /* CTruth will be set if the object has + *ever* been statd */ + vtype = vType(tvc); + if (vtype == VDIR) + type = DT_DIR; + else if (vtype == VREG) + type = DT_REG; + /* Don't do this until we're sure it can't be a mtpt */ + /* else if (vtype == VLNK) + * type=DT_LNK; */ + /* what other types does AFS support? */ + } + /* clean up from afs_FindVCache */ + afs_PutVCache(tvc); + } + /* + * If this is NFS readdirplus, then the filler is going to + * call getattr on this inode, which will deadlock if we're + * holding the GLOCK. 
+ */ + AFS_GUNLOCK(); + code = (*filldir) (dirbuf, de->name, len, offset, ino, type); + AFS_GLOCK(); + } +#else + code = (*filldir) (dirbuf, de->name, len, offset, ino); +#endif + DRelease(de, 0); + if (code) + break; + offset = dirpos + 1 + ((len + 16) >> 5); + } + /* If filldir didn't fill in the last one this is still pointing to that + * last attempt. + */ + fp->f_pos = (loff_t) offset; + + ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); + UpgradeSToWLock(&avc->lock, 813); + avc->f.states &= ~CReadDir; + avc->dcreaddir = 0; + avc->readdir_pid = 0; + ReleaseSharedLock(&avc->lock); + code = 0; + +out: + afs_PutFakeStat(&fakestat); +out1: + AFS_GUNLOCK(); + return code; +} + + +/* in afs_pioctl.c */ +extern int afs_xioctl(struct inode *ip, struct file *fp, unsigned int com, + unsigned long arg); + +#if defined(HAVE_UNLOCKED_IOCTL) || defined(HAVE_COMPAT_IOCTL) +static long afs_unlocked_xioctl(struct file *fp, unsigned int com, + unsigned long arg) { + return afs_xioctl(FILE_INODE(fp), fp, com, arg); + +} +#endif + + +static int +afs_linux_mmap(struct file *fp, struct vm_area_struct *vmap) +{ + struct vcache *vcp = VTOAFS(FILE_INODE(fp)); + int code; + + AFS_GLOCK(); +#if defined(AFS_LINUX24_ENV) + afs_Trace3(afs_iclSetp, CM_TRACE_GMAP, ICL_TYPE_POINTER, vcp, + ICL_TYPE_POINTER, vmap->vm_start, ICL_TYPE_INT32, + vmap->vm_end - vmap->vm_start); +#else + afs_Trace4(afs_iclSetp, CM_TRACE_GMAP, ICL_TYPE_POINTER, vcp, + ICL_TYPE_POINTER, vmap->vm_start, ICL_TYPE_INT32, + vmap->vm_end - vmap->vm_start, ICL_TYPE_INT32, + vmap->vm_offset); +#endif + + /* get a validated vcache entry */ + code = afs_linux_VerifyVCache(vcp, NULL); + + /* Linux's Flushpage implementation doesn't use credp, so optimise + * our code to not need to crref() it */ + osi_FlushPages(vcp, NULL); /* ensure stale pages are gone */ + AFS_GUNLOCK(); + code = generic_file_mmap(fp, vmap); + AFS_GLOCK(); + if (!code) + vcp->f.states |= CMAPPED; + + AFS_GUNLOCK(); + return code; +} + +static int 
+afs_linux_open(struct inode *ip, struct file *fp) +{ + struct vcache *vcp = VTOAFS(ip); + cred_t *credp = crref(); + int code; + +#ifdef AFS_LINUX24_ENV + maybe_lock_kernel(); +#endif + AFS_GLOCK(); + code = afs_open(&vcp, fp->f_flags, credp); + AFS_GUNLOCK(); +#ifdef AFS_LINUX24_ENV + maybe_unlock_kernel(); +#endif + + crfree(credp); + return afs_convert_code(code); +} + +static int +afs_linux_release(struct inode *ip, struct file *fp) +{ + struct vcache *vcp = VTOAFS(ip); + cred_t *credp = crref(); + int code = 0; + +#ifdef AFS_LINUX24_ENV + maybe_lock_kernel(); +#endif + AFS_GLOCK(); + code = afs_close(vcp, fp->f_flags, credp); + AFS_GUNLOCK(); +#ifdef AFS_LINUX24_ENV + maybe_unlock_kernel(); +#endif + + crfree(credp); + return afs_convert_code(code); +} + +static int +#if defined(AFS_LINUX24_ENV) +afs_linux_fsync(struct file *fp, struct dentry *dp, int datasync) +#else +afs_linux_fsync(struct file *fp, struct dentry *dp) +#endif +{ + int code; + struct inode *ip = FILE_INODE(fp); + cred_t *credp = crref(); + +#ifdef AFS_LINUX24_ENV + maybe_lock_kernel(); +#endif + AFS_GLOCK(); + code = afs_fsync(VTOAFS(ip), credp); + AFS_GUNLOCK(); +#ifdef AFS_LINUX24_ENV + maybe_unlock_kernel(); +#endif + crfree(credp); + return afs_convert_code(code); + +} + + +static int +afs_linux_lock(struct file *fp, int cmd, struct file_lock *flp) +{ + int code = 0; + struct vcache *vcp = VTOAFS(FILE_INODE(fp)); + cred_t *credp = crref(); + struct AFS_FLOCK flock; +#if defined(POSIX_TEST_LOCK_CONFLICT_ARG) + struct file_lock conflict; +#elif defined(POSIX_TEST_LOCK_RETURNS_CONFLICT) + struct file_lock *conflict; +#endif + + /* Convert to a lock format afs_lockctl understands. 
 */
+    memset((char *)&flock, 0, sizeof(flock));
+    flock.l_type = flp->fl_type;
+    flock.l_pid = flp->fl_pid;
+    flock.l_whence = 0;
+    flock.l_start = flp->fl_start;
+    flock.l_len = flp->fl_end - flp->fl_start + 1;
+
+    /* Safe because there are no large files, yet */
+#if defined(F_GETLK64) && (F_GETLK != F_GETLK64)
+    if (cmd == F_GETLK64)
+	cmd = F_GETLK;
+    else if (cmd == F_SETLK64)
+	cmd = F_SETLK;
+    else if (cmd == F_SETLKW64)
+	cmd = F_SETLKW;
+#endif /* F_GETLK64 && F_GETLK != F_GETLK64 */
+
+    AFS_GLOCK();
+    code = afs_lockctl(vcp, &flock, cmd, credp);
+    AFS_GUNLOCK();
+
+#ifdef AFS_LINUX24_ENV
+    if ((code == 0 || flp->fl_type == F_UNLCK) &&
+        (cmd == F_SETLK || cmd == F_SETLKW)) {
+# ifdef POSIX_LOCK_FILE_WAIT_ARG
+	code = posix_lock_file(fp, flp, 0);
+# else
+	flp->fl_flags &=~ FL_SLEEP;
+	code = posix_lock_file(fp, flp);
+# endif
+	if (code && flp->fl_type != F_UNLCK) {
+	    struct AFS_FLOCK flock2;
+	    flock2 = flock;
+	    flock2.l_type = F_UNLCK;
+	    AFS_GLOCK();
+	    afs_lockctl(vcp, &flock2, F_SETLK, credp);
+	    AFS_GUNLOCK();
+	}
+    }
+    /* If lockctl says there are no conflicting locks, then also check with the
+     * kernel, as lockctl knows nothing about byte range locks
+     */
+    if (code == 0 && cmd == F_GETLK && flock.l_type == F_UNLCK) {
+# if defined(POSIX_TEST_LOCK_CONFLICT_ARG)
+	if (posix_test_lock(fp, flp, &conflict)) {
+	    locks_copy_lock(flp, &conflict);
+	    flp->fl_type = F_UNLCK;
+	    crfree(credp);
+	    return 0;
+	}
+# elif defined(POSIX_TEST_LOCK_RETURNS_CONFLICT)
+	if ((conflict = posix_test_lock(fp, flp))) {
+	    locks_copy_lock(flp, conflict);
+	    flp->fl_type = F_UNLCK;
+	    crfree(credp);
+	    return 0;
+	}
+# else
+	posix_test_lock(fp, flp);
+	/* If we found a lock in the kernel's structure, return it */
+	if (flp->fl_type != F_UNLCK) {
+	    crfree(credp);
+	    return 0;
+	}
+# endif
+    }
+
+#endif
+    /* Convert flock back to Linux's file_lock */
+    flp->fl_type = flock.l_type;
+    flp->fl_pid = flock.l_pid;
+    flp->fl_start = flock.l_start;
+    flp->fl_end = flock.l_start + flock.l_len -
1; + + crfree(credp); + return afs_convert_code(code); +} + +#ifdef STRUCT_FILE_OPERATIONS_HAS_FLOCK +static int +afs_linux_flock(struct file *fp, int cmd, struct file_lock *flp) { + int code = 0; + struct vcache *vcp = VTOAFS(FILE_INODE(fp)); + cred_t *credp = crref(); + struct AFS_FLOCK flock; + /* Convert to a lock format afs_lockctl understands. */ + memset((char *)&flock, 0, sizeof(flock)); + flock.l_type = flp->fl_type; + flock.l_pid = flp->fl_pid; + flock.l_whence = 0; + flock.l_start = 0; + flock.l_len = OFFSET_MAX; + + /* Safe because there are no large files, yet */ +#if defined(F_GETLK64) && (F_GETLK != F_GETLK64) + if (cmd == F_GETLK64) + cmd = F_GETLK; + else if (cmd == F_SETLK64) + cmd = F_SETLK; + else if (cmd == F_SETLKW64) + cmd = F_SETLKW; +#endif /* F_GETLK64 && F_GETLK != F_GETLK64 */ + + AFS_GLOCK(); + code = afs_lockctl(vcp, &flock, cmd, credp); + AFS_GUNLOCK(); + + if ((code == 0 || flp->fl_type == F_UNLCK) && + (cmd == F_SETLK || cmd == F_SETLKW)) { + flp->fl_flags &=~ FL_SLEEP; + code = flock_lock_file_wait(fp, flp); + if (code && flp->fl_type != F_UNLCK) { + struct AFS_FLOCK flock2; + flock2 = flock; + flock2.l_type = F_UNLCK; + AFS_GLOCK(); + afs_lockctl(vcp, &flock2, F_SETLK, credp); + AFS_GUNLOCK(); + } + } + /* Convert flock back to Linux's file_lock */ + flp->fl_type = flock.l_type; + flp->fl_pid = flock.l_pid; + + crfree(credp); + return afs_convert_code(code); +} +#endif + +/* afs_linux_flush + * essentially the same as afs_fsync() but we need to get the return + * code for the sys_close() here, not afs_linux_release(), so call + * afs_StoreAllSegments() with AFS_LASTSTORE + */ +static int +#if defined(FOP_FLUSH_TAKES_FL_OWNER_T) +afs_linux_flush(struct file *fp, fl_owner_t id) +#else +afs_linux_flush(struct file *fp) +#endif +{ + struct vrequest treq; + struct vcache *vcp; + cred_t *credp; + int code; +#if defined(AFS_CACHE_BYPASS) + int bypasscache; +#endif + + AFS_GLOCK(); + + if ((fp->f_flags & O_ACCMODE) == O_RDONLY) { /* 
readers dont flush */
+	AFS_GUNLOCK();
+	return 0;
+    }
+
+    AFS_DISCON_LOCK();
+
+    credp = crref();
+    vcp = VTOAFS(FILE_INODE(fp));
+
+    code = afs_InitReq(&treq, credp);
+    if (code)
+	goto out;
+#if defined(AFS_CACHE_BYPASS)
+    /* If caching is bypassed for this file, or globally, just return 0 */
+    bypasscache = 0;
+    if(cache_bypass_strategy == ALWAYS_BYPASS_CACHE)
+	bypasscache = 1;
+    else {
+	ObtainReadLock(&vcp->lock);
+	if(vcp->cachingStates & FCSBypass)
+	    bypasscache = 1;
+	ReleaseReadLock(&vcp->lock);
+    }
+    if(bypasscache) {
+	/* future proof: don't rely on 0 return from afs_InitReq */
+	code = 0; goto out;
+    }
+#endif
+
+    ObtainSharedLock(&vcp->lock, 535);
+    if ((vcp->execsOrWriters > 0) && (file_count(fp) == 1)) {
+	UpgradeSToWLock(&vcp->lock, 536);
+	if (!AFS_IS_DISCONNECTED) {
+	    code = afs_StoreAllSegments(vcp,
+					&treq,
+					AFS_SYNC | AFS_LASTSTORE);
+	} else {
+	    afs_DisconAddDirty(vcp, VDisconWriteOsiFlush, 1);
+	}
+	ConvertWToSLock(&vcp->lock);
+    }
+    code = afs_CheckCode(code, &treq, 54);
+    ReleaseSharedLock(&vcp->lock);
+
+out:
+    AFS_DISCON_UNLOCK();
+    AFS_GUNLOCK();
+
+    crfree(credp);
+    return afs_convert_code(code);
+}
+
+#if !defined(AFS_LINUX24_ENV)
+/* Not allowed to directly read a directory.
*/ +ssize_t +afs_linux_dir_read(struct file * fp, char *buf, size_t count, loff_t * ppos) +{ + return -EISDIR; +} +#endif + + + +struct file_operations afs_dir_fops = { +#if !defined(AFS_LINUX24_ENV) + .read = afs_linux_dir_read, + .lock = afs_linux_lock, + .fsync = afs_linux_fsync, +#else + .read = generic_read_dir, +#endif + .readdir = afs_linux_readdir, +#ifdef HAVE_UNLOCKED_IOCTL + .unlocked_ioctl = afs_unlocked_xioctl, +#else + .ioctl = afs_xioctl, +#endif +#ifdef HAVE_COMPAT_IOCTL + .compat_ioctl = afs_unlocked_xioctl, +#endif + .open = afs_linux_open, + .release = afs_linux_release, +}; + +struct file_operations afs_file_fops = { + .read = afs_linux_read, + .write = afs_linux_write, +#ifdef GENERIC_FILE_AIO_READ + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, +#endif +#ifdef HAVE_UNLOCKED_IOCTL + .unlocked_ioctl = afs_unlocked_xioctl, +#else + .ioctl = afs_xioctl, +#endif +#ifdef HAVE_COMPAT_IOCTL + .compat_ioctl = afs_unlocked_xioctl, +#endif + .mmap = afs_linux_mmap, + .open = afs_linux_open, + .flush = afs_linux_flush, + .release = afs_linux_release, + .fsync = afs_linux_fsync, + .lock = afs_linux_lock, +#ifdef STRUCT_FILE_OPERATIONS_HAS_FLOCK + .flock = afs_linux_flock, +#endif +}; + + +/********************************************************************** + * AFS Linux dentry operations + **********************************************************************/ + +/* check_bad_parent() : Checks if this dentry's vcache is a root vcache + * that has its mvid (parent dir's fid) pointer set to the wrong directory + * due to being mounted in multiple points at once. If so, check_bad_parent() + * calls afs_lookup() to correct the vcache's mvid, as well as the volume's + * dotdotfid and mtpoint fid members. + * Parameters: + * dp - dentry to be checked. + * Return Values: + * None. + * Sideeffects: + * This dentry's vcache's mvid will be set to the correct parent directory's + * fid. 
+ * This root vnode's volume will have its dotdotfid and mtpoint fids set + * to the correct parent and mountpoint fids. + */ + +static inline void +check_bad_parent(struct dentry *dp) +{ + cred_t *credp; + struct vcache *vcp = VTOAFS(dp->d_inode), *avc = NULL; + struct vcache *pvc = VTOAFS(dp->d_parent->d_inode); + + if (vcp->mvid->Fid.Volume != pvc->f.fid.Fid.Volume) { /* bad parent */ + credp = crref(); + + /* force a lookup, so vcp->mvid is fixed up */ + afs_lookup(pvc, (char *)dp->d_name.name, &avc, credp); + if (!avc || vcp != avc) { /* bad, very bad.. */ + afs_Trace4(afs_iclSetp, CM_TRACE_TMP_1S3L, ICL_TYPE_STRING, + "check_bad_parent: bad pointer returned from afs_lookup origvc newvc dentry", + ICL_TYPE_POINTER, vcp, ICL_TYPE_POINTER, avc, + ICL_TYPE_POINTER, dp); + } + if (avc) + AFS_RELE(AFSTOV(avc)); + crfree(credp); + } + + return; +} + +/* afs_linux_revalidate + * Ensure vcache is stat'd before use. Return 0 if entry is valid. + */ +static int +afs_linux_revalidate(struct dentry *dp) +{ + struct vattr vattr; + struct vcache *vcp = VTOAFS(dp->d_inode); + cred_t *credp; + int code; + + if (afs_shuttingdown) + return EIO; + +#ifdef AFS_LINUX24_ENV + maybe_lock_kernel(); +#endif + AFS_GLOCK(); + +#ifdef notyet + /* Make this a fast path (no crref), since it's called so often. */ + if (vcp->f.states & CStatd) { + + if (*dp->d_name.name != '/' && vcp->mvstat == 2) /* root vnode */ + check_bad_parent(dp); /* check and correct mvid */ + + AFS_GUNLOCK(); +#ifdef AFS_LINUX24_ENV + unlock_kernel(); +#endif + return 0; + } +#endif + + /* This avoids the crref when we don't have to do it. Watch for + * changes in afs_getattr that don't get replicated here! 
+ */ + if (vcp->f.states & CStatd && + (!afs_fakestat_enable || vcp->mvstat != 1) && + !afs_nfsexporter) { + code = afs_CopyOutAttrs(vcp, &vattr); + } else { + credp = crref(); + code = afs_getattr(vcp, &vattr, credp); + crfree(credp); + } + if (!code) + afs_fill_inode(AFSTOV(vcp), &vattr); + + AFS_GUNLOCK(); +#ifdef AFS_LINUX24_ENV + maybe_unlock_kernel(); +#endif + + return afs_convert_code(code); +} + +/* Validate a dentry. Return 1 if unchanged, 0 if VFS layer should re-evaluate. + * In kernels 2.2.10 and above, we are passed an additional flags var which + * may have either the LOOKUP_FOLLOW OR LOOKUP_DIRECTORY set in which case + * we are advised to follow the entry if it is a link or to make sure that + * it is a directory. But since the kernel itself checks these possibilities + * later on, we shouldn't have to do it until later. Perhaps in the future.. + */ +static int +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,10) +#ifdef DOP_REVALIDATE_TAKES_NAMEIDATA +afs_linux_dentry_revalidate(struct dentry *dp, struct nameidata *nd) +#else +afs_linux_dentry_revalidate(struct dentry *dp, int flags) +#endif +#else +afs_linux_dentry_revalidate(struct dentry *dp) +#endif +{ + struct vattr vattr; + cred_t *credp = NULL; + struct vcache *vcp, *pvcp, *tvc = NULL; + int valid; + struct afs_fakestat_state fakestate; + +#ifdef AFS_LINUX24_ENV + maybe_lock_kernel(); +#endif + AFS_GLOCK(); + afs_InitFakeStat(&fakestate); + + if (dp->d_inode) { + + vcp = VTOAFS(dp->d_inode); + pvcp = VTOAFS(dp->d_parent->d_inode); /* dget_parent()? 
*/ + + if (vcp == afs_globalVp) + goto good_dentry; + + if (vcp->mvstat == 1) { /* mount point */ + if (vcp->mvid && (vcp->f.states & CMValid)) { + int tryEvalOnly = 0; + int code = 0; + struct vrequest treq; + + credp = crref(); + code = afs_InitReq(&treq, credp); + if ( +#ifdef AFS_DARWIN_ENV + (strcmp(dp->d_name.name, ".DS_Store") == 0) || + (strcmp(dp->d_name.name, "Contents") == 0) || +#endif + (strcmp(dp->d_name.name, ".directory") == 0)) { + tryEvalOnly = 1; + } + if (tryEvalOnly) + code = afs_TryEvalFakeStat(&vcp, &fakestate, &treq); + else + code = afs_EvalFakeStat(&vcp, &fakestate, &treq); + if ((tryEvalOnly && vcp->mvstat == 1) || code) { + /* a mount point, not yet replaced by its directory */ + goto bad_dentry; + } + } + } else + if (*dp->d_name.name != '/' && vcp->mvstat == 2) /* root vnode */ + check_bad_parent(dp); /* check and correct mvid */ + +#ifdef notdef + /* If the last looker changes, we should make sure the current + * looker still has permission to examine this file. This would + * always require a crref() which would be "slow". + */ + if (vcp->last_looker != treq.uid) { + if (!afs_AccessOK(vcp, (vType(vcp) == VREG) ? PRSFS_READ : PRSFS_LOOKUP, &treq, CHECK_MODE_BITS)) + goto bad_dentry; + + vcp->last_looker = treq.uid; + } +#endif + + /* If the parent's DataVersion has changed or the vnode + * is longer valid, we need to do a full lookup. VerifyVCache + * isn't enough since the vnode may have been renamed. + */ + + if (hgetlo(pvcp->f.m.DataVersion) > dp->d_time || !(vcp->f.states & CStatd)) { + + credp = crref(); + afs_lookup(pvcp, (char *)dp->d_name.name, &tvc, credp); + if (!tvc || tvc != vcp) + goto bad_dentry; + + if (afs_getattr(vcp, &vattr, credp)) + goto bad_dentry; + + vattr2inode(AFSTOV(vcp), &vattr); + dp->d_time = hgetlo(pvcp->f.m.DataVersion); + } + + /* should we always update the attributes at this point? 
*/ + /* unlikely--the vcache entry hasn't changed */ + + } else { +#ifdef notyet + pvcp = VTOAFS(dp->d_parent->d_inode); /* dget_parent()? */ + if (hgetlo(pvcp->f.m.DataVersion) > dp->d_time) + goto bad_dentry; +#endif + + /* No change in parent's DataVersion so this negative + * lookup is still valid. BUT, if a server is down a + * negative lookup can result so there should be a + * liftime as well. For now, always expire. + */ + + goto bad_dentry; + } + + good_dentry: + valid = 1; + + done: + /* Clean up */ + if (tvc) + afs_PutVCache(tvc); + afs_PutFakeStat(&fakestate); + AFS_GUNLOCK(); + if (credp) + crfree(credp); + + if (!valid) { + shrink_dcache_parent(dp); + d_drop(dp); + } +#ifdef AFS_LINUX24_ENV + maybe_unlock_kernel(); +#endif + return valid; + + bad_dentry: + if (have_submounts(dp)) + valid = 1; + else + valid = 0; + goto done; +} + +static void +afs_dentry_iput(struct dentry *dp, struct inode *ip) +{ + struct vcache *vcp = VTOAFS(ip); + + AFS_GLOCK(); + if (!AFS_IS_DISCONNECTED || (vcp->f.states & CUnlinked)) { + (void) afs_InactiveVCache(vcp, NULL); + } + AFS_GUNLOCK(); +#ifdef DCACHE_NFSFS_RENAMED + dp->d_flags &= ~DCACHE_NFSFS_RENAMED; +#endif + + iput(ip); +} + +static int +afs_dentry_delete(struct dentry *dp) +{ + if (dp->d_inode && (VTOAFS(dp->d_inode)->f.states & CUnlinked)) + return 1; /* bad inode? */ + + return 0; +} + +struct dentry_operations afs_dentry_operations = { + .d_revalidate = afs_linux_dentry_revalidate, + .d_delete = afs_dentry_delete, + .d_iput = afs_dentry_iput, +}; + +/********************************************************************** + * AFS Linux inode operations + **********************************************************************/ + +/* afs_linux_create + * + * Merely need to set enough of vattr to get us through the create. Note + * that the higher level code (open_namei) will take care of any tuncation + * explicitly. Exclusive open is also taken care of in open_namei. + * + * name is in kernel space at this point. 
+ */ +static int +#ifdef IOP_CREATE_TAKES_NAMEIDATA +afs_linux_create(struct inode *dip, struct dentry *dp, int mode, + struct nameidata *nd) +#else +afs_linux_create(struct inode *dip, struct dentry *dp, int mode) +#endif +{ + struct vattr vattr; + cred_t *credp = crref(); + const char *name = dp->d_name.name; + struct vcache *vcp; + int code; + + VATTR_NULL(&vattr); + vattr.va_mode = mode; + vattr.va_type = mode & S_IFMT; + + AFS_GLOCK(); + code = afs_create(VTOAFS(dip), (char *)name, &vattr, NONEXCL, mode, + &vcp, credp); + + if (!code) { + struct inode *ip = AFSTOV(vcp); + + afs_getattr(vcp, &vattr, credp); + afs_fill_inode(ip, &vattr); + insert_inode_hash(ip); + dp->d_op = &afs_dentry_operations; + dp->d_time = hgetlo(VTOAFS(dip)->f.m.DataVersion); + d_instantiate(dp, ip); + } + AFS_GUNLOCK(); + + crfree(credp); + return afs_convert_code(code); +} + +/* afs_linux_lookup */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,10) +static struct dentry * +#ifdef IOP_LOOKUP_TAKES_NAMEIDATA +afs_linux_lookup(struct inode *dip, struct dentry *dp, + struct nameidata *nd) +#else +afs_linux_lookup(struct inode *dip, struct dentry *dp) +#endif +#else +static int +afs_linux_lookup(struct inode *dip, struct dentry *dp) +#endif +{ + cred_t *credp = crref(); + struct vcache *vcp = NULL; + const char *comp = dp->d_name.name; + struct inode *ip = NULL; + int code; + + AFS_GLOCK(); + code = afs_lookup(VTOAFS(dip), (char *)comp, &vcp, credp); + + if (vcp) { + struct vattr vattr; + + ip = AFSTOV(vcp); + afs_getattr(vcp, &vattr, credp); + afs_fill_inode(ip, &vattr); + if ( +#ifdef HAVE_KERNEL_HLIST_UNHASHED + hlist_unhashed(&ip->i_hash) +#else + ip->i_hash.prev == NULL +#endif + ) + insert_inode_hash(ip); + } + dp->d_op = &afs_dentry_operations; + dp->d_time = hgetlo(VTOAFS(dip)->f.m.DataVersion); + AFS_GUNLOCK(); + +#if defined(AFS_LINUX24_ENV) + if (ip && S_ISDIR(ip->i_mode)) { + struct dentry *alias; + + /* Try to invalidate an existing alias in favor of our new one */ + alias = 
d_find_alias(ip); + if (alias) { + if (d_invalidate(alias) == 0) { + dput(alias); + } else { + iput(ip); + crfree(credp); + return alias; + } + } + } +#endif + d_add(dp, ip); + + crfree(credp); + + /* It's ok for the file to not be found. That's noted by the caller by + * seeing that the dp->d_inode field is NULL. + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,10) + if (code == ENOENT) + return ERR_PTR(0); + else + return ERR_PTR(afs_convert_code(code)); +#else + if (code == ENOENT) + code = 0; + return afs_convert_code(code); +#endif +} + +static int +afs_linux_link(struct dentry *olddp, struct inode *dip, struct dentry *newdp) +{ + int code; + cred_t *credp = crref(); + const char *name = newdp->d_name.name; + struct inode *oldip = olddp->d_inode; + + /* If afs_link returned the vnode, we could instantiate the + * dentry. Since it's not, we drop this one and do a new lookup. + */ + d_drop(newdp); + + AFS_GLOCK(); + code = afs_link(VTOAFS(oldip), VTOAFS(dip), (char *)name, credp); + + AFS_GUNLOCK(); + crfree(credp); + return afs_convert_code(code); +} + +static int +afs_linux_unlink(struct inode *dip, struct dentry *dp) +{ + int code = EBUSY; + cred_t *credp = crref(); + const char *name = dp->d_name.name; + struct vcache *tvc = VTOAFS(dp->d_inode); + + if (VREFCOUNT(tvc) > 1 && tvc->opens > 0 + && !(tvc->f.states & CUnlinked)) { + struct dentry *__dp; + char *__name; + + __dp = NULL; + __name = NULL; + do { + dput(__dp); + + AFS_GLOCK(); + if (__name) + osi_FreeSmallSpace(__name); + __name = afs_newname(); + AFS_GUNLOCK(); + + __dp = lookup_one_len(__name, dp->d_parent, strlen(__name)); + + if (IS_ERR(__dp)) + goto out; + } while (__dp->d_inode != NULL); + + AFS_GLOCK(); + code = afs_rename(VTOAFS(dip), (char *)dp->d_name.name, VTOAFS(dip), (char *)__dp->d_name.name, credp); + if (!code) { + tvc->mvid = (void *) __name; + crhold(credp); + if (tvc->uncred) { + crfree(tvc->uncred); + } + tvc->uncred = credp; + tvc->f.states |= CUnlinked; +#ifdef 
DCACHE_NFSFS_RENAMED + dp->d_flags |= DCACHE_NFSFS_RENAMED; +#endif + } else { + osi_FreeSmallSpace(__name); + } + AFS_GUNLOCK(); + + if (!code) { + __dp->d_time = hgetlo(VTOAFS(dip)->f.m.DataVersion); + d_move(dp, __dp); + } + dput(__dp); + + goto out; + } + + AFS_GLOCK(); + code = afs_remove(VTOAFS(dip), (char *)name, credp); + AFS_GUNLOCK(); + if (!code) + d_drop(dp); +out: + crfree(credp); + return afs_convert_code(code); +} + + +static int +afs_linux_symlink(struct inode *dip, struct dentry *dp, const char *target) +{ + int code; + cred_t *credp = crref(); + struct vattr vattr; + const char *name = dp->d_name.name; + + /* If afs_symlink returned the vnode, we could instantiate the + * dentry. Since it's not, we drop this one and do a new lookup. + */ + d_drop(dp); + + VATTR_NULL(&vattr); + AFS_GLOCK(); + code = afs_symlink(VTOAFS(dip), (char *)name, &vattr, (char *)target, credp); + AFS_GUNLOCK(); + crfree(credp); + return afs_convert_code(code); +} + +static int +afs_linux_mkdir(struct inode *dip, struct dentry *dp, int mode) +{ + int code; + cred_t *credp = crref(); + struct vcache *tvcp = NULL; + struct vattr vattr; + const char *name = dp->d_name.name; + + VATTR_NULL(&vattr); + vattr.va_mask = ATTR_MODE; + vattr.va_mode = mode; + AFS_GLOCK(); + code = afs_mkdir(VTOAFS(dip), (char *)name, &vattr, &tvcp, credp); + + if (tvcp) { + struct inode *ip = AFSTOV(tvcp); + + afs_getattr(tvcp, &vattr, credp); + afs_fill_inode(ip, &vattr); + + dp->d_op = &afs_dentry_operations; + dp->d_time = hgetlo(VTOAFS(dip)->f.m.DataVersion); + d_instantiate(dp, ip); + } + AFS_GUNLOCK(); + + crfree(credp); + return afs_convert_code(code); +} + +static int +afs_linux_rmdir(struct inode *dip, struct dentry *dp) +{ + int code; + cred_t *credp = crref(); + const char *name = dp->d_name.name; + + /* locking kernel conflicts with glock? 
*/ + + AFS_GLOCK(); + code = afs_rmdir(VTOAFS(dip), (char *)name, credp); + AFS_GUNLOCK(); + + /* Linux likes to see ENOTEMPTY returned from an rmdir() syscall + * that failed because a directory is not empty. So, we map + * EEXIST to ENOTEMPTY on linux. + */ + if (code == EEXIST) { + code = ENOTEMPTY; + } + + if (!code) { + d_drop(dp); + } + + crfree(credp); + return afs_convert_code(code); +} + + +static int +afs_linux_rename(struct inode *oldip, struct dentry *olddp, + struct inode *newip, struct dentry *newdp) +{ + int code; + cred_t *credp = crref(); + const char *oldname = olddp->d_name.name; + const char *newname = newdp->d_name.name; + struct dentry *rehash = NULL; + + if (!list_empty(&newdp->d_hash)) { + d_drop(newdp); + rehash = newdp; + } + +#if defined(AFS_LINUX24_ENV) + if (atomic_read(&olddp->d_count) > 1) + shrink_dcache_parent(olddp); +#endif + + AFS_GLOCK(); + code = afs_rename(VTOAFS(oldip), (char *)oldname, VTOAFS(newip), (char *)newname, credp); + AFS_GUNLOCK(); + + if (!code) + olddp->d_time = 0; /* force to revalidate */ + + if (rehash) + d_rehash(rehash); + + crfree(credp); + return afs_convert_code(code); +} + + +/* afs_linux_ireadlink + * Internal readlink which can return link contents to user or kernel space. + * Note that the buffer is NOT supposed to be null-terminated. + */ +static int +afs_linux_ireadlink(struct inode *ip, char *target, int maxlen, uio_seg_t seg) +{ + int code; + cred_t *credp = crref(); + uio_t tuio; + struct iovec iov; + + setup_uio(&tuio, &iov, target, (afs_offs_t) 0, maxlen, UIO_READ, seg); + code = afs_readlink(VTOAFS(ip), &tuio, credp); + crfree(credp); + + if (!code) + return maxlen - tuio.uio_resid; + else + return afs_convert_code(code); +} + +#if !defined(USABLE_KERNEL_PAGE_SYMLINK_CACHE) +/* afs_linux_readlink + * Fill target (which is in user space) with contents of symlink. 
+ */
+static int
+afs_linux_readlink(struct dentry *dp, char *target, int maxlen)
+{
+    int code;
+    struct inode *ip = dp->d_inode;
+
+    AFS_GLOCK();
+    code = afs_linux_ireadlink(ip, target, maxlen, AFS_UIOUSER);
+    AFS_GUNLOCK();
+    return code;
+}
+
+
+/* afs_linux_follow_link
+ * a file system dependent link following routine.
+ */
+#if defined(AFS_LINUX24_ENV)
+static int afs_linux_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+    int code;
+    char *name;
+
+    name = osi_Alloc(PATH_MAX);
+    if (!name) {
+	return -EIO;
+    }
+
+    AFS_GLOCK();
+    code = afs_linux_ireadlink(dentry->d_inode, name, PATH_MAX - 1, AFS_UIOSYS);
+    AFS_GUNLOCK();
+
+    if (code < 0) {
+	goto out;
+    }
+
+    name[code] = '\0';
+    code = vfs_follow_link(nd, name);
+
+out:
+    osi_Free(name, PATH_MAX);
+
+    return code;
+}
+
+#else /* !defined(AFS_LINUX24_ENV) */
+
+static struct dentry *
+afs_linux_follow_link(struct dentry *dp, struct dentry *basep,
+		      unsigned int follow)
+{
+    int code = 0;
+    char *name;
+    struct dentry *res;
+
+
+    AFS_GLOCK();
+    name = osi_Alloc(PATH_MAX + 1);
+    if (!name) {
+	AFS_GUNLOCK();
+	dput(basep);
+	return ERR_PTR(-EIO);
+    }
+
+    code = afs_linux_ireadlink(dp->d_inode, name, PATH_MAX, AFS_UIOSYS);
+    AFS_GUNLOCK();
+
+    if (code < 0) {
+	dput(basep);
+	if (code < -MAX_ERRNO)
+	    res = ERR_PTR(-EIO);
+	else
+	    res = ERR_PTR(code);
+    } else {
+	name[code] = '\0';
+	res = lookup_dentry(name, basep, follow);
+    }
+
+    AFS_GLOCK();
+    osi_Free(name, PATH_MAX + 1);
+    AFS_GUNLOCK();
+    return res;
+}
+#endif /* AFS_LINUX24_ENV */
+#endif /* USABLE_KERNEL_PAGE_SYMLINK_CACHE */
+
+#if defined(AFS_CACHE_BYPASS)
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+
+static inline int
+afs_linux_can_bypass(struct inode *ip) {
+    switch(cache_bypass_strategy) {
+	case NEVER_BYPASS_CACHE:
+	    return 0;
+	case ALWAYS_BYPASS_CACHE:
+	    return 1;
+	case LARGE_FILES_BYPASS_CACHE:
+	    if(i_size_read(ip) > cache_bypass_threshold)
+		return 1;
+	default:
+	    break;
+    }
+    return 0;
+}
+
+static int
+afs_linux_cache_bypass_read(struct file *fp, struct address_space *mapping, + struct list_head *page_list, unsigned num_pages) +{ + afs_int32 page_ix; + uio_t *auio; + afs_offs_t offset; + struct iovec* iovecp; + struct nocache_read_request *ancr; + struct page *pp, *ppt; + struct pagevec lrupv; + afs_int32 code = 0; + + cred_t *credp; + struct inode *ip = FILE_INODE(fp); + struct vcache *avc = VTOAFS(ip); + afs_int32 bypasscache = 0; /* bypass for this read */ + afs_int32 base_index = 0; + afs_int32 page_count = 0; + afs_int32 isize; + + /* background thread must free: iovecp, auio, ancr */ + iovecp = osi_Alloc(num_pages * sizeof(struct iovec)); + + auio = osi_Alloc(sizeof(uio_t)); + auio->uio_iov = iovecp; + auio->uio_iovcnt = num_pages; + auio->uio_flag = UIO_READ; + auio->uio_seg = AFS_UIOSYS; + auio->uio_resid = num_pages * PAGE_SIZE; + + ancr = osi_Alloc(sizeof(struct nocache_read_request)); + ancr->auio = auio; + ancr->offset = auio->uio_offset; + ancr->length = auio->uio_resid; + + pagevec_init(&lrupv, 0); + + for(page_ix = 0; page_ix < num_pages; ++page_ix) { + + if(list_empty(page_list)) + break; + + pp = list_entry(page_list->prev, struct page, lru); + /* If we allocate a page and don't remove it from page_list, + * the page cache gets upset. 
*/ + list_del(&pp->lru); + isize = (i_size_read(fp->f_mapping->host) - 1) >> PAGE_CACHE_SHIFT; + if(pp->index > isize) { + if(PageLocked(pp)) + UnlockPage(pp); + continue; + } + + if(page_ix == 0) { + offset = ((loff_t) pp->index) << PAGE_CACHE_SHIFT; + auio->uio_offset = offset; + base_index = pp->index; + } + iovecp[page_ix].iov_len = PAGE_SIZE; + code = add_to_page_cache(pp, mapping, pp->index, GFP_KERNEL); + if(base_index != pp->index) { + if(PageLocked(pp)) + UnlockPage(pp); + page_cache_release(pp); + iovecp[page_ix].iov_base = (void *) 0; + base_index++; + continue; + } + base_index++; + if(code) { + if(PageLocked(pp)) + UnlockPage(pp); + page_cache_release(pp); + iovecp[page_ix].iov_base = (void *) 0; + } else { + page_count++; + if(!PageLocked(pp)) { + LockPage(pp); + } + + /* save the page for background map */ + iovecp[page_ix].iov_base = (void*) pp; + + /* and put it on the LRU cache */ + if (!pagevec_add(&lrupv, pp)) + __pagevec_lru_add(&lrupv); + } + } + + /* If there were useful pages in the page list, make sure all pages + * are in the LRU cache, then schedule the read */ + if(page_count) { + pagevec_lru_add(&lrupv); + credp = crref(); + code = afs_ReadNoCache(avc, ancr, credp); + crfree(credp); + } else { + /* If there is nothing for the background thread to handle, + * it won't be freeing the things that we never gave it */ + osi_Free(iovecp, num_pages * sizeof(struct iovec)); + osi_Free(auio, sizeof(uio_t)); + osi_Free(ancr, sizeof(struct nocache_read_request)); + } + /* we do not flush, release, or unmap pages--that will be + * done for us by the background thread as each page comes in + * from the fileserver */ +out: + return afs_convert_code(code); +} + +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) */ +#endif /* defined(AFS_CACHE_BYPASS */ + +static int +afs_linux_read_cache(struct file *cachefp, struct page *page, + int chunk, struct pagevec *lrupv, + struct afs_pagecopy_task *task) { + loff_t offset = page_offset(page); + struct 
page *newpage, *cachepage; + struct address_space *cachemapping; + int pageindex; + int code = 0; + + cachemapping = cachefp->f_dentry->d_inode->i_mapping; + newpage = NULL; + cachepage = NULL; + + /* From our offset, we now need to work out which page in the disk + * file it corresponds to. This will be fun ... */ + pageindex = (offset - AFS_CHUNKTOBASE(chunk)) >> PAGE_CACHE_SHIFT; + + while (cachepage == NULL) { + cachepage = find_get_page(cachemapping, pageindex); + if (!cachepage) { + if (!newpage) + newpage = page_cache_alloc_cold(cachemapping); + if (!newpage) { + code = -ENOMEM; + goto out; + } + + code = add_to_page_cache(newpage, cachemapping, + pageindex, GFP_KERNEL); + if (code == 0) { + cachepage = newpage; + newpage = NULL; + + page_cache_get(cachepage); + if (!pagevec_add(lrupv, cachepage)) + __pagevec_lru_add_file(lrupv); + + } else { + page_cache_release(newpage); + newpage = NULL; + if (code != -EEXIST) + goto out; + } + } else { + lock_page(cachepage); + } + } + + if (!PageUptodate(cachepage)) { + ClearPageError(cachepage); + code = cachemapping->a_ops->readpage(NULL, cachepage); + if (!code && !task) { + wait_on_page_locked(cachepage); + } + } else { + unlock_page(cachepage); + } + + if (!code) { + if (PageUptodate(cachepage)) { + copy_highpage(page, cachepage); + flush_dcache_page(page); + SetPageUptodate(page); + UnlockPage(page); + } else if (task) { + afs_pagecopy_queue_page(task, cachepage, page); + } else { + code = -EIO; + } + } + + if (code) { + UnlockPage(page); + } + +out: + if (cachepage) + page_cache_release(cachepage); + + return code; +} + +static int inline +afs_linux_readpage_fastpath(struct file *fp, struct page *pp, int *codep) +{ + loff_t offset = page_offset(pp); + struct inode *ip = FILE_INODE(fp); + struct vcache *avc = VTOAFS(ip); + struct dcache *tdc; + struct file *cacheFp = NULL; + int code; + int dcLocked = 0; + struct pagevec lrupv; + + /* Not a UFS cache, don't do anything */ + if (cacheDiskType != 
AFS_FCACHE_TYPE_UFS) + return 0; + + /* Can't do anything if the vcache isn't statd , or if the read + * crosses a chunk boundary. + */ + if (!(avc->f.states & CStatd) || + AFS_CHUNK(offset) != AFS_CHUNK(offset + PAGE_SIZE)) { + return 0; + } + + ObtainWriteLock(&avc->lock, 911); + + /* XXX - See if hinting actually makes things faster !!! */ + + /* See if we have a suitable entry already cached */ + tdc = avc->dchint; + + if (tdc) { + /* We need to lock xdcache, then dcache, to handle situations where + * the hint is on the free list. However, we can't safely do this + * according to the locking hierarchy. So, use a non blocking lock. + */ + ObtainReadLock(&afs_xdcache); + dcLocked = ( 0 == NBObtainReadLock(&tdc->lock)); + + if (dcLocked && (tdc->index != NULLIDX) + && !FidCmp(&tdc->f.fid, &avc->f.fid) + && tdc->f.chunk == AFS_CHUNK(offset) + && !(afs_indexFlags[tdc->index] & (IFFree | IFDiscarded))) { + /* Bonus - the hint was correct */ + afs_RefDCache(tdc); + } else { + /* Only destroy the hint if its actually invalid, not if there's + * just been a locking failure */ + if (dcLocked) { + ReleaseReadLock(&tdc->lock); + avc->dchint = NULL; + } + + tdc = NULL; + dcLocked = 0; + } + ReleaseReadLock(&afs_xdcache); + } + + /* No hint, or hint is no longer valid - see if we can get something + * directly from the dcache + */ + if (!tdc) + tdc = afs_FindDCache(avc, offset); + + if (!tdc) { + ReleaseWriteLock(&avc->lock); + return 0; + } + + if (!dcLocked) + ObtainReadLock(&tdc->lock); + + /* Is the dcache we've been given currently up to date */ + if (!hsame(avc->f.m.DataVersion, tdc->f.versionNo) || + (tdc->dflags & DFFetching)) { + ReleaseWriteLock(&avc->lock); + ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); + return 0; + } + + /* Update our hint for future abuse */ + avc->dchint = tdc; + + /* Okay, so we've now got a cache file that is up to date */ + + /* XXX - I suspect we should be locking the inodes before we use them! 
*/ + AFS_GUNLOCK(); + cacheFp = afs_linux_raw_open(&tdc->f.inode, NULL); + pagevec_init(&lrupv, 0); + + code = afs_linux_read_cache(cacheFp, pp, tdc->f.chunk, &lrupv, NULL); + + if (pagevec_count(&lrupv)) + __pagevec_lru_add_file(&lrupv); + + filp_close(cacheFp, NULL); + AFS_GLOCK(); + + ReleaseReadLock(&tdc->lock); + ReleaseWriteLock(&avc->lock); + afs_PutDCache(tdc); + + *codep = code; + return 1; +} + +/* afs_linux_readpage + * all reads come through here. A strategy-like read call. + */ +static int +afs_linux_readpage(struct file *fp, struct page *pp) +{ + afs_int32 code; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) + char *address; + afs_offs_t offset = ((loff_t) pp->index) << PAGE_CACHE_SHIFT; +#else + ulong address = afs_linux_page_address(pp); + afs_offs_t offset = pageoff(pp); +#endif +#if defined(AFS_CACHE_BYPASS) + afs_int32 bypasscache = 0; /* bypass for this read */ + struct nocache_read_request *ancr; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + afs_int32 isize; +#endif +#endif + uio_t *auio; + struct iovec *iovecp; + struct inode *ip = FILE_INODE(fp); + afs_int32 cnt = page_count(pp); + struct vcache *avc = VTOAFS(ip); + cred_t *credp; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + AFS_GLOCK(); + if (afs_linux_readpage_fastpath(fp, pp, &code)) { + AFS_GUNLOCK(); + return code; + } + AFS_GUNLOCK(); +#endif + + credp = crref(); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) + address = kmap(pp); + ClearPageError(pp); +#else + atomic_add(1, &pp->count); + set_bit(PG_locked, &pp->flags); /* other bits? 
See mm.h */ + clear_bit(PG_error, &pp->flags); +#endif +#if defined(AFS_CACHE_BYPASS) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + /* If the page is past the end of the file, skip it */ + isize = (i_size_read(fp->f_mapping->host) - 1) >> PAGE_CACHE_SHIFT; + if(pp->index > isize) { + if(PageLocked(pp)) + UnlockPage(pp); + goto done; + } +#endif +#endif + /* if bypasscache, receiver frees, else we do */ + auio = osi_Alloc(sizeof(uio_t)); + iovecp = osi_Alloc(sizeof(struct iovec)); + + setup_uio(auio, iovecp, (char *)address, offset, PAGE_SIZE, UIO_READ, + AFS_UIOSYS); + +#if defined(AFS_CACHE_BYPASS) + bypasscache = afs_linux_can_bypass(ip); + + /* In the new incarnation of selective caching, a file's caching policy + * can change, eg because file size exceeds threshold, etc. */ + trydo_cache_transition(avc, credp, bypasscache); + + if(bypasscache) { + if(address) + kunmap(pp); + /* save the page for background map */ + auio->uio_iov->iov_base = (void*) pp; + /* the background thread will free this */ + ancr = osi_Alloc(sizeof(struct nocache_read_request)); + ancr->auio = auio; + ancr->offset = offset; + ancr->length = PAGE_SIZE; + + maybe_lock_kernel(); + code = afs_ReadNoCache(avc, ancr, credp); + maybe_unlock_kernel(); + + goto done; /* skips release page, doing it in bg thread */ + } +#endif + +#ifdef AFS_LINUX24_ENV + maybe_lock_kernel(); +#endif + AFS_GLOCK(); + AFS_DISCON_LOCK(); + afs_Trace4(afs_iclSetp, CM_TRACE_READPAGE, ICL_TYPE_POINTER, ip, + ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, cnt, ICL_TYPE_INT32, + 99999); /* not a possible code value */ + + code = afs_rdwr(avc, auio, UIO_READ, 0, credp); + + afs_Trace4(afs_iclSetp, CM_TRACE_READPAGE, ICL_TYPE_POINTER, ip, + ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, cnt, ICL_TYPE_INT32, + code); + AFS_DISCON_UNLOCK(); + AFS_GUNLOCK(); +#ifdef AFS_LINUX24_ENV + maybe_unlock_kernel(); +#endif + if (!code) { + /* XXX valid for no-cache also? Check last bits of files... :) + * Cognate code goes in afs_NoCacheFetchProc. 
*/ + if (auio->uio_resid) /* zero remainder of page */ + memset((void *)(address + (PAGE_SIZE - auio->uio_resid)), 0, + auio->uio_resid); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) + flush_dcache_page(pp); + SetPageUptodate(pp); +#else + set_bit(PG_uptodate, &pp->flags); +#endif + } /* !code */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) + kunmap(pp); + UnlockPage(pp); +#else + clear_bit(PG_locked, &pp->flags); + wake_up(&pp->wait); + free_page(address); +#endif + +#if defined(AFS_CACHE_BYPASS) + /* do not call afs_GetDCache if cache is bypassed */ + if(bypasscache) + goto done; +#endif + + /* free if not bypassing cache */ + osi_Free(auio, sizeof(uio_t)); + osi_Free(iovecp, sizeof(struct iovec)); + + if (!code && AFS_CHUNKOFFSET(offset) == 0) { + struct dcache *tdc; + struct vrequest treq; + + AFS_GLOCK(); + code = afs_InitReq(&treq, credp); + if (!code && !NBObtainWriteLock(&avc->lock, 534)) { + tdc = afs_FindDCache(avc, offset); + if (tdc) { + if (!(tdc->mflags & DFNextStarted)) + afs_PrefetchChunk(avc, tdc, credp, &treq); + afs_PutDCache(tdc); + } + ReleaseWriteLock(&avc->lock); + } + AFS_GUNLOCK(); + } + +#if defined(AFS_CACHE_BYPASS) +done: +#endif + crfree(credp); + return afs_convert_code(code); +} + +/* Readpages reads a number of pages for a particular file. 
We use + * this to optimise the reading, by limiting the number of times upon which + * we have to lookup, lock and open vcaches and dcaches + */ + +static int +afs_linux_readpages(struct file *fp, struct address_space *mapping, + struct list_head *page_list, unsigned int num_pages) +{ + struct inode *inode = mapping->host; + struct vcache *avc = VTOAFS(inode); + struct dcache *tdc; + struct file *cacheFp = NULL; + int code; + unsigned int page_idx; + loff_t offset; + struct pagevec lrupv; + struct afs_pagecopy_task *task; + +#if defined(AFS_CACHE_BYPASS) + bypasscache = afs_linux_can_bypass(ip); + + /* In the new incarnation of selective caching, a file's caching policy + * can change, eg because file size exceeds threshold, etc. */ + trydo_cache_transition(avc, credp, bypasscache); + + if (bypasscache) + return afs_linux_cache_bypass_read(ip, mapping, page_list, num_pages); +#endif + + AFS_GLOCK(); + if ((code = afs_linux_VerifyVCache(avc, NULL))) { + AFS_GUNLOCK(); + return code; + } + + ObtainWriteLock(&avc->lock, 912); + AFS_GUNLOCK(); + + task = afs_pagecopy_init_task(); + + tdc = NULL; + pagevec_init(&lrupv, 0); + for (page_idx = 0; page_idx < num_pages; page_idx++) { + struct page *page = list_entry(page_list->prev, struct page, lru); + list_del(&page->lru); + offset = page_offset(page); + + if (tdc && tdc->f.chunk != AFS_CHUNK(offset)) { + AFS_GLOCK(); + ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); + AFS_GUNLOCK(); + tdc = NULL; + if (cacheFp) + filp_close(cacheFp, NULL); + } + + if (!tdc) { + AFS_GLOCK(); + if ((tdc = afs_FindDCache(avc, offset))) { + ObtainReadLock(&tdc->lock); + if (!hsame(avc->f.m.DataVersion, tdc->f.versionNo) || + (tdc->dflags & DFFetching)) { + ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); + tdc = NULL; + } + } + AFS_GUNLOCK(); + if (tdc) + cacheFp = afs_linux_raw_open(&tdc->f.inode, NULL); + } + + if (tdc && !add_to_page_cache(page, mapping, page->index, + GFP_KERNEL)) { + page_cache_get(page); + if (!pagevec_add(&lrupv, 
page)) + __pagevec_lru_add_file(&lrupv); + + afs_linux_read_cache(cacheFp, page, tdc->f.chunk, &lrupv, task); + } + page_cache_release(page); + } + if (pagevec_count(&lrupv)) + __pagevec_lru_add_file(&lrupv); + + if (tdc) + filp_close(cacheFp, NULL); + + afs_pagecopy_put_task(task); + + AFS_GLOCK(); + if (tdc) { + ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); + } + + ReleaseWriteLock(&avc->lock); + AFS_GUNLOCK(); + return 0; +} + +#if defined(AFS_LINUX24_ENV) +static int +afs_linux_writepage_sync(struct inode *ip, struct page *pp, + unsigned long offset, unsigned int count) +{ + struct vcache *vcp = VTOAFS(ip); + char *buffer; + afs_offs_t base; + int code = 0; + cred_t *credp; + uio_t tuio; + struct iovec iovec; + int f_flags = 0; + + buffer = kmap(pp) + offset; + base = (((loff_t) pp->index) << PAGE_CACHE_SHIFT) + offset; + + credp = crref(); + maybe_lock_kernel(); + AFS_GLOCK(); + afs_Trace4(afs_iclSetp, CM_TRACE_UPDATEPAGE, ICL_TYPE_POINTER, vcp, + ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, page_count(pp), + ICL_TYPE_INT32, 99999); + + ObtainWriteLock(&vcp->lock, 532); + if (vcp->f.states & CPageWrite) { + ReleaseWriteLock(&vcp->lock); + AFS_GUNLOCK(); + maybe_unlock_kernel(); + crfree(credp); + kunmap(pp); + /* should mark it dirty? */ + return(0); + } + vcp->f.states |= CPageWrite; + ReleaseWriteLock(&vcp->lock); + + setup_uio(&tuio, &iovec, buffer, base, count, UIO_WRITE, AFS_UIOSYS); + + code = afs_write(vcp, &tuio, f_flags, credp, 0); + + i_size_write(ip, vcp->f.m.Length); + ip->i_blocks = ((vcp->f.m.Length + 1023) >> 10) << 1; + + ObtainWriteLock(&vcp->lock, 533); + if (!code) { + struct vrequest treq; + + if (!afs_InitReq(&treq, credp)) + code = afs_DoPartialWrite(vcp, &treq); + } + code = code ? 
afs_convert_code(code) : count - tuio.uio_resid; + + vcp->f.states &= ~CPageWrite; + ReleaseWriteLock(&vcp->lock); + + afs_Trace4(afs_iclSetp, CM_TRACE_UPDATEPAGE, ICL_TYPE_POINTER, vcp, + ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, page_count(pp), + ICL_TYPE_INT32, code); + + AFS_GUNLOCK(); + maybe_unlock_kernel(); + crfree(credp); + kunmap(pp); + + return code; +} + + +static int +#ifdef AOP_WRITEPAGE_TAKES_WRITEBACK_CONTROL +afs_linux_writepage(struct page *pp, struct writeback_control *wbc) +#else +afs_linux_writepage(struct page *pp) +#endif +{ + struct address_space *mapping = pp->mapping; + struct inode *inode; + unsigned long end_index; + unsigned offset = PAGE_CACHE_SIZE; + long status; + + if (PageLaunder(pp)) { + return(fail_writepage(pp)); + } + + inode = (struct inode *)mapping->host; + end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT; + + /* easy case */ + if (pp->index < end_index) + goto do_it; + /* things got complicated... */ + offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1); + /* OK, are we completely out? */ + if (pp->index >= end_index + 1 || !offset) + return -EIO; + do_it: + status = afs_linux_writepage_sync(inode, pp, 0, offset); + SetPageUptodate(pp); + UnlockPage(pp); + if (status == offset) + return 0; + else + return status; +} +#else +/* afs_linux_updatepage + * What one would have thought was writepage - write dirty page to file. + * Called from generic_file_write. buffer is still in user space. pagep + * has been filled in with old data if we're updating less than a page. 
+ */ +static int +afs_linux_updatepage(struct file *fp, struct page *pp, unsigned long offset, + unsigned int count, int sync) +{ + struct vcache *vcp = VTOAFS(FILE_INODE(fp)); + u8 *page_addr = (u8 *) afs_linux_page_address(pp); + int code = 0; + cred_t *credp; + uio_t tuio; + struct iovec iovec; + + set_bit(PG_locked, &pp->flags); + + credp = crref(); + AFS_GLOCK(); + AFS_DISCON_LOCK(); + afs_Trace4(afs_iclSetp, CM_TRACE_UPDATEPAGE, ICL_TYPE_POINTER, vcp, + ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, page_count(pp), + ICL_TYPE_INT32, 99999); + setup_uio(&tuio, &iovec, page_addr + offset, + (afs_offs_t) (pageoff(pp) + offset), count, UIO_WRITE, + AFS_UIOSYS); + + code = afs_write(vcp, &tuio, fp->f_flags, credp, 0); + + i_size_write(ip, vcp->f.m.Length); + ip->i_blocks = ((vcp->f.m.Length + 1023) >> 10) << 1; + + if (!code) { + struct vrequest treq; + + ObtainWriteLock(&vcp->lock, 533); + vcp->f.m.Date = osi_Time(); /* set modification time */ + if (!afs_InitReq(&treq, credp)) + code = afs_DoPartialWrite(vcp, &treq); + ReleaseWriteLock(&vcp->lock); + } + + code = code ? afs_convert_code(code) : count - tuio.uio_resid; + afs_Trace4(afs_iclSetp, CM_TRACE_UPDATEPAGE, ICL_TYPE_POINTER, vcp, + ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, page_count(pp), + ICL_TYPE_INT32, code); + + AFS_DISCON_UNLOCK(); + AFS_GUNLOCK(); + crfree(credp); + + clear_bit(PG_locked, &pp->flags); + return code; +} +#endif + +/* afs_linux_permission + * Check access rights - returns error if can't check or permission denied. 
+ */
+static int
+#ifdef IOP_PERMISSION_TAKES_NAMEIDATA
+afs_linux_permission(struct inode *ip, int mode, struct nameidata *nd)
+#else
+afs_linux_permission(struct inode *ip, int mode)
+#endif
+{
+    int code;
+    cred_t *credp = crref();	/* caller's credentials; freed below */
+    int tmp = 0;		/* AFS access bits translated from MAY_* */
+
+    AFS_GLOCK();
+    if (mode & MAY_EXEC)
+	tmp |= VEXEC;
+    if (mode & MAY_READ)
+	tmp |= VREAD;
+    if (mode & MAY_WRITE)
+	tmp |= VWRITE;
+    code = afs_access(VTOAFS(ip), tmp, credp);	/* ask the cache manager */
+
+    AFS_GUNLOCK();
+    crfree(credp);
+    return afs_convert_code(code);	/* map AFS error code to -errno */
+}
+
+#if defined(AFS_LINUX24_ENV) && !defined(HAVE_WRITE_BEGIN)
+static int
+afs_linux_commit_write(struct file *file, struct page *page, unsigned offset,
+		       unsigned to)	/* sync-write [offset,to) of page */
+{
+    int code;
+
+    code = afs_linux_writepage_sync(file->f_dentry->d_inode, page,
+				    offset, to - offset);
+    kunmap(page);	/* drop the kmap taken in prepare_write */
+
+    return code;
+}
+
+static int
+afs_linux_prepare_write(struct file *file, struct page *page, unsigned from,
+			unsigned to)
+{
+/* sometime between 2.4.0 and 2.4.19, the callers of prepare_write began to
+   call kmap directly instead of relying on us to do it */
+    kmap(page);
+    return 0;	/* never fails; commit_write does the real work */
+}
+#endif
+
+#if defined(HAVE_WRITE_BEGIN)
+static int
+afs_linux_write_end(struct file *file, struct address_space *mapping,
+		    loff_t pos, unsigned len, unsigned copied,
+		    struct page *page, void *fsdata)	/* flush copied bytes */
+{
+    int code;
+    unsigned from = pos & (PAGE_CACHE_SIZE - 1);	/* offset within page */
+
+    code = afs_linux_writepage_sync(file->f_dentry->d_inode, page,
+				    from, copied);
+    unlock_page(page);	/* page was returned locked by write_begin */
+    page_cache_release(page);
+    return code;
+}
+
+static int
+afs_linux_write_begin(struct file *file, struct address_space *mapping,
+		      loff_t pos, unsigned len, unsigned flags,
+		      struct page **pagep, void **fsdata)	/* get a locked page */
+{
+    struct page *page;
+    pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+#if defined(HAVE_GRAB_CACHE_PAGE_WRITE_BEGIN)
+    page = grab_cache_page_write_begin(mapping, index, flags);
+#else
+    page = __grab_cache_page(mapping, index);
+#endif
+    *pagep = page;	/* NOTE(review): may be NULL on OOM; confirm callers cope */
+
+    return 0;
+}
+#endif
+
+
+static struct inode_operations afs_file_iops
= { +#if defined(AFS_LINUX24_ENV) + .permission = afs_linux_permission, + .revalidate = afs_linux_revalidate, + .setattr = afs_notify_change, +#else + .default_file_ops = &afs_file_fops, + .readpage = afs_linux_readpage, + .revalidate = afs_linux_revalidate, + .updatepage = afs_linux_updatepage, +#endif +}; + +#if defined(AFS_LINUX24_ENV) +static struct address_space_operations afs_file_aops = { + .readpage = afs_linux_readpage, + .readpages = afs_linux_readpages, + .writepage = afs_linux_writepage, +#if defined (HAVE_WRITE_BEGIN) + .write_begin = afs_linux_write_begin, + .write_end = afs_linux_write_end, +#else + .commit_write = afs_linux_commit_write, + .prepare_write = afs_linux_prepare_write, +#endif +}; +#endif + + +/* Separate ops vector for directories. Linux 2.2 tests type of inode + * by what sort of operation is allowed..... + */ + +static struct inode_operations afs_dir_iops = { +#if !defined(AFS_LINUX24_ENV) + .default_file_ops = &afs_dir_fops, +#else + .setattr = afs_notify_change, +#endif + .create = afs_linux_create, + .lookup = afs_linux_lookup, + .link = afs_linux_link, + .unlink = afs_linux_unlink, + .symlink = afs_linux_symlink, + .mkdir = afs_linux_mkdir, + .rmdir = afs_linux_rmdir, + .rename = afs_linux_rename, + .revalidate = afs_linux_revalidate, + .permission = afs_linux_permission, +}; + +/* We really need a separate symlink set of ops, since do_follow_link() + * determines if it _is_ a link by checking if the follow_link op is set. + */ +#if defined(USABLE_KERNEL_PAGE_SYMLINK_CACHE) +static int +afs_symlink_filler(struct file *file, struct page *page) +{ + struct inode *ip = (struct inode *)page->mapping->host; + char *p = (char *)kmap(page); + int code; + + maybe_lock_kernel(); + AFS_GLOCK(); + code = afs_linux_ireadlink(ip, p, PAGE_SIZE, AFS_UIOSYS); + AFS_GUNLOCK(); + + if (code < 0) + goto fail; + p[code] = '\0'; /* null terminate? 
*/ + maybe_unlock_kernel(); + + SetPageUptodate(page); + kunmap(page); + UnlockPage(page); + return 0; + + fail: + maybe_unlock_kernel(); + + SetPageError(page); + kunmap(page); + UnlockPage(page); + return code; +} + +static struct address_space_operations afs_symlink_aops = { + .readpage = afs_symlink_filler +}; +#endif /* USABLE_KERNEL_PAGE_SYMLINK_CACHE */ + +static struct inode_operations afs_symlink_iops = { +#if defined(USABLE_KERNEL_PAGE_SYMLINK_CACHE) + .readlink = page_readlink, +#if defined(HAVE_KERNEL_PAGE_FOLLOW_LINK) + .follow_link = page_follow_link, +#else + .follow_link = page_follow_link_light, + .put_link = page_put_link, +#endif +#else /* !defined(USABLE_KERNEL_PAGE_SYMLINK_CACHE) */ + .readlink = afs_linux_readlink, + .follow_link = afs_linux_follow_link, +#if !defined(AFS_LINUX24_ENV) + .permission = afs_linux_permission, + .revalidate = afs_linux_revalidate, +#endif +#endif /* USABLE_KERNEL_PAGE_SYMLINK_CACHE */ +#if defined(AFS_LINUX24_ENV) + .setattr = afs_notify_change, +#endif +}; + +void +afs_fill_inode(struct inode *ip, struct vattr *vattr) +{ + + if (vattr) + vattr2inode(ip, vattr); + +/* Reset ops if symlink or directory. 
*/ + if (S_ISREG(ip->i_mode)) { + ip->i_op = &afs_file_iops; +#if defined(AFS_LINUX24_ENV) + ip->i_fop = &afs_file_fops; + ip->i_data.a_ops = &afs_file_aops; +#endif + + } else if (S_ISDIR(ip->i_mode)) { + ip->i_op = &afs_dir_iops; +#if defined(AFS_LINUX24_ENV) + ip->i_fop = &afs_dir_fops; +#endif + + } else if (S_ISLNK(ip->i_mode)) { + ip->i_op = &afs_symlink_iops; +#if defined(USABLE_KERNEL_PAGE_SYMLINK_CACHE) + ip->i_data.a_ops = &afs_symlink_aops; + ip->i_mapping = &ip->i_data; +#endif + } + +} diff --git a/src/libafs/MakefileProto.LINUX.in b/src/libafs/MakefileProto.LINUX.in index c2fb1f6..bb5ebda 100644 --- a/src/libafs/MakefileProto.LINUX.in +++ b/src/libafs/MakefileProto.LINUX.in @@ -8,6 +8,12 @@ srcdir=@srcdir@ include @TOP_OBJDIR@/src/config/Makefile.config + + +# Override for Linux 2.4 and older +MKAFS_OSTYPE=LINUX24 + + INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ @@ -38,7 +44,9 @@ AFS_OS_OBJS = \ osi_proc.o \ osi_vnodeops.o \ + osi_pagecopy.o + AFS_OS_PAGOBJS = \ osi_alloc.o \ diff --git a/src/rx/LINUX24/rx_kmutex.c b/src/rx/LINUX24/rx_kmutex.c new file mode 100644 index 0000000..3a624ba --- /dev/null +++ b/src/rx/LINUX24/rx_kmutex.c @@ -0,0 +1,164 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. + * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * rx_kmutex.c - mutex and condition variable macros for kernel environment. + * + * Linux implementation. 
+ */
+
+#include <afsconfig.h>	/* NOTE(review): include target lost in extraction; <afsconfig.h> per OpenAFS convention -- confirm */
+#include "afs/param.h"
+
+
+#include "rx/rx_kcommon.h"
+#include "rx_kmutex.h"
+#include "rx/rx_kernel.h"
+
+void
+afs_mutex_init(afs_kmutex_t * l)	/* set up the lock + owner-tracking pid */
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+    mutex_init(&l->mutex);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
+    init_MUTEX(&l->sem);	/* semaphore used as a mutex pre-2.6.16 */
+#else
+    l->sem = MUTEX;
+#endif
+    l->owner = 0;	/* 0 == unowned; otherwise pid of the holder */
+}
+
+void
+afs_mutex_enter(afs_kmutex_t * l)	/* acquire; panics on recursive entry */
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+    mutex_lock(&l->mutex);
+#else
+    down(&l->sem);
+#endif
+    if (l->owner)	/* sanity check: a freshly won lock must be unowned */
+	osi_Panic("mutex_enter: 0x%lx held by %d", (unsigned long)l, l->owner);
+    l->owner = current->pid;
+}
+
+int
+afs_mutex_tryenter(afs_kmutex_t * l)	/* returns 1 on success, 0 if busy */
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+    if (mutex_trylock(&l->mutex) == 0)	/* mutex_trylock: 1 == acquired */
+#else
+    if (down_trylock(&l->sem))	/* down_trylock: nonzero == busy */
+#endif
+	return 0;
+    l->owner = current->pid;
+    return 1;
+}
+
+void
+afs_mutex_exit(afs_kmutex_t * l)	/* release; panics if caller not owner */
+{
+    if (l->owner != current->pid)
+	osi_Panic("mutex_exit: 0x%lx held by %d", (unsigned long)l, l->owner);
+    l->owner = 0;	/* clear owner before unlock so a racer sees it free */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+    mutex_unlock(&l->mutex);
+#else
+    up(&l->sem);
+#endif
+}
+
+/* CV_WAIT and CV_TIMEDWAIT sleep until the specified event occurs, or, in the
+ * case of CV_TIMEDWAIT, until the specified timeout occurs.
+ * - NOTE: that on Linux, there are circumstances in which TASK_INTERRUPTIBLE
+ * can wake up, even if all signals are blocked
+ * - TODO: handle signals correctly by passing an indication back to the
+ * caller that the wait has been interrupted and the stack should be cleaned
+ * up preparatory to signal delivery
+ */
+int
+afs_cv_wait(afs_kcondvar_t * cv, afs_kmutex_t * l, int sigok)
+{
+    int seq, isAFSGlocked = ISAFS_GLOCK();	/* remember AFS GLOCK state */
+    sigset_t saved_set;
+#ifdef DECLARE_WAITQUEUE
+    DECLARE_WAITQUEUE(wait, current);
+#else
+    struct wait_queue wait = { current, NULL };	/* pre-2.2 style entry */
+#endif
+    sigemptyset(&saved_set);
+    seq = cv->seq;	/* CV_SIGNAL/BROADCAST bump seq; sleep while unchanged */
+
+    set_current_state(TASK_INTERRUPTIBLE);
+    add_wait_queue(&cv->waitq, &wait);
+
+    if (isAFSGlocked)
+	AFS_GUNLOCK();	/* never sleep holding the AFS global lock */
+    MUTEX_EXIT(l);
+
+    if (!sigok) {	/* block all signals for an uninterruptible wait */
+	SIG_LOCK(current);
+	saved_set = current->blocked;
+	sigfillset(&current->blocked);	/* NOTE(review): '&current' was garbled to '¤t' in extraction; restored */
+	RECALC_SIGPENDING(current);
+	SIG_UNLOCK(current);
+    }
+
+    while(seq == cv->seq) {	/* loop: TASK_INTERRUPTIBLE may wake spuriously */
+	schedule();
+    }
+
+    remove_wait_queue(&cv->waitq, &wait);
+    set_current_state(TASK_RUNNING);
+
+    if (!sigok) {	/* restore the caller's signal mask */
+	SIG_LOCK(current);
+	current->blocked = saved_set;
+	RECALC_SIGPENDING(current);
+	SIG_UNLOCK(current);
+    }
+
+    if (isAFSGlocked)
+	AFS_GLOCK();	/* reacquire in the same order: GLOCK, then l */
+    MUTEX_ENTER(l);
+
+    return (sigok && signal_pending(current)) ?
EINTR : 0;
+}
+
+void
+afs_cv_timedwait(afs_kcondvar_t * cv, afs_kmutex_t * l, int waittime)	/* waittime in ms */
+{
+    int seq, isAFSGlocked = ISAFS_GLOCK();	/* remember AFS GLOCK state */
+    long t = waittime * HZ / 1000;	/* convert milliseconds to jiffies */
+#ifdef DECLARE_WAITQUEUE
+    DECLARE_WAITQUEUE(wait, current);
+#else
+    struct wait_queue wait = { current, NULL };	/* pre-2.2 style entry */
+#endif
+    seq = cv->seq;	/* sleep until CV_SIGNAL/BROADCAST bumps seq */
+
+    set_current_state(TASK_INTERRUPTIBLE);
+    add_wait_queue(&cv->waitq, &wait);
+
+    if (isAFSGlocked)
+	AFS_GUNLOCK();	/* never sleep holding the AFS global lock */
+    MUTEX_EXIT(l);
+
+    while(seq == cv->seq) {
+	t = schedule_timeout(t);	/* returns jiffies remaining */
+	if (!t)			/* timeout */
+	    break;
+    }
+
+    remove_wait_queue(&cv->waitq, &wait);
+    set_current_state(TASK_RUNNING);
+
+    if (isAFSGlocked)
+	AFS_GLOCK();	/* reacquire in the same order: GLOCK, then l */
+    MUTEX_ENTER(l);
+}	/* NOTE(review): no return value -- callers cannot distinguish timeout from wakeup */
diff --git a/src/rx/LINUX24/rx_kmutex.h b/src/rx/LINUX24/rx_kmutex.h
new file mode 100644
index 0000000..6ea4faf
--- /dev/null
+++ b/src/rx/LINUX24/rx_kmutex.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2000, International Business Machines Corporation and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * rx_kmutex.h - mutex and condition variable macros for kernel environment.
+ *
+ * Linux implementation.
+ * These are no-ops until such time as the kernel no longer has a global lock.
+ */
+#ifndef RX_KMUTEX_H_
+#define RX_KMUTEX_H_
+
+#include "rx/rx_kernel.h"	/* for osi_Panic() */
+
+/* AFS_GLOBAL_RXLOCK_KERNEL is defined so that the busy tq code paths are
+ * used. The thread can sleep when sending packets.
+ */ +#define AFS_GLOBAL_RXLOCK_KERNEL 1 + + +#define RX_ENABLE_LOCKS 1 + +#ifndef _LINUX_CODA_FS_I +#define _LINUX_CODA_FS_I +struct coda_inode_info { +}; +#endif +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +#include +#else +#include +#endif + +typedef struct afs_kmutex { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) + struct mutex mutex; +#else + struct semaphore sem; +#endif + int owner; +} afs_kmutex_t; + +#ifndef set_current_state +#define set_current_state(X) current->state=X +#endif + +typedef struct afs_kcondvar { + int seq; +#if defined(AFS_LINUX24_ENV) + wait_queue_head_t waitq; +#else + struct wait_queue *waitq; +#endif +} afs_kcondvar_t; + +static inline int +MUTEX_ISMINE(afs_kmutex_t * l) +{ + return l->owner == current->pid; +} + +#define MUTEX_INIT(a,b,c,d) afs_mutex_init(a) +#define MUTEX_DESTROY(a) +#define MUTEX_ENTER afs_mutex_enter +#define MUTEX_TRYENTER afs_mutex_tryenter +#define MUTEX_EXIT afs_mutex_exit + +#if defined(AFS_LINUX24_ENV) +#define CV_INIT(cv,b,c,d) do { (cv)->seq = 0; init_waitqueue_head(&(cv)->waitq); } while (0) +#else +#define CV_INIT(cv,b,c,d) do { (cv)->seq = 0; init_waitqueue(&(cv)->waitq); } while (0) +#endif +#define CV_DESTROY(cv) +#define CV_WAIT_SIG(cv, m) afs_cv_wait(cv, m, 1) +#define CV_WAIT(cv, m) afs_cv_wait(cv, m, 0) +#define CV_TIMEDWAIT afs_cv_timedwait + +#define CV_SIGNAL(cv) do { ++(cv)->seq; wake_up(&(cv)->waitq); } while (0) +#if defined(AFS_LINUX24_ENV) +#define CV_BROADCAST(cv) do { ++(cv)->seq; wake_up_all(&(cv)->waitq); } while (0) +#else +#define CV_BROADCAST(cv) do { ++(cv)->seq; wake_up(&(cv)->waitq); } while (0) +#endif + +#endif /* RX_KMUTEX_H_ */ diff --git a/src/rx/LINUX24/rx_knet.c b/src/rx/LINUX24/rx_knet.c new file mode 100644 index 0000000..6bb0985 --- /dev/null +++ b/src/rx/LINUX24/rx_knet.c @@ -0,0 +1,308 @@ +/* + * Copyright 2000, International Business Machines Corporation and others. + * All Rights Reserved. 
+ * + * This software has been released under the terms of the IBM Public + * License. For details, see the LICENSE file in the top-level source + * directory or online at http://www.openafs.org/dl/license10.html + */ + +/* + * rx_knet.c - RX kernel send, receive and timer routines. + * + * Linux implementation. + */ +#include +#include "afs/param.h" + + +#include +#ifdef AFS_LINUX22_ENV +#include "rx/rx_kcommon.h" +#if defined(AFS_LINUX24_ENV) +#include "h/smp_lock.h" +#endif +#include +#ifdef ADAPT_PMTU +#include +#include +#endif + +/* rxk_NewSocket + * open and bind RX socket + */ +osi_socket * +rxk_NewSocketHost(afs_uint32 ahost, short aport) +{ + struct socket *sockp; + struct sockaddr_in myaddr; + int code; + KERNEL_SPACE_DECL; +#ifdef ADAPT_PMTU + int pmtu = IP_PMTUDISC_WANT; + int do_recverr = 1; +#else + int pmtu = IP_PMTUDISC_DONT; +#endif + + /* We need a better test for this. if you need it back, tell us + * how to detect it. + */ +#ifdef LINUX_KERNEL_SOCK_CREATE_V + code = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sockp, 0); +#else + code = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sockp); +#endif + if (code < 0) + return NULL; + + /* Bind socket */ + myaddr.sin_family = AF_INET; + myaddr.sin_addr.s_addr = ahost; + myaddr.sin_port = aport; + code = + sockp->ops->bind(sockp, (struct sockaddr *)&myaddr, sizeof(myaddr)); + + if (code < 0) { +#if defined(AFS_LINUX24_ENV) + printk("sock_release(rx_socket) FIXME\n"); +#else + sock_release(sockp); +#endif + return NULL; + } + + TO_USER_SPACE(); + sockp->ops->setsockopt(sockp, SOL_IP, IP_MTU_DISCOVER, (char *)&pmtu, + sizeof(pmtu)); +#ifdef ADAPT_PMTU + sockp->ops->setsockopt(sockp, SOL_IP, IP_RECVERR, (char *)&do_recverr, + sizeof(do_recverr)); +#endif + TO_KERNEL_SPACE(); + return (osi_socket *)sockp; +} + +osi_socket * +rxk_NewSocket(short aport) +{ + return rxk_NewSocketHost(htonl(INADDR_ANY), aport); +} + +/* free socket allocated by osi_NetSocket */ +int +rxk_FreeSocket(register struct socket 
*asocket) +{ + AFS_STATCNT(osi_FreeSocket); + return 0; +} + +#ifdef ADAPT_PMTU +void +handle_socket_error(osi_socket so) +{ + KERNEL_SPACE_DECL; + struct msghdr msg; + struct cmsghdr *cmsg; + struct sock_extended_err *err; + struct sockaddr_in addr; + struct sockaddr *offender; + char *controlmsgbuf; + int code; + struct socket *sop = (struct socket *)so; + + if (!(controlmsgbuf=rxi_Alloc(256))) + return; + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + msg.msg_iov = NULL; + msg.msg_iovlen = 0; + msg.msg_control = controlmsgbuf; + msg.msg_controllen = 256; + msg.msg_flags = 0; + + TO_USER_SPACE(); + code = sock_recvmsg(sop, &msg, 256, MSG_ERRQUEUE|MSG_DONTWAIT|MSG_TRUNC); + TO_KERNEL_SPACE(); + + if (code < 0 || !(msg.msg_flags & MSG_ERRQUEUE)) + goto out; + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (CMSG_OK(&msg, cmsg) && cmsg->cmsg_level == SOL_IP && + cmsg->cmsg_type == IP_RECVERR) + break; + } + if (!cmsg) + goto out; + err = CMSG_DATA(cmsg); + offender = SO_EE_OFFENDER(err); + + if (offender->sa_family != AF_INET) + goto out; + + memcpy(&addr, offender, sizeof(addr)); + + if (err->ee_origin == SO_EE_ORIGIN_ICMP && + err->ee_type == ICMP_DEST_UNREACH && + err->ee_code == ICMP_FRAG_NEEDED) { + rxi_SetPeerMtu(ntohl(addr.sin_addr.s_addr), ntohs(addr.sin_port), + err->ee_info); + } + /* other DEST_UNREACH's and TIME_EXCEEDED should be dealt with too */ + +out: + rxi_Free(controlmsgbuf, 256); + return; +} +#endif + +/* osi_NetSend + * + * Return codes: + * 0 = success + * non-zero = failure + */ +int +osi_NetSend(osi_socket sop, struct sockaddr_in *to, struct iovec *iovec, + int iovcnt, afs_int32 size, int istack) +{ + KERNEL_SPACE_DECL; + struct msghdr msg; + int code; +#ifdef ADAPT_PMTU + int sockerr; + size_t esize; + + while (1) { + sockerr=0; + esize = sizeof(sockerr); + TO_USER_SPACE(); + sop->ops->getsockopt(sop, SOL_SOCKET, SO_ERROR, (char *)&sockerr, + &esize); + TO_KERNEL_SPACE(); + if (sockerr == 0) + 
break; + handle_socket_error(sop); + } +#endif + + msg.msg_iovlen = iovcnt; + msg.msg_iov = iovec; + msg.msg_name = to; + msg.msg_namelen = sizeof(*to); + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + TO_USER_SPACE(); + code = sock_sendmsg(sop, &msg, size); + TO_KERNEL_SPACE(); + return (code < 0) ? code : 0; +} + + +/* osi_NetReceive + * OS dependent part of kernel RX listener thread. + * + * Arguments: + * so socket to receive on, typically rx_socket + * from pointer to a sockaddr_in. + * iov array of iovecs to fill in. + * iovcnt how many iovecs there are. + * lengthp IN/OUT in: total space available in iovecs. out: size of read. + * + * Return + * 0 if successful + * error code (such as EINTER) if not + * + * Environment + * Note that the maximum number of iovecs is 2 + RX_MAXWVECS. This is + * so we have a little space to look for packets larger than + * rx_maxReceiveSize. + */ +int rxk_lastSocketError; +int rxk_nSocketErrors; +int +osi_NetReceive(osi_socket so, struct sockaddr_in *from, struct iovec *iov, + int iovcnt, int *lengthp) +{ + KERNEL_SPACE_DECL; + struct msghdr msg; + int code; +#ifdef ADAPT_PMTU + int sockerr; + size_t esize; +#endif + struct iovec tmpvec[RX_MAXWVECS + 2]; + struct socket *sop = (struct socket *)so; + + if (iovcnt > RX_MAXWVECS + 2) { + osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", iovcnt); + } +#ifdef ADAPT_PMTU + while (1) { + sockerr=0; + esize = sizeof(sockerr); + TO_USER_SPACE(); + sop->ops->getsockopt(sop, SOL_SOCKET, SO_ERROR, (char *)&sockerr, + &esize); + TO_KERNEL_SPACE(); + if (sockerr == 0) + break; + handle_socket_error(so); + } +#endif + memcpy(tmpvec, iov, iovcnt * sizeof(struct iovec)); + msg.msg_name = from; + msg.msg_iov = tmpvec; + msg.msg_iovlen = iovcnt; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + TO_USER_SPACE(); + code = sock_recvmsg(sop, &msg, *lengthp, 0); + TO_KERNEL_SPACE(); + + if (code < 0) { + /* Clear the error before using 
the socket again. + * Oh joy, Linux has hidden header files as well. It appears we can + * simply call again and have it clear itself via sock_error(). + */ +#ifdef AFS_LINUX22_ENV + flush_signals(current); /* We don't want no stinkin' signals. */ +#else + current->signal = 0; /* We don't want no stinkin' signals. */ +#endif + rxk_lastSocketError = code; + rxk_nSocketErrors++; + } else { + *lengthp = code; + code = 0; + } + + return code; +} +#ifdef EXPORTED_TASKLIST_LOCK +extern rwlock_t tasklist_lock __attribute__((weak)); +#endif +void +osi_StopListener(void) +{ + extern struct task_struct *rxk_ListenerTask; + + while (rxk_ListenerTask) { + if (rxk_ListenerTask) { + flush_signals(rxk_ListenerTask); + force_sig(SIGKILL, rxk_ListenerTask); + } + if (!rxk_ListenerTask) + break; + afs_osi_Sleep(&rxk_ListenerTask); + } + sock_release(rx_socket); + rx_socket = NULL; +} + +#endif /* AFS_LINUX22_ENV */ -- 1.9.4