/*
 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

/* This is a placeholder for routines unique to the port of AFS to hp-ux */

#include <afsconfig.h>
#include "afs/param.h"

#include "afs/sysincludes.h"	/* Standard vendor system headers */
#include "afsincludes.h"	/* Afs-based standard headers */
#include "afs/afs_stats.h"	/* statistics stuff */

#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
extern struct vfsops Afs_vfsops;
extern int afs_hp_strategy();
extern int afs_bmap(), afs_badop(), afs_noop(), afs_lockf();
extern int afs_pagein();
extern int afs_pageout();
extern int afs_ioctl();
extern int afs_prealloc();
extern int afs_mapdbd();
extern int afs_mmap();
extern int afs_cachelimit();
extern int afs_vm_checkpage();
extern int afs_vm_fscontiguous();
extern int afs_vm_stopio();
extern int afs_read_ahead();
extern int afs_unmap();
extern int afs_release();
extern int afs_swapfs_len();
extern int afs_readdir2();
extern int afs_readdir();
extern int afs_readdir3();
extern int afs_pathconf();
extern int afs_close();

#define vtoblksz(vp)	((vp)->v_vfsp->vfs_bsize)
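
/* vtoblksz() maps a vnode to the block size of its containing
 * filesystem; the same definition is repeated further down, just
 * before the VM pagein/pageout helpers that use it. */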
#if defined(AFS_HPUX110_ENV)
/* We no longer need to lock on the VM Empire,
 * or at least that is what is claimed,
 * so we will no-op the vmemp_ routines.
 * This needs to be looked at closer.
 */
#define vmemp_returnx(a)	return(a)
#define vmemp_unlockx()
#endif
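
/* The pagein/pageout code below brackets its work with vmemp_lockx()
 * and vmemp_unlockx() and exits through vmemp_returnx(); under
 * AFS_HPUX110_ENV the unlock and return wrappers reduce to a no-op
 * and a plain return, as defined above. */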
#if !defined(AFS_HPUX110_ENV)
/*
 * Copy an mbuf to the contiguous area pointed to by cp.
 * Skip <off> bytes and copy <len> bytes.
 * Returns the number of bytes not transferred.
 * The mbuf is NOT changed.
 */
m_cpytoc(m, off, len, cp)
    register struct mbuf *m;
    register int off, len;
    register caddr_t cp;
{
    if (m == NULL || off < 0 || len < 0 || cp == NULL)
	osi_Panic("m_cpytoc");

    if (m->m_len <= off) {

    ml = MIN(len, m->m_len - off);
    memcpy(cp, mtod(m, caddr_t) + off, (u_int) ml);

    memcpy(cp, mtod(m, caddr_t), (u_int) ml);
/*
 * Note that the standard Sun vnode interface doesn't have a vop_lockf(), so this code is
 * totally new.  This came about because HP-UX has lockf() implemented as
 * a system call while Sun has it implemented as a library call (apparently).
 * To handle this, we have to translate the lockf() request into an
 * fcntl()-looking request, and then translate the results back if necessary.
 * We call afs_lockctl() directly.
 */
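/*
 * A sketch of the translation (illustrative only; the actual switch is
 * in the function below): F_ULOCK becomes an F_UNLCK unlock request,
 * F_LOCK a blocking F_WRLCK, F_TLOCK a non-blocking F_WRLCK, and
 * F_TEST an F_GETLK-style probe whose result is checked and translated
 * back after the afs_lockctl() call.
 */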
afs_lockf(vp, flag, len, cred, fp, LB, UB)

    /* for now, just pretend it works */
    struct k_flock flock;

    /*
     * Create a flock structure and translate the lockf request
     * into an appropriate looking fcntl() type request for afs_lockctl().
     */
    flock.l_start = fp->f_offset;
    /* convert negative lengths to positive */
    if (flock.l_len < 0) {
	flock.l_start += flock.l_len;
	flock.l_len = -(flock.l_len);
    }

    /*
     * Adjust values to look like fcntl() requests.
     * All locks are write locks; only F_LOCK requests
     * are blocking.  F_TEST has to be translated into
     * a get lock and then back again.
     */
    flock.l_type = F_WRLCK;

    flock.l_type = F_UNLCK;

    u.u_error = mp_afs_lockctl(vp, &flock, cmd, fp->f_cred);

    return (u.u_error);		/* some other error code */

    /*
     * If the request is F_TEST, and GETLK changed
     * the lock type to ULOCK, then return 0; else
     * set errno to EACCES and return.
     */
    if (flag == F_TEST && flock.l_type != F_UNLCK) {
#if defined(AFS_HPUX1122_ENV)
#include "machine/vm/vmparam.h"
#else
#include "../machine/vmparam.h"	/* For KERNELSPACE */
#endif

#if !defined(AFS_HPUX1123_ENV)
/* 11.23 is using 64 bit in many cases */
#define kern_daddr_t	daddr_t
#endif

#include "ufs/inode.h"

#if defined(AFS_HPUX1123_ENV)

#endif /* AFS_HPUX1123_ENV */

#include "h/region.h"
#include "h/pregion.h"
#include "h/vmmeter.h"

#include "h/sysinfo.h"

#if !defined(AFS_HPUX1123_ENV)
#include "h/tuneable.h"
#endif

#include "netinet/in.h"
/* a freelist of one */
struct buf *afs_bread_freebp = 0;

/*
 * Only rfs_read calls this, and it only looks at bp->b_un.b_addr.
 * Thus we can use fake bufs (i.e. not from the real buffer pool).
 */
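/*
 * The fake-buf lifecycle, as implemented below: the buf and its data
 * area come from AFS_KALLOC() rather than the buffer pool; the buf is
 * marked as fake by pointing b_vp back at the buf itself; and a
 * one-deep free list (afs_bread_freebp) caches the most recently
 * released fake buf for reuse.
 */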
afs_bread(vp, lbn, bpp)

    int offset, fsbsize, error;

    AFS_STATCNT(afs_bread);
    fsbsize = vp->v_vfsp->vfs_bsize;
    offset = lbn * fsbsize;
    if (afs_bread_freebp) {
	bp = afs_bread_freebp;
	afs_bread_freebp = 0;
    } else {
	bp = (struct buf *)AFS_KALLOC(sizeof(*bp));
	bp->b_un.b_addr = (caddr_t) AFS_KALLOC(fsbsize);
    }

    iov.iov_base = bp->b_un.b_addr;
    iov.iov_len = fsbsize;
    uio.afsio_iov = &iov;
    uio.afsio_iovcnt = 1;
    uio.afsio_seg = AFS_UIOSYS;
    uio.afsio_offset = offset;
    uio.afsio_resid = fsbsize;

    error = afs_read(VTOAFS(vp), &uio, p_cred(u.u_procp), lbn, bpp, 0);

    afs_bread_freebp = bp;

    afs_bread_freebp = bp;

    *(struct buf **)&bp->b_vp = bp;	/* mark as fake */

    AFS_STATCNT(afs_brelse);

    if ((struct buf *)bp->b_vp != bp) {	/* not fake */
	ufs_brelse(bp->b_vp, bp);
    } else if (afs_bread_freebp) {
	AFS_KFREE(bp->b_un.b_addr, vp->v_vfsp->vfs_bsize);
	AFS_KFREE(bp, sizeof(*bp));
    } else {
	afs_bread_freebp = bp;
    }
afs_bmap(avc, abn, anvp, anbn)
    register struct vcache *avc;
    kern_daddr_t abn, *anbn;
    struct vcache **anvp;
{
    AFS_STATCNT(afs_bmap);

    *anbn = abn * (8192 / DEV_BSIZE);	/* in 512 byte units */

afs_inactive(avc, acred)
    register struct vcache *avc;
{
    struct vnode *vp = AFSTOV(avc);

    if (afs_shuttingdown)

    /*
     * In Solaris, and on HP-UX s800 and HP-UX 10.0, they actually call us
     * with v_count 1 on last reference!
     */
    MP_H_SPINLOCK_USAV(vn_h_sl_pool, vp, &sv_lock, &context);
    if (avc->vrefCount < 1)
	osi_Panic("afs_inactive : v_count < 1\n");

    /*
     * If more than 1, don't unmap the vnode but do decrement the ref count.
     */
    if (vp->v_count > 0) {
	MP_SPINUNLOCK_USAV(sv_lock, context);
	return;
    }
    MP_SPINUNLOCK_USAV(sv_lock, context);
    afs_InactiveVCache(avc, acred);
mp_afs_open(register struct vnode **avcp, int aflags, afs_ucred_t *acred)

    code = afs_open(avcp, aflags, acred);

mp_afs_close(register struct vnode *avcp, int aflags, afs_ucred_t *acred)

    code = afs_close(avcp, aflags, acred);

mp_afs_rdwr(register struct vnode *avcp, struct uio *uio, enum uio_rw arw,
	    int aio, afs_ucred_t *acred)

    save_resid = uio->uio_resid;
    code = afs_rdwr(avcp, uio, arw, aio, acred);
    if (arw == UIO_WRITE && code == ENOSPC) {
	/* HP clears code if any data was written. */
	uio->uio_resid = save_resid;
    }

mp_afs_getattr(register struct vnode *avcp, struct vattr *attrs,
	       afs_ucred_t *acred, enum vsync unused1)

    code = afs_getattr(avcp, attrs, acred);

mp_afs_setattr(register struct vnode *avcp, register struct vattr *attrs,
	       afs_ucred_t *acred, int unused1)

    code = afs_setattr(avcp, attrs, acred);

mp_afs_access(register struct vnode *avcp, int mode, afs_ucred_t *acred)

    code = afs_access(avcp, mode, acred);

mp_afs_lookup(register struct vnode *adp, char *aname,
	      register struct vnode **avcp, afs_ucred_t *acred,
	      struct vnode *unused1)

    code = afs_lookup(adp, aname, avcp, acred);

mp_afs_create(register struct vnode *adp, char *aname, struct vattr *attrs,
	      enum vcexcl aexcl, int amode, struct vnode **avcp,
	      afs_ucred_t *acred)

    code = afs_create(adp, aname, attrs, aexcl, amode, avcp, acred);

mp_afs_remove(register struct vnode *adp, char *aname,
	      afs_ucred_t *acred)

    code = afs_remove(adp, aname, acred);

mp_afs_link(register struct vnode *avc, register struct vnode *adp,
	    char *aname, afs_ucred_t *acred)

    code = afs_link(avc, adp, aname, acred);

mp_afs_rename(register struct vnode *aodp, char *aname1,
	      register struct vnode *andp, char *aname2,
	      afs_ucred_t *acred)

    code = afs_rename(aodp, aname1, andp, aname2, acred);

mp_afs_mkdir(register struct vnode *adp, char *aname, struct vattr *attrs,
	     register struct vnode **avcp, afs_ucred_t *acred)

    code = afs_mkdir(adp, aname, attrs, avcp, acred);

mp_afs_rmdir(register struct vnode *adp, char *aname, afs_ucred_t *acred)

    code = afs_rmdir(adp, aname, acred);

mp_afs_readdir(register struct vnode *avc, struct uio *auio,
	       afs_ucred_t *acred)

    code = afs_readdir(avc, auio, acred);

mp_afs_symlink(register struct vnode *adp, char *aname, struct vattr *attrs,
	       char *atargetName, afs_ucred_t *acred)

    code = afs_symlink(adp, aname, attrs, atargetName, acred);

mp_afs_readlink(register struct vnode *avc, struct uio *auio,
		afs_ucred_t *acred)

    code = afs_readlink(avc, auio, acred);

mp_afs_fsync(register struct vnode *avc, afs_ucred_t *acred, int unused1)

    code = afs_fsync(avc, acred);

mp_afs_bread(register struct vnode *avc, kern_daddr_t lbn, struct buf **bpp,
	     struct vattr *unused1, struct ucred *unused2)

    code = afs_bread(avc, lbn, bpp);

mp_afs_brelse(register struct vnode *avc, struct buf *bp)

    code = afs_brelse(avc, bp);

mp_afs_inactive(register struct vnode *avc, afs_ucred_t *acred)

    code = afs_inactive(avc, acred);

mp_afs_lockctl(struct vnode *avc, struct flock *af, int cmd,
	       afs_ucred_t *acred, struct file *unused1, off_t unused2,

    code = afs_lockctl(avc, af, cmd, acred);

mp_afs_fid(struct vnode *avc, struct fid **fidpp)

    code = afs_fid(avc, fidpp);

mp_afs_readdir2(register struct vnode *avc, struct uio *auio,
		afs_ucred_t *acred)

    code = afs_readdir2(avc, auio, acred);
struct vnodeops Afs_vnodeops = {

#if !defined(AFS_NONFSTRANS)
    /* On HP-UX 10.2 the NFS translator calls afs_bread but does
     * not call afs_brelse, so we see a memory leak.  If the
     * VOP_BREAD() call fails, then NFS does a VOP_RDWR() to get
     * the same data: this is the path we follow now. */

    afs_badop,			/* pathsend */
    afs_noop,			/* setacl */
    afs_noop,			/* getacl */

    afs_lockf,			/* lockf */

struct vnodeops *afs_ops = &Afs_vnodeops;

/* vnode file operations, and our own */

extern int vno_ioctl();
extern int vno_select();
extern int afs_closex();
extern int vno_close();

struct fileops afs_fileops = {

#define vtoblksz(vp)	((vp)->v_vfsp->vfs_bsize)
/*
 ********************************************************************
 ****
 **** afspgin_setup_io_ranges ()
 **** similar to: nfspgin_setup_io_ranges ()
 ********************************************************************
 */
afspgin_setup_io_ranges(vfspage_t * vm_info, pgcnt_t bpages, k_off_t isize,
			pgcnt_t startindex)
{
    pgcnt_t file_offset = VM_FILE_OFFSET(vm_info);
    pgcnt_t minpage;		/* first page to bring in */
    pgcnt_t maxpage;		/* one past last page to bring in */
    pgcnt_t multio_maxpage;
    kern_daddr_t start_blk;
    expnd_flags_t up_reason, down_reason;

    VM_GET_IO_INFO(vm_info, maxpagein, max_num_io);

    /*
     * We do not go past the end of the current pregion nor past the end
     * of the current file.
     */
    maxpage = startindex + (bpages - (startindex + file_offset) % bpages);
    maxpage = vm_reset_maxpage(vm_info, maxpage);
    maxpage = MIN(maxpage, (pgcnt_t) btorp(isize) - file_offset);
    maxpage = MIN(maxpage, startindex + maxpagein);
    multio_maxpage = maxpage = vm_maxpage(vm_info, maxpage);

    VASSERT(maxpage >= startindex);

    /*
     * Expanding the fault will create calls to FINDENTRY() for new
     * pages, which will obsolete "dbd", so copy what it points to
     * and clear it to prevent using stale data.
     */
    prp = VM_PRP(vm_info);
    dbdtype = DBD_TYPE(vm_info);
    start_blk = DBD_DATA(vm_info);

    VASSERT(dbdtype != DBD_NONE);

    if (max_num_io == 1) {
	/*
	 * We need to set up one I/O: first we attempt to expand the
	 * I/O forward, then we expand the I/O backwards.
	 */
	expand_faultin_up(vm_info, dbdtype, (int)bpages, maxpage, count,
			  startindex, start_blk, &up_reason);
	maxpage = startindex + count;
	VASSERT(maxpage <= startindex + maxpagein);
	minpage = startindex - (startindex + file_offset) % bpages;
	minpage = MAX(minpage, maxpage - maxpagein);
	VASSERT(startindex >= VM_BASE_OFFSET(vm_info));
	minpage = vm_minpage(vm_info, minpage);
	VASSERT(minpage <= startindex);

	expand_faultin_down(vm_info, dbdtype, (int)bpages, minpage, count,
			    &startindex, &start_blk, &down_reason);
	VM_SET_IO_STARTINDX(vm_info, 0, startindex);
	VM_SET_IO_STARTBLK(vm_info, 0, start_blk);
	VM_SET_IO_COUNT(vm_info, 0, count);
	VM_SET_NUM_IO(vm_info, 1);
    }

    if (max_num_io > 1) {
	/*
	 * We need to set up multiple I/O information; beginning
	 * with the startindex, we will expand upwards.  The expansion
	 * could stop for one of two reasons; we take the appropriate
	 * action in each of these cases:
	 *	o VM reasons: abort setting up the multiple I/O
	 *	  information and return to our caller indicating
	 *	  that "retry" is required.
	 *	o pagelimit: set up the next I/O info [we may have
	 *	  reached multio_maxpage at this point].
	 * Note that expansion involves no more than a block at a time;
	 * hence it could never stop due to a "discontiguous block".
	 */
	startindex = minpage = vm_minpage(vm_info, 0);
	for (indx = 0; (indx < max_num_io) && (startindex < multio_maxpage);
	     indx++, startindex += count) {
	    dbd = FINDDBD(prp->p_reg, startindex);
	    start_blk = dbd->dbd_data;
	    maxpage =
		startindex + (bpages - (startindex + file_offset) % bpages);
	    maxpage = min(maxpage, multio_maxpage);

	    expand_faultin_up(vm_info, dbdtype, bpages, maxpage, count,
			      startindex, start_blk, &up_reason);
	    VM_SET_IO_STARTINDX(vm_info, indx, startindex);
	    VM_SET_IO_STARTBLK(vm_info, indx, start_blk);
	    VM_SET_IO_COUNT(vm_info, indx, count);
	    if (up_reason & VM_REASONS)
		break;
	    VASSERT(!(up_reason & NONCONTIGUOUS_BLOCK));
	    VASSERT(up_reason & PAGELIMIT);
	}
	if (startindex < multio_maxpage) {
	    VM_MULT_IO_FAILURE(vm_info);
	    VM_REINIT_FAULT_DBDVFD(vm_info);
	    return (0);		/* retry */
	}

	VM_SET_NUM_IO(vm_info, indx);
    }

    /*
     * Tell VM where the I/O intends to start.  This may be different
     * from the faulting point.
     */
    VM_SET_STARTINDX(vm_info, VM_GET_IO_STARTINDX(vm_info, 0));
/*
 ********************************************************************
 ****
 **** afspgin_blkflsh ()
 **** similar to: nfspgin_blkflsh ()
 ********************************************************************
 */
afspgin_blkflsh(vfspage_t * vm_info, struct vnode * devvp, pgcnt_t * num_4k)
{
    pgcnt_t count = *num_4k;
    int num_io = VM_GET_NUM_IO(vm_info);

    /*
     * On this blkflush() we don't want to purge the buffer cache and we do
     * want to wait, so the flags are '0'.
     */
    for (indx = 0; indx < num_io; indx++) {
	blkflush(devvp, (kern_daddr_t) VM_GET_IO_STARTBLK(vm_info, indx),
		 ptob(VM_GET_IO_COUNT(vm_info, indx)), 0,
		 VM_REGION(vm_info));
    }

    if (vm_page_now_valid(vm_info, &page_count)) {
	vm_release_memory(vm_info);
	vm_release_structs(vm_info);
	*num_4k = page_count;
	return (VM_PAGE_PRESENT);
    }
/*
 ********************************************************************
 ****
 **** afspgin_io ()
 **** similar to: nfspgin_io ()
 ********************************************************************
 */
afspgin_io(vfspage_t * vm_info, struct vnode *devvp, pgcnt_t bpages,
	   pgcnt_t maxpagein, pgcnt_t count)
{
    caddr_t vaddr = VM_ADDR(vm_info);
    caddr_t virt_addr = VM_MAPPED_ADDR(vm_info);
    pagein_info_t *io = VM_PAGEIN_INFO(vm_info);
    preg_t *prp = VM_PRP(vm_info);
    int wrt = VM_WRT(vm_info);
    space_t space = VM_SPACE(vm_info);
    int num_io = VM_GET_NUM_IO(vm_info);

#ifdef notdef			/* Not used in AFS */
    /*
     * With the VM_READ_AHEAD_ALLOWED() macro, check if read-ahead should
     * be used in this case.
     *
     * Unlike UFS, NFS does not start the faulting page I/O
     * asynchronously.  Why?  Asynchronous requests are handled by the
     * biods.  It doesn't make sense to queue up the faulting request
     * behind other asynchronous requests.  This is not true for UFS,
     * where the asynchronous request is immediately handled.
     */
    if ((VM_READ_AHEAD_ALLOWED(vm_info)) && (nfs_read_ahead_on)
	&& (NFS_DO_READ_AHEAD) && (should_do_read_ahead(prp, vaddr))) {

	pgcnt_t max_rhead_io;
	pgcnt_t total_rheads_allowed;

	/*
	 * Determine the maximum amount of read-ahead I/O.
	 */
	total_rheads_allowed = maxpagein - count;

	/*
	 * If the count is less than a block, raise it to one.
	 */
	if (total_rheads_allowed < bpages)
	    total_rheads_allowed = bpages;

	max_rhead_io = total_rheads_allowed;
	rhead_vaddr = VM_MAPPED_ADDR(vm_info) + (count * NBPG);

	nfs_read_ahead(vm_info->vp, prp, wrt, space, rhead_vaddr,

	/*
	 * Set the next fault location.  If read_ahead launches any
	 * I/O it will adjust it accordingly.
	 */
	vm_info->prp->p_nextfault = vm_info->startindex + count;

	/*
	 * Now perform the faulting I/O synchronously.
	 */
	syncpageio((swblk_t) VM_GET_IO_STARTBLK(vm_info, 0),
		   VM_MAPPED_SPACE(vm_info), VM_MAPPED_ADDR(vm_info),
		   (int)ptob(count), B_READ, devvp,
		   B_vfs_pagein | B_pagebf, VM_REGION(vm_info));
    }
#endif

    virt_addr = VM_MAPPED_ADDR(vm_info);

    for (i = 0; i < num_io; i++) {
	/*
	 * REVISIT -- investigate doing asyncpageio().
	 */
	error |= (io[i].error =
		  syncpageio((swblk_t) VM_GET_IO_STARTBLK(vm_info, i),
			     VM_MAPPED_SPACE(vm_info), virt_addr,
			     (int)ptob(VM_GET_IO_COUNT(vm_info, i)),
			     B_READ, devvp, B_vfs_pagein | B_pagebf,
			     VM_REGION(vm_info)));
	virt_addr += ptob(VM_GET_IO_COUNT(vm_info, i));
    }

    /*
     * Set the next fault location.  If read_ahead launches any
     * I/O it will adjust it accordingly.
     */
    vm_info->prp->p_nextfault = vm_info->startindex + count;
/*
 ********************************************************************
 ****
 **** afspgin_update_dbd ()
 **** similar to: nfspgin_update_dbd ()
 ********************************************************************
 */
afspgin_update_dbd(vfspage_t * vm_info, int bsize)
{
    pgcnt_t count = bsize / NBPG;
    int num_io = VM_GET_NUM_IO(vm_info);

    for (i = 0; i < num_io; i++) {

	pgindx = VM_GET_IO_STARTINDX(vm_info, i);
	off = vnodindx(VM_REGION(vm_info), pgindx);

	blkno = VM_GET_IO_STARTBLK(vm_info, i);

	VASSERT(bsize % NBPG == 0);
	VASSERT(rem % NBPG == 0);

	pgindx -= (pgcnt_t) btop(rem);
	blkno -= (kern_daddr_t) btodb(rem);

	/*
	 * This region could start in mid-block.  If so, pgindx
	 * could be less than 0, so we adjust pgindx and blkno back
	 * up so that pgindx is 0.
	 */

	blkno += btodb(ptob(prem));

	for (m = 0; m < count && pgindx < VM_REGION_SIZE(vm_info);
	     m++, pgindx++, blkno += btodb(NBPG)) {
	    /*
	     * Note: since this only changes one block, it
	     * assumes only one block was faulted in.  Currently
	     * this is always true for remote files, and we only
	     * get here for remote files, so everything is ok.
	     */
	    vm_mark_dbd(vm_info, pgindx, blkno);
	}
    }
afs_pagein(vp, prp, wrt, space, vaddr, ret_startindex)
    pgcnt_t *ret_startindex;
{
    pgcnt_t pgindx = *ret_startindex;
    struct vnode *devvp;
    kern_daddr_t start_blk = 0;
    int shared;			/* writable memory mapped file */
    retval_t retval = 0;
    pgcnt_t ok_dbd_limit = 0;	/* last dbd that we can trust */
    pgcnt_t bpages;		/* number of pages per block */
    vfspage_t *vm_info = NULL;
    int change_to_fstore = 0;	/* need to change dbds to DBD_FSTORE */
    int flush_start_blk = 0;
    int flush_end_blk = 0;

    AFS_STATCNT(afs_pagein);
    vmemp_lockx();		/* lock down VM empire */

    /* Initialize the VM info structure */
    vm_pagein_init(&vm_info, prp, pgindx, space, vaddr, wrt, 0,

    /* Check to see if we slept and the page was faulted in. */

	vm_release_structs(vm_info);

    vp = VM_GET_PAGEIN_VNODE(vm_info);
    VASSERT(vp != NULL);
    shared = VM_SHARED_OBJECT(vm_info);
    VASSERT(DBD_TYPE(vm_info) != DBD_NONE);

    /*
     * Get the devvp and block size for this vnode type.
     */
    bsize = vp->v_vfsp->vfs_bsize;
    if (bsize <= 0 || (bsize & (DEV_BSIZE - 1)))
	osi_Panic("afs_pagein: bsize is zero or not a multiple of DEV_BSIZE");

    bpages = (pgcnt_t) btop(bsize);
    VASSERT(bpages > 0);
    VM_SET_FS_MAX_PAGES(vm_info, bpages);

    /* This trace cannot be here because the afs_global lock might not be
     * held at this point.  We hold the vm global lock throughout
     * this procedure (and not the AFS global lock).
     * afs_Trace4(afs_iclSetp, CM_TRACE_HPPAGEIN, ICL_TYPE_POINTER, (afs_int32) vp,
     *	       ICL_TYPE_LONG, DBD_TYPE(vm_info), ICL_TYPE_LONG, bpages,
     *	       ICL_TYPE_LONG, shared);
     */

    /* Come here if we have to release the region lock before
     * locking pages.  This can happen in memreserve() and
     */

    /*
     * For remote files like ours, we want to check whether the file has
     * shrunk.  If so, we should invalidate any pages past the end.  In the
     * name of efficiency, we only do this if the page we want to fault is
     * past the end of the file.
     */

	if (VOP_GETATTR(vp, &va, kt_cred(u.u_kthreadp), VIFSYNC) != 0) {
	    VM_ZOMBIE_OBJECT(vm_info);
	    vm_release_memory(vm_info);
	    vm_release_structs(vm_info);

	if (vnodindx(VM_REGION(vm_info), pgindx) >= isize) {
	    /*
	     * The file has shrunk and someone is trying to access a
	     * page past the end of the object.  Shrink the object back
	     * to its current size, send a SIGBUS to the faulting
	     * process and return.
	     *
	     * We must release the region lock before calling mtrunc(),
	     * since mtrunc() locks all the regions that are using this
	     */
	    vm_release_memory(vm_info);
	    vm_truncate_region(vm_info, isize);
	    vm_release_structs(vm_info);
	    vmemp_returnx(-SIGBUS);
	}

    maxpagein = vm_pick_maxpagein(vm_info);
    if (vm_wait_for_memory(vm_info, maxpagein, 1)) {
	/* Check to see if we should continue faulting. */
	if (vm_page_now_valid(vm_info, &page_count)) {
	    vm_release_memory(vm_info);
	    vm_release_structs(vm_info);
	    vmemp_returnx(page_count);
	}
    }
    if ((count = vm_no_io_required(vm_info))) {
	/* Release any excess memory. */
	vm_release_memory(vm_info);
	vm_release_structs(vm_info);
	vmemp_returnx(count);
    }

    /*
     * We should never have DBD_HOLE pages in a non-MMF region.
     */

	VASSERT(dbd->dbd_type != DBD_HOLE);

    VASSERT(DBD_TYPE(vm_info) != DBD_NONE);

    startindex = *ret_startindex;

    /*
     * If the page we want is in memory already, take it.
     */
    if (VM_MEMORY_RESERVED(vm_info) < maxpagein) {
	/* pick up the rest of memory now. */
	if (vm_wait_for_memory(vm_info, maxpagein, 0)) {
	    if (vm_page_now_valid(vm_info, &page_count)) {
		vm_release_memory(vm_info);
		vm_release_structs(vm_info);
		vmemp_returnx(page_count);
	    }

	 afspgin_setup_io_ranges(vm_info, bpages, isize, startindex))) {

    startindex = VM_GET_STARTINDX(vm_info);

    VASSERT(maxpagein >= count);

    /*
     * Release the memory we won't need.
     */
    if (count < maxpagein) {
	vm_release_excess_memory(vm_info,
				 (VM_MEMORY_RESERVED(vm_info) - count));
    }

    retval = afspgin_blkflsh(vm_info, devvp, &count);

    if (retval == VM_RETRY) {

    if (retval == VM_PAGE_PRESENT)

    /*
     * The definition of krusage_cntr_t is in h/kmetric.h, which
     * is not shipped.  Since it's just statistics, we punt and do
     * not update it.  If it's a problem we'll need to get HP to export
     * an interface that we can use to increment the counter.
     */

    /* It's a real fault, not a reclaim */

	krusage_cntr_t *temp;
	temp = kt_cntrp(u.u_kthreadp);

    /*
     * Tell VM where the I/O intends to start.  This may be different
     * from the faulting point.
     */

    /*
     * vm_prepare_io will fill the region with pages and release the
     */
    vm_prepare_io(vm_info, &count);

    /*
     * Count may have been adjusted; check to make sure it's non-zero.
     */
    if (vm_retry(vm_info)) {

    /*
     * Release resources and retry the fault.  Release any excess
     */
    vm_release_memory(vm_info);
    vm_release_structs(vm_info);

    error = afspgin_io(vm_info, devvp, bpages, maxpagein, count);

    if ((VM_IS_ZOMBIE(vm_info)) || (error)) {
	VM_ZOMBIE_OBJECT(vm_info);

    /*
     * For a writable memory mapped file that is remote we must
     * detect potential holes in the file and force allocation of
     * disk space on the remote system.  Unfortunately, there is
     * no easy way to do this, so this gets a little ugly.
     */
    if (shared && wrt) {
	/*
	 * See if the user wants to write to this page.  Write some
	 * minimal amount of data back to the remote file to
	 * force allocation of file space.  We only need to
	 * write a small amount, since holes are always at
	 * least one filesystem block in size.
	 */
	error = vm_alloc_hole(vm_info);

	/*
	 * If some sort of I/O error occurred we generate a
	 * SIGBUS for the process that caused the write,
	 * undo our page locks, etc. and return.
	 */
	if ((VM_IS_ZOMBIE(vm_info)) || (error)) {
	    VM_ZOMBIE_OBJECT(vm_info);

	/*
	 * Change these dbds to DBD_FSTORE.  We cannot do it here,
	 * since the region must be locked, and it is not locked
	 * at the moment.  We cannot lock the region yet, as we
	 * first have to release the page locks.
	 */
	change_to_fstore = 1;

    vm_finish_io(vm_info, count);

    /*
     * Acquire the lock before we play around with changing the vfd's.
     */

    if (change_to_fstore)
	afspgin_update_dbd(vm_info, bsize);

#if defined(AFS_HPUX110_ENV)
    getppdp()->cnt.v_exfod += count;
#else
    mpproc_info[getprocindex()].cnt.v_exfod += count;
#endif
    vmemp_unlockx();		/* free up VM empire */
    *ret_startindex = startindex;

    /*
     * In case we have any excess memory...
     */
    if (VM_MEMORY_RESERVED(vm_info))
	vm_release_memory(vm_info);
    vm_release_structs(vm_info);

    vm_finish_io_failed(vm_info, count);

    vm_undo_validation(vm_info, count);

    /*
     * In case we have any excess memory...
     */
    if (VM_MEMORY_RESERVED(vm_info))
	vm_release_memory(vm_info);
    vm_release_structs(vm_info);

    vmemp_unlockx();		/* free up VM empire */
afs_pageout(vp, prp, start, end, flags)
    struct vnode *vp;		/* not used */
{
    struct vnode *filevp;
    struct vnode *devvp;
    int *piocnt;		/* wakeup counter used if PAGEOUT_WAIT */
    struct ucred *old_cred;
    int inode_changed = 0;

    AFS_STATCNT(afs_pageout);

    steal = (flags & PAGEOUT_FREE);
    vhand = (flags & PAGEOUT_VHAND);
    hard = (flags & PAGEOUT_HARD);

    /* Initialize the VM info structure. */
    vm_pageout_init(&vm_info, prp, start, end, 0, 0, 0, flags);

    /*
     * If the region is marked "don't swap", then don't steal any pages
     * from it.  We can, however, write dirty pages out to disk (only if
     * PAGEOUT_FREE is not set).
     */
    if (vm_no_pageout(&vm_info)) {

    /*
     * If the caller wants to wait until the I/O is complete.
     */
    vm_setup_wait_for_io(&vm_info);

    filevp = VM_GET_PAGEOUT_VNODE(&vm_info);	/* always page out to back store */
    VASSERT(filevp != NULL);

    memset((caddr_t) & args, 0, sizeof(fsdata_t));
    args.remote_down = 0;	/* assume remote file servers are up */
    args.remote = 1;		/* we are remote */
    args.bsize = 0;		/* filled in later by afs_vm_checkpage() */

    if (filevp->v_fstype == VUFS) {
	devvp = ip->i_devvp;
    }

    /*
     * If we are vhand(), and this is an NFS file, we need to
     * see if the NFS server is "down".  If so, we decide
     * if we will try to talk to it again, or defer pageouts
     * of dirty NFS pages until a future time.
     */
    if (vhand && filevp->v_fstype == VNFS && vtomi(filevp)->mi_down
	&& vtomi(filevp)->mi_hard) {
	extern afs_int32 vhand_nfs_retry;
	/*
	 * If there is still time left on our timer, we will
	 * not talk to this server right now.
	 */
	if (vhand_nfs_retry > 0)
	    args.remote_down = 1;
    }

    /*
     * Initialize args.  We set bsize to 0 to tell vfs_vfdcheck() that
     * it must get the file size and other attributes if it comes across
     */
    vm_info.fs_data = (caddr_t) & args;

    /* This trace cannot be here because the afs_global lock might not be
     * held at this point.  We hold the vm global lock throughout
     * this procedure (and not the AFS global lock).
     * afs_Trace4(afs_iclSetp, CM_TRACE_HPPAGEOUT, ICL_TYPE_POINTER, (afs_int32) filevp,
     *	       ICL_TYPE_LONG, start, ICL_TYPE_LONG, end, ICL_TYPE_LONG, flags);
     */

	extern int pageiodone();

	/*
	 * Ask the VM system to find the next run of pages.
	 */
	vm_find_next_range(&vm_info, i, end);

	/*
	 * It's possible that the remote file shrunk in size.  Check the flags
	 * to see if the request was beyond the end of the file.  If it was,
	 * truncate the region to the file size and continue.  We could be in
	 * the middle of a run, so keep going after the truncation; there may
	 * still be some I/O to write.
	 */
	if (VM_FS_FLAGS(&vm_info) & PAGEOUT_TRUNCATE) {
	    pgcnt_t pglen = (pgcnt_t) btorp(args.isize);

	    /*
	     * This page is past the end of the file.  Unlock this page
	     * (region_trunc will throw it away) and then call
	     * region_trunc() to invalidate all pages past the new end of
	     */
	    region_trunc(VM_REGION(&vm_info), pglen, pglen + 1);

	    /*
	     * Remove the truncation flag.
	     */
	    VM_UNSETFS_FLAGS(&vm_info, PAGEOUT_TRUNCATE);
	}

	if (VM_NO_PAGEOUT_RUN(&vm_info))

	/*
	 * We have a run of dirty pages [args.start...args.end].
	 */
	VASSERT(filevp->v_fstype != VCDFS);
	VASSERT((filevp->v_vfsp->vfs_flag & VFS_RDONLY) == 0);
	VASSERT(VM_GET_NUM_IO(&vm_info) == 1);

	/*
	 * We will be doing an I/O on the region; let the VM system know.
	 */
	(void)vm_up_physio_count(&vm_info);

	/*
	 * Okay, get set to perform the I/O.
	 */

	    (VM_END_PAGEOUT_INDX(&vm_info) + 1) -
	    VM_START_PAGEOUT_INDX(&vm_info);

	/*
	 * Allocate and initialize an I/O buffer.
	 */

	vm_init_bp(&vm_info, bp);	/* Let the VM system initialize */

	/* Identify this buffer for KI */
	bp->b_bptype = B_vfs_pageout | B_pagebf;

	    bp->b_flags = B_CALL | B_BUSY | B_PAGEOUT;	/* steal pages */

	    bp->b_flags = B_CALL | B_BUSY;	/* keep pages */

	/*
	 * If we are vhand paging over NFS, we will wait for the I/O
	 */
	if (vhand && filevp->v_fstype == VNFS) {
	    bp->b_flags &= ~B_CALL;

	    bp->b_iodone = (int (*)())pageiodone;

	/*
	 * Make sure we do not write past the end of the file.
	 */
	nbytes = ptob(npages);
	start = vnodindx(VM_REGION(&vm_info), vm_info.start);
	if (start + nbytes > args.isize) {

	    /*
	     * The amount we are off had better not be bigger than a
	     * filesystem block.
	     */
	    if (start + nbytes - args.isize >= args.bsize) {
		osi_Panic("afs_pageout: remainder too large");
	    }

	    /*
	     * Reset the size of the I/O as necessary.  For remote
	     * files, we set the size to the exact number of bytes to
	     * the end of the file.  For local files, we round this up
	     * to the nearest DEV_BSIZE chunk since disk I/O must always
	     * be in multiples of DEV_BSIZE.  In this case, we do not
	     * bother to zero out the data past the "real" end of the
	     * file; this is done when the data is read (either through
	     * mmap() or by normal file system access).
	     */
	    if (args.remote)
		nbytes = args.isize - start;
	    else
		nbytes = roundup(args.isize - start, DEV_BSIZE);
	}

	/*
	 * Now get ready to perform the I/O.
	 */
	if (!vm_protect_pageout(&vm_info, npages)) {

	    vm_undo_invalidation(&vm_info, vm_info.start, vm_info.end);
	    vm_finish_io_failed(&vm_info, npages);

	/*
	 * If this is an NFS write by vhand(), we will not be calling
	 * pageiodone().  asyncpageio() increments parolemem for us
	 * if bp->b_iodone is pageiodone, so we must do it manually
	 * if pageiodone() will not be called automatically.
	 */
	if (!(bp->b_flags & B_CALL) && steal) {
	    register ulong_t context;

	    SPINLOCK_USAV(pfdat_lock, context);
	    parolemem += btorp(nbytes);
	    SPINUNLOCK_USAV(pfdat_lock, context);
	}
	blkflush(devvp, VM_START_PAGEOUT_BLK(&vm_info), (long)nbytes,
		 (BX_NOBUFWAIT | BX_PURGE), VM_REGION(&vm_info));

	/*
	 * If vhand is the one paging things out, and this is an NFS
	 * file, we need to temporarily become a different user so
	 * that we are not trying to page over NFS as root.  We use
	 * the user credentials associated with the writable file
	 * pointer that is in the pseudo-vas for this MMF.
	 *
	 * NOTE: we are currently using "va_rss" to store the ucred
	 * value in the vas (this should be fixed in 10.0).
	 */
	old_cred = kt_cred(u.u_kthreadp);

#if defined(AFS_HPUX1123_ENV)
	    /*
	     * DEE - 11.23 does not have the vas.h, and it looks like
	     * we should never be called with an NFS-type file anyway.
	     * So where did this come from?  Was it copied from NFS?
	     * I assume it was, so we will add an assert for now
	     * and see if the code runs at all.
	     */
	    VASSERT(filevp->v_fstype != VNFS);
#else
	    set_kt_cred(u.u_kthreadp, filevp->v_vas->va_cred);

	    /*
	     * If root was the one who opened the mmf for write,
	     * va_cred will be NULL.  So reset kt_cred(u.u_kthreadp) to what it
	     * was.  We will page out as root, but that is the
	     * correct thing to do in this case anyway.
	     */
	    if (kt_cred(u.u_kthreadp) == NULL)
		set_kt_cred(u.u_kthreadp, old_cred);
#endif

	/*
	 * Really do the I/O.
	 */
	error =
	    asyncpageio(bp, VM_START_PAGEOUT_BLK(&vm_info),
			VM_MAPPED_SPACE(&vm_info), VM_MAPPED_ADDR(&vm_info),
			(int)nbytes, B_WRITE, devvp);

	VASSERT(error == 0);

	/*
	 * If we are vhand paging over NFS we want to wait for the
	 * I/O to complete and take the appropriate actions if an
	 * error is encountered.
	 */

	    if (waitforpageio(bp) && nfs_mi_harddown(filevp)) {
		/*
		 * The server is down; ignore this failure, and
		 * try again later.  (rfscall() has set our retry
		 */
		fsdata.remote_down = 1;
		pageiocleanup(bp, 0);

		/*
		 * vm_vfdcheck() has cleared the valid bit on the
		 * vfds for these pages.  We must go back and set the
		 * valid bit, as the pages are really not gone.
		 *
		 * NOTE: we can do this because we still hold (and have
		 * not released) the region lock.
		 */

		    vm_undo_invalidation(&vm_info, vm_info.start,
					 vm_info.end);
	    } else {
		/*
		 * The I/O succeeded, or we had an error that we do
		 * not want to defer until later.  Call pageidone()
		 */

	/*
	 * And restore our credentials to what they were.
	 */
	set_kt_cred(u.u_kthreadp, old_cred);

	/*
	 * If we reserved memory in vfs_vfdcheck() (only for NFS), we
	 * can now unreserve it.
	 */
	if (vm_info.vm_flags & PAGEOUT_RESERVED) {
	    vm_info.vm_flags &= ~PAGEOUT_RESERVED;
	    vm_release_malloc_memory();
	}

	if (flags & PF_DEACT) {
#if defined(AFS_HPUX110_ENV)
	    getppdp()->cnt.v_pswpout += npages;
#else
	    mpproc_info[getprocindex()].cnt.v_pswpout += npages;
#endif
/*	    sar_bswapout += ptod(npages); */
	}
#if defined(AFS_HPUX110_ENV)
	getppdp()->cnt.v_pgout++;
	getppdp()->cnt.v_pgpgout += npages;
#else
	mpproc_info[getprocindex()].cnt.v_pgout++;
	mpproc_info[getprocindex()].cnt.v_pgpgout += npages;
#endif

	/*
	 * If time and patience have delivered enough
	 * pages, then quit now while we are ahead.
	 */
	if (VM_STOP_PAGING(&vm_info))

	i = VM_END_PAGEOUT_INDX(&vm_info) - VM_BASE_OFFSET(&vm_info) + 1;

    vm_finish_pageout(&vm_info);	/* update vhand's stealscan */

    /*
     * If we wanted to wait for the I/O to complete, sleep on piocnt.
     * We must decrement it by one first, and then make sure that it
     * is non-zero before going to sleep.
     */
    vm_wait_for_io(&vm_info);

    if (inode_changed && !file_is_remote) {
	imark(ip, IUPD | ICHG);
    }
afs_mapdbd(filevp, offset, bn, flags, hole, startidx, endidx)
    struct vnode *filevp;
    kern_daddr_t *bn;		/* Block number. */
    int flags;			/* B_READ or B_WRITE */
    int *hole;			/* To be used for read-ahead. */
    pgcnt_t *startidx;		/* To be used for read-ahead. */
    pgcnt_t *endidx;		/* To be used for read-ahead. */
{
    kern_daddr_t lbn, local_bn;
    long bsize = vtoblksz(filevp) & ~(DEV_BSIZE - 1);

    if (startidx)
	*startidx = (pgcnt_t) (offset / NBPG);
    if (endidx)
	*endidx = (pgcnt_t) (offset / NBPG);
    if (hole)
	*hole = 0;		/* Can't have holes. */
    if (bsize <= 0)
	osi_Panic("afs_mapdbd: zero size");

    lbn = (kern_daddr_t) (offset / bsize);
    on = offset % bsize;

    err = VOP_BMAP(filevp, lbn, NULL, &local_bn, flags);

    /*
     * We can never get a bn less than zero on remote files.
     */
    VASSERT(local_bn >= 0);

    local_bn = local_bn + btodb(on);
/*
 * Return values:
 *	1: The blocks are contiguous.
 *	0: The blocks are not contiguous.
 */
afs_vm_fscontiguous(vp, args, cur_data)
{
    if (cur_data == (VM_END_PAGEOUT_BLK(args) + btodb(NBPG))) {

/*
 * Return values:
 *	1: Stop, this page is the last in the block.
 *
 * Terminate requests at filesystem block boundaries.
 */
afs_vm_stopio(vp, args)
{
    fsdata_t *fsdata = (fsdata_t *) args->fs_data;

#if defined(AFS_HPUX1123_ENV)
    tmpdb = VM_END_PAGEOUT_BLK(args);

    if ((dbtob(tmpdb) + NBPG) % (fsdata->bsize) == 0)
#else
    if ((dbtob(VM_END_PAGEOUT_BLK(args)) + NBPG) % (fsdata->bsize) == 0)
#endif /* AFS_HPUX1123_ENV */
/*
 * afs_vm_checkpage is called by the VM while collecting a run of
 * pages on a pageout.  afs_vm_checkpage() is called for each page
 * VM wants to write to disk.
 */
afs_vm_checkpage(vp, args, pgindx, cur_data)
{
    fsdata_t *fsdata = (fsdata_t *) args->fs_data;

    if (fsdata->remote_down) {	/* never happens for AFS */
	/*
	 * The remote system is down.
	 */
	VASSERT(args->run == 0);

    /*
     * A dirty page.  If we have not yet determined the file size and
     * other attributes that we need to write out pages (the block
     * size and ok_dbd_limit), get that information now.
     */
    if (fsdata->bsize == 0) {
	struct vnode *filevp;
	/*
	 * Get the various attributes about the file.  Store them
	 * in args for the next time around.
	 */

	bsize = vtoblksz(filevp);
	args->maxpgs = (pgcnt_t) btop(bsize);

	if (VOP_GETATTR(filevp, &va, kt_cred(u.u_kthreadp), VIFSYNC) != 0) {
	    /*
	     * The VOP_GETATTR() failed.  If
	     * we are vhand, and this is a hard mount, we will
	     * skip dirty pages for a while and try again later.
	     */
	    if (args->vm_flags & PAGEOUT_VHAND) {
		VASSERT(args->run == 0);

	    /*
	     * This is a "soft" mount, or some other error was
	     * returned from the server.  Mark this region
	     * as a zombie, and free this dirty page.
	     */
	    VM_ZOMBIE_OBJECT(args);

	    /*
	     * The caller will see r_zomb and remove the page
	     */

	fsdata->isize = isize;
	fsdata->bsize = bsize;
    }

    /*
     * See if the file has shrunk (this could have happened
     * asynchronously because of NFS or DUX).  If so, invalidate
     * all of the pages past the end of the file.  This is only
     * needed for remote files, as local files are truncated
     */
    if (vnodindx(VM_REGION(args), pgindx) > fsdata->isize) {
	/*
	 * This page is past the end of the file.  Unlock this page
	 * (region_trunc will throw it away) and then call region_trunc()
	 * to invalidate all pages past the new end of the file.
	 */
	VM_SETFS_FLAGS(args, PAGEOUT_TRUNCATE);

    if ((args->vm_flags & PAGEOUT_VHAND)
	&& (!(args->vm_flags & PAGEOUT_RESERVED))
	&& (!(VM_IS_ZOMBIE(args)))) {
	VASSERT(args->run == 0);
	if (vm_reserve_malloc_memory(NFS_PAGEOUT_MEM)) {
	    /*
	     * Got enough memory to pageout.  Mark the fact that we did
	     * a sysprocmemreserve(), so that we can sysprocmemunreserve() it
	     * later (in remote_pageout()).
	     */
	    args->vm_flags |= PAGEOUT_RESERVED;
	} else {
	    /*
	     * We do not have enough memory to do this pageout.  By
	     * definition, we do not yet have a run, so we just unlock
	     * this page and tell foreach_valid() to continue scanning.
	     * If we come across another dirty page, we will try to
	     * reserve memory again.  That is okay; in fact some memory
	     * may have freed up (as earlier pageouts complete under
    fs_bsize = vtoblksz(bp->b_vp);
    /*
     * Check to see if we are starting mid-block.  If so, then
     * we must return the remainder of the block or less, depending
     */
    bnrem = bp->b_offset % fs_bsize;
    if (bnrem)
	max_size = fs_bsize - bnrem;
    else
	max_size = fs_bsize;

    if (bp->b_bcount > max_size) {
	return (max_size);
    } else {
	return (bp->b_bcount);
    }
afs_mmap(vp, off, size_bytes, access)

#if defined(AFS_HPUX1111_ENV)

    long bsize = vtoblksz(vp);

    if (bsize % NBPG != 0) {

afs_cachelimit(vp, len, location)

    /*
     * Disk addresses are logical, not physical, so fragments are
     */
    *location = btorp(len) + 1;

afs_unmap(vp, off, size_bytes, access)

#if defined(AFS_HPUX1111_ENV)

afs_read_ahead(vp, prp, wrt, space, vaddr, rhead_cnt)

    printf("afs_read_ahead returning 0 \n");

afs_prealloc(vp, size, ignore_minfree, reserved)
    /* DEE: on 11.22 the following is an off_t */

    printf("afs_prealloc returning ENOSPC\n");
afs_ioctl(vp, com, data, flag, cred)
{
    struct afs_ioctl afsioctl, *ai;

    AFS_STATCNT(afs_ioctl);

    /* The call must be a VICEIOCTL call */
    if (((com >> 8) & 0xff) == 'V') {

	/* AFS_COPYIN returns error 14.  Copy the data in instead */
	AFS_COPYIN(data, (caddr_t) & afsioctl, sizeof(afsioctl), error);

	ai = (struct afs_ioctl *)data;
	afsioctl.in = ai->in;
	afsioctl.out = ai->out;
	afsioctl.in_size = ai->in_size;
	afsioctl.out_size = ai->out_size;
	error = HandleIoctl(VTOAFS(vp), com, &afsioctl);
#if defined(AFS_HPUX1111_ENV)
/* Looks like even if the application is 32-bit, we need to round to 8 bytes */
/* This had no effect; it must not be being used */

#define roundtoint(x)	(((x) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
#define reclen(dp)	roundtoint(((dp)->d_namlen + 1 + (sizeof(u_long)) +\
				sizeof(u_int) + 2 * sizeof(u_short)))
#else

#define roundtoint(x)	(((x) + (sizeof(int) - 1)) & ~(sizeof(int) - 1))
#define reclen(dp)	roundtoint(((dp)->d_namlen + 1 + (sizeof(u_long)) +\
				2 * sizeof(u_short)))
#endif
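
/* A worked example of the macros above (illustrative; assumes a 2-byte
 * u_short, an 8-byte long in the first branch per the comment above,
 * and ILP32 sizes in the second): with d_namlen == 5, the
 * AFS_HPUX1111_ENV reclen() gives 5 + 1 + 8 + 4 + 2*2 = 22, rounded up
 * to 24; the other branch gives 5 + 1 + 4 + 2*2 = 14, rounded up
 * to 16. */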
afs_readdir(vp, uiop, cred)
{
    caddr_t ibuf, obuf, ibufend, obufend;
    struct __dirent32 *idp;
    int count, outcount;
    uint64_t tmp_offset;

    count = uiop->uio_resid;
    /* Allocate temporary space for format conversion */
    ibuf = kmem_alloc(2 * count);	/* overkill - fix later */
    obuf = kmem_alloc(count + sizeof(struct dirent));
    aiov.iov_base = ibuf;
    aiov.iov_len = count;
    auio.uio_iov = &aiov;
    auio.uio_iovcnt = 1;
    offset = auio.uio_offset = uiop->uio_offset;
    auio.uio_seg = UIOSEG_KERNEL;
    auio.uio_resid = count;
    auio.uio_fpflags = 0;

    u.u_error = mp_afs_readdir2(vp, &auio, cred);

    /* Convert entries from __dirent32 to dirent format */

    for (idp = (struct __dirent32 *)ibuf, odp =
	 (struct dirent *)obuf, ibufend =
	 ibuf + (count - auio.uio_resid), obufend = obuf + count;
	 (caddr_t) idp < ibufend;
	 idp = (struct __dirent32 *)((caddr_t) idp + idp->__d_reclen), odp =
	 (struct dirent *)((caddr_t) odp + odp->d_reclen)) {
	odp->d_ino = idp->__d_ino;
	odp->d_namlen = idp->__d_namlen;
	(void)strcpy(odp->d_name, idp->__d_name);
	odp->d_reclen = reclen(odp);
	if ((caddr_t) odp + odp->d_reclen > obufend)
	    break;
	/* record offset *after* we're sure to use this entry */
	memcpy((char *)&tmp_offset, (char *)&idp->__d_off, sizeof tmp_offset);
	offset = tmp_offset;
    }

    outcount = (caddr_t) odp - obuf;
    AFS_UIOMOVE(obuf, outcount, UIO_READ, uiop, u.u_error);

    uiop->uio_offset = offset;

    kmem_free(ibuf, count);
    kmem_free(obuf, count + sizeof(struct dirent));
#define roundtolong(x)	(((x) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
#define reclen_dirent64(dp) roundtolong(((dp)->__d_namlen + 1 + (2*sizeof(u_long)) +\
				2 * sizeof(u_short)))

afs_readdir3(vp, uiop, cred)
{
    caddr_t ibuf, obuf, ibufend, obufend;
    struct __dirent32 *idp;
    struct __dirent64 *odp;
    int count, outcount;

    count = uiop->uio_resid;
    /* Allocate temporary space for format conversion */
    ibuf = kmem_alloc(2 * count);	/* overkill - fix later */
    obuf = kmem_alloc(count + sizeof(struct __dirent64));
    aiov.iov_base = ibuf;
    aiov.iov_len = count;
    auio.uio_iov = &aiov;
    auio.uio_iovcnt = 1;
    offset = auio.uio_offset = uiop->uio_offset;
    auio.uio_seg = UIOSEG_KERNEL;
    auio.uio_resid = count;
    auio.uio_fpflags = 0;

    u.u_error = mp_afs_readdir2(vp, &auio, cred);

    /* Convert entries from __dirent32 to __dirent64 format */

    for (idp = (struct __dirent32 *)ibuf, odp =
	 (struct __dirent64 *)obuf, ibufend =
	 ibuf + (count - auio.uio_resid), obufend = obuf + count;
	 (caddr_t) idp < ibufend;
	 idp = (struct __dirent32 *)((caddr_t) idp + idp->__d_reclen), odp =
	 (struct __dirent64 *)((caddr_t) odp + odp->__d_reclen)) {
	memcpy((char *)&odp->__d_off, (char *)&idp->__d_off,
	       sizeof odp->__d_off);
	odp->__d_ino = idp->__d_ino;
	odp->__d_namlen = idp->__d_namlen;
	(void)strcpy(odp->__d_name, idp->__d_name);
	odp->__d_reclen = reclen_dirent64(odp);
	if ((caddr_t) odp + odp->__d_reclen > obufend)
	    break;
	/* record offset *after* we're sure to use this entry */
	offset = odp->__d_off;
    }

    outcount = (caddr_t) odp - obuf;
    AFS_UIOMOVE(obuf, outcount, UIO_READ, uiop, u.u_error);

    uiop->uio_offset = offset;

    kmem_free(ibuf, count);
    kmem_free(obuf, count + sizeof(struct __dirent64));
#define AFS_SV_SEMA_HASH 1
#define AFS_SV_SEMA_HASH_DEBUG 0

#if AFS_SV_SEMA_HASH
/* This portion of the code was originally used to implement
 * thread specific storage for the semaphore save area.  However,
 * there were some spare fields in the proc structure, and these are
 * now being used for saving the semaphores.  Hence, this portion of
 * the code is no longer used.
 */

/* This portion of the code implements thread specific information.
 * The thread id is passed in as the key.  The semaphore save area
 * is hashed on this key.
 */

/* Why is this hash table required?
 * The AFS code is written in such a way that a GLOCK() is done in
 * one function and the GUNLOCK() is done in another function further
 * down the call chain.  The GLOCK() call has to save the current
 * semaphore status before acquiring afs_global_sema.  The GUNLOCK
 * has to release afs_global_sema and reacquire the semaphore status
 * that existed before the corresponding GLOCK.  If GLOCK() and
 * GUNLOCK() were called in the same function, the GLOCK call could
 * have stored the saved semaphore status in a local variable and the
 * corresponding GUNLOCK() call could have restored the original
 * status from this local variable.  But this is not the case with
 * AFS code.  Hence, we have to implement a thread specific semaphore
 * save area.  This is implemented as a hash table.  The key is the
 * thread id.
 */

/* In order for multithreaded processes to work, the sv_sema structures
 * must be saved on a per-thread basis, not a per-process basis.  There
 * is no per-thread storage available to hijack in the OS per-thread
 * data structures (e.g. struct user) so we revive this code.
 * I removed the upper limit on the memory consumption since we don't
 * know how many threads there will be.  Now the code first checks the
 * freeList.  If that fails it then tries garbage collecting.  If that
 * doesn't free up anything then it allocs what it needs.
 */
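
/*
 * A sketch of the intended calling pattern (illustrative only; "tid"
 * stands for whatever thread id is used as the key, and the real
 * GLOCK/GUNLOCK macro bodies live elsewhere):
 *
 *	sv_sema_t *svp;
 *
 *	svp = afsHashInsertFind(tid);	GLOCK: save sema state, hold entry
 *	... critical section under afs_global_sema ...
 *	svp = afsHashFind(tid);		GUNLOCK: look the saved state up
 *	... restore the saved semaphore state ...
 *	afsHashRelease(tid);		drop the reference
 */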
#define ELEMENT		sv_sema_t

#define Hash(xx)		( (xx) % sizeOfHashTable )
#define hashLockInit(xx)	initsema(&xx, 1, FILESYS_SEMA_PRI, FILESYS_SEMA_ORDER)
#define hashLock(xx)		MP_PSEMA(&xx)
#define hashUnlock(xx)		MP_VSEMA(&xx)

typedef struct elem {
    KEY key;
    ELEMENT element;
    int refCnt;
    struct elem *next;
} Element;

typedef struct bucket {
    sema_t lock;
    Element *element;
} Bucket;

static int sizeOfHashTable;
static Bucket *hashTable;

static int currentSize = 0;
static Element *freeList;	/* free list */

static sema_t afsHashLock = { 0 };	/* global lock for hash table */

static void afsHashGarbageCollect();

/*
 ** The global lock protects the global data structures,
 ** e.g. freeList and currentSize.
 ** The bucket lock protects the linked list hanging off that bucket.
 ** The lock hierarchy: one can obtain the bucket lock while holding
 ** the global lock, but not vice versa.
 */
afsHash(int nbuckets)
{				/* allocate the hash table */
    int i;

#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHash: enter\n");
#endif

    sizeOfHashTable = nbuckets;
    currentSize = nbuckets * sizeof(Bucket);

    if (hashTable)
	osi_Panic("afs: SEMA Hashtable already created\n");

    hashTable = (Bucket *) AFS_KALLOC(sizeOfHashTable * sizeof(Bucket));
    if (!hashTable)
	osi_Panic("afs: cannot create SEMA Hashtable\n");

    /* initialize the hash table and associated locks */
    memset((char *)hashTable, 0, sizeOfHashTable * sizeof(Bucket));
    for (i = 0; i < sizeOfHashTable; i++)
	hashLockInit(hashTable[i].lock);
    hashLockInit(afsHashLock);

#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHash: exit\n");
#endif
}

afsHashInsertFind(KEY key)
{
    int index;
    Element *ptr;

#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHashInsertFind: %d\n", key);
#endif
    if (!hashTable)
	osi_Panic("afs: afsHashInsertFind: no hashTable\n");

    index = Hash(key);		/* get bucket number */
    hashLock(hashTable[index].lock);	/* lock this bucket */
    ptr = hashTable[index].element;

    /* if it is already there */
    while (ptr) {
	if (ptr->key == key) {
	    ptr->refCnt++;	/* hold it */
	    hashUnlock(hashTable[index].lock);
#if AFS_SV_SEMA_HASH_DEBUG
	    printf("afsHashInsertFind: %d FOUND\n", key);
#endif
	    return &(ptr->element);
	}
	ptr = ptr->next;
    }

    hashUnlock(hashTable[index].lock);

    /* if something exists in the freeList, take it from there */
    hashLock(afsHashLock);

    if (freeList) {
	ptr = freeList;		/* reuse entry */
	freeList = freeList->next;
    } else {
	afsHashGarbageCollect();	/* afsHashLock locked */
	if (freeList) {
	    ptr = freeList;	/* reuse entry */
	    freeList = freeList->next;
	} else {
	    ptr = (Element *) AFS_KALLOC(sizeof(Element));
	}
    }

    currentSize += sizeof(Element);	/* update memory used */
    hashUnlock(afsHashLock);

    if (!ptr)
	osi_Panic("afs: SEMA Hashtable cannot create new entry\n");

    /* create new entry */
    ptr->key = key;
    memset((char *)&ptr->element, 0, sizeof(ptr->element));
    ptr->refCnt = 1;		/* this guy */

    /* insert new entry in bucket */
    hashLock(hashTable[index].lock);	/* lock this bucket */
    ptr->next = hashTable[index].element;
    hashTable[index].element = ptr;
    hashUnlock(hashTable[index].lock);

#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHashInsertFind: %d MADE\n", key);
#endif

    return &(ptr->element);
}

afsHashFind(KEY key)
{
    int index;
    Element *ptr;

#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHashFind: %d\n", key);
#endif
    if (!hashTable)
	osi_Panic("afs: afsHashFind: no hashTable\n");

    index = Hash(key);		/* get bucket number */
    hashLock(hashTable[index].lock);	/* lock this bucket */
    ptr = hashTable[index].element;

    /* it should be in the hash table */
    while (ptr) {
	if (ptr->key == key) {
	    if (ptr->refCnt <= 0)
		osi_Panic("afs: SEMA HashTable entry already released\n");
	    hashUnlock(hashTable[index].lock);
#if AFS_SV_SEMA_HASH_DEBUG
	    printf("afsHashFind: %d FOUND\n", key);
#endif
	    return &(ptr->element);
	}
	ptr = ptr->next;
    }

    hashUnlock(hashTable[index].lock);
    /* it had better be in the hash table */
    osi_Panic("afs: SEMA HashTable wants non-existent entry \n");
}

afsHashRelease(KEY key)
{
    int index;
    Element *ptr;

#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHashRelease: %d\n", key);
#endif
    if (!hashTable)
	osi_Panic("afs: afsHashRelease: no hashTable\n");

    index = Hash(key);		/* get bucket number */
    hashLock(hashTable[index].lock);	/* lock this bucket */
    ptr = hashTable[index].element;

    /* it should be in the hash table */
    while (ptr) {
	if (ptr->key == key) {
	    if (ptr->refCnt <= 0)
		osi_Panic("afs: SEMA HashTable entry already released\n");
	    ptr->refCnt--;	/* release this guy */
	    hashUnlock(hashTable[index].lock);
#if AFS_SV_SEMA_HASH_DEBUG
	    printf("afsHashRelease: %d FOUND\n", key);
#endif
	    return;
	}
	ptr = ptr->next;
    }

    hashUnlock(hashTable[index].lock);
    /* it had better be in the hash table */
    osi_Panic("afs: SEMA HashTable deleting non-existent entry \n");
}

/* this should be called with afsHashLock WRITE locked */
static void
afsHashGarbageCollect()
{
    int index;
    Element *ptr, *temp;
    int foundFlag = 0;

    if (!hashTable)
	osi_Panic("afs: afsHashGarbageCollect: no hashTable\n");

    for (index = 0; index < sizeOfHashTable; index++) {
	hashLock(hashTable[index].lock);
	ptr = hashTable[index].element;	/* pick up bucket */

	while (ptr && !ptr->refCnt) {
	    /* insert this element into the free list */
	    temp = ptr->next;
	    ptr->next = freeList;
	    freeList = ptr;

	    foundFlag = 1;	/* found at least one */
	    currentSize -= sizeof(Element);
	    ptr = temp;
	}

	hashTable[index].element = ptr;

	/* scan through the remaining list */
	while (ptr && ptr->next) {
	    if (ptr->next->refCnt == 0) {
		/* collect this element */
		temp = ptr->next;
		ptr->next = ptr->next->next;
		temp->next = freeList;
		freeList = temp;

		currentSize -= sizeof(Element);
	    } else {
		ptr = ptr->next;
	    }
	}

	hashUnlock(hashTable[index].lock);
    }

    if (!foundFlag)
	osi_Panic("afs: SEMA HashTable full\n");
}
afs_hp_strategy(bp)
    register struct buf *bp;
{
    register afs_int32 code;
    struct uio tuio;
    struct iovec tiovec[1];
    extern caddr_t hdl_kmap_bp();
    register struct kthread *t = u.u_kthreadp;

    AFS_STATCNT(afs_hp_strategy);

    /*
     * hdl_kmap_bp() saves "b_bcount" and restores it in hdl_remap_bp() after
     * the I/O.  We must save and restore the count because pageiodone()
     * uses b_bcount to determine how many pages to unlock.
     *
     * Remap the entire range.
     */

    afs_Trace4(afs_iclSetp, CM_TRACE_HPSTRAT, ICL_TYPE_POINTER, bp->b_vp,
	       ICL_TYPE_LONG, (int)bp->b_blkno * DEV_BSIZE, ICL_TYPE_LONG,
	       bp->b_bcount, ICL_TYPE_LONG, 0);

    /* Set up the uio structure */
    tuio.afsio_iov = tiovec;
    tuio.afsio_iovcnt = 1;
    tuio.afsio_offset = DEV_BSIZE * bp->b_blkno;
    tuio.afsio_seg = AFS_UIOSYS;
    tuio.afsio_resid = bp->b_bcount;
    tuio.uio_fpflags = 0;
    tiovec[0].iov_base = bp->b_un.b_addr;
    tiovec[0].iov_len = bp->b_bcount;

    if ((bp->b_flags & B_READ) == B_READ) {
	/* read b_bcount bytes into kernel address b_un.b_addr
	 * starting at byte DEV_BSIZE * b_blkno.  Bzero anything
	 * we can't read, and finally call iodone(bp).  File is
	 * in bp->b_vp.  Credentials are from u area??
	 */
	code = afs_rdwr(VTOAFS(bp->b_vp), &tuio, UIO_READ, 0, kt_cred(t));

	if (tuio.afsio_resid > 0) {
	    privlbzero(bvtospace(bp, bp->b_un.b_addr),
		       bp->b_un.b_addr + bp->b_bcount - tuio.afsio_resid,
		       (size_t) tuio.afsio_resid);
	}
    } else
	code = afs_rdwr(VTOAFS(bp->b_vp), &tuio, UIO_WRITE, 0, kt_cred(t));

    /* Remap back to the user's space */
afs_pathconf(vp, name, resultp, cred)
    struct ucred *cred;		/* unused */
{
    switch (name) {
    case _PC_LINK_MAX:		/* Maximum number of links to a file */
	*resultp = 255;		/* an unsigned short on the fileserver */
	break;			/* an unsigned char in the client.... */

    case _PC_NAME_MAX:		/* Max length of file name */

    case _PC_PATH_MAX:		/* Maximum length of Path Name */

    case _PC_PIPE_BUF:		/* Max atomic write to pipe.  See fifo_vnops */
    case _PC_CHOWN_RESTRICTED:	/* Anybody can chown? */
    case _PC_NO_TRUNC:		/* No file name truncation on overflow? */
	u.u_error = EOPNOTSUPP;
	return (EOPNOTSUPP);

    case _PC_MAX_CANON:		/* TTY buffer size for canonical input */
	/* need more work here for pty, ite buffer size, if they differ */
	if (vp->v_type != VCHR) {

	*resultp = CANBSIZ;	/* for tty */

	/* need more work here for pty, ite buffer size, if they differ */
	if (vp->v_type != VCHR) {	/* TTY buffer size */

	*resultp = TTYHOG;	/* for tty */

	/* Terminal special characters can be disabled? */
	if (vp->v_type != VCHR) {

	if ((vp->v_type != VREG) && (vp->v_type != VBLK)) {

	*resultp = 1;		/* Synchronized IO supported for this file */

    case _PC_FILESIZEBITS:
	if (vp->v_type != VDIR)

	*resultp = MAX_SMALL_FILE_BITS;