2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 /* This is a placeholder for routines unique to the port of AFS to hp-ux*/
12 #include <afsconfig.h>
13 #include "afs/param.h"
18 #include "afs/sysincludes.h" /* Standard vendor system headers */
19 #include "afsincludes.h" /* Afs-based standard headers */
20 #include "afs/afs_stats.h" /* statistics stuff */
24 #include <sys/mount.h>
25 #include <sys/vnode.h>
26 #include <sys/pathname.h>
28 extern struct vfsops Afs_vfsops;
29 extern int afs_hp_strategy();
30 extern int afs_bmap(), afs_badop(), afs_noop(), afs_lockf();
31 extern int afs_pagein();
32 extern int afs_pageout();
33 extern int afs_ioctl();
34 extern int afs_prealloc();
35 extern int afs_mapdbd();
36 extern int afs_mmap();
37 extern int afs_cachelimit();
38 extern int afs_vm_checkpage();
39 extern int afs_vm_fscontiguous();
40 extern int afs_vm_stopio();
41 extern int afs_read_ahead();
42 extern int afs_unmap();
43 extern int afs_release();
44 extern int afs_swapfs_len();
45 extern int afs_readdir2();
46 extern int afs_readdir();
47 extern int afs_readdir3();
48 extern int afs_pathconf();
49 extern int afs_close();
51 #define vtoblksz(vp) ((vp)->v_vfsp->vfs_bsize)
53 #if defined(AFS_HPUX110_ENV)
54 /* We no longer need to lock on the VM Empire,
55 * or at least that is what is claimed.
56 * so we will no-op the vmemp_ routines
57 * This needs to be looked at closer.
61 #define vmemp_returnx(a) return(a)
62 #define vmemp_unlockx()
65 #if !defined(AFS_HPUX110_ENV)
67 * Copy an mbuf to the contiguous area pointed to by cp.
68 * Skip <off> bytes and copy <len> bytes.
69 * Returns the number of bytes not transferred.
70 * The mbuf is NOT changed.
/*
 * m_cpytoc -- copy <len> bytes, starting <off> bytes in, from mbuf <m>
 * into the contiguous buffer <cp>.  Returns the number of bytes NOT
 * transferred; the mbuf chain itself is not modified.
 * NOTE(review): interior lines of this body are elided in this view;
 * only the surviving lines are annotated.
 */
73 m_cpytoc(m, off, len, cp)
74 register struct mbuf *m;
75 register int off, len;
/* A bad caller here is a kernel bug, so panic rather than fail softly. */
80 if (m == NULL || off < 0 || len < 0 || cp == NULL)
81 osi_Panic("m_cpytoc");
/* Walk past mbufs that lie entirely before the requested offset. */
83 if (m->m_len <= off) {
/* First overlapping mbuf: copy from <off> within it. */
92 ml = MIN(len, m->m_len - off);
93 memcpy(cp, mtod(m, caddr_t) + off, (u_int) ml);
/* Subsequent mbufs are copied from their beginning. */
100 memcpy(cp, mtod(m, caddr_t), (u_int) ml);
111 * Note that the standard Sun vnode interface doesn't have a vop_lockf(), so this code is
112 * totally new. This came about because HP-UX has lockf() implemented as
113 * a system call while Sun has it implemented as a library (apparently).
114 * To handle this, we have to translate the lockf() request into an
115 * fcntl() looking request, and then translate the results back if necessary.
116 * we call afs_lockctl() directly .
/*
 * afs_lockf -- HP-UX lockf() entry point for AFS vnodes.
 * Translates a lockf()-style request into an fcntl()-style struct flock
 * and hands it to afs_lockctl() (via mp_afs_lockctl), then maps the
 * result back.  NOTE(review): interior lines are elided in this view.
 */
118 afs_lockf(vp, flag, len, cred, fp, LB, UB)
121 struct AFS_UCRED *cred;
125 /*for now, just pretend it works */
126 struct k_flock flock;
130 * Create a flock structure and translate the lockf request
131 * into an appropriate looking fcntl() type request for afs_lockctl()
/* lockf() locks are relative to the current file position. */
135 flock.l_start = fp->f_offset;
136 /* convert negative lengths to positive */
137 if (flock.l_len < 0) {
138 flock.l_start += flock.l_len;
139 flock.l_len = -(flock.l_len);
142 * Adjust values to look like fcntl() requests.
143 * All locks are write locks, only F_LOCK requests
144 * are blocking.  F_TEST has to be translated into
145 * a get lock and then back again.
147 flock.l_type = F_WRLCK;
151 flock.l_type = F_UNLCK;
/* Hand the translated request to the common AFS lock-control path. */
160 u.u_error = mp_afs_lockctl(vp, &flock, cmd, fp->f_cred);
162 return (u.u_error);	/* some other error code */
165 * if request is F_TEST, and GETLK changed
166 * the lock type to ULOCK, then return 0, else
167 * set errno to EACCESS and return.
169 if (flag == F_TEST && flock.l_type != F_UNLCK) {
177 #if defined(AFS_HPUX1122_ENV)
178 #include "machine/vm/vmparam.h"
180 #include "../machine/vmparam.h" /* For KERNELSPACE */
187 #include "ufs/inode.h"
191 #include "h/region.h"
192 #include "h/pregion.h"
193 #include "h/vmmeter.h"
195 #include "h/sysinfo.h"
197 #include "h/tuneable.h"
199 #include "netinet/in.h"
201 /* a freelist of one */
202 struct buf *afs_bread_freebp = 0;
205 * Only rfs_read calls this, and it only looks at bp->b_un.b_addr.
206 * Thus we can use fake bufs (ie not from the real buffer pool).
/*
 * afs_bread -- read logical block <lbn> of vnode <vp> into a fake buf.
 * Only rfs_read calls this, and it only looks at bp->b_un.b_addr, so we
 * hand back a buf allocated with AFS_KALLOC (not from the real buffer
 * pool) and satisfy the read via afs_read() with a kernel-space uio.
 * A one-deep freelist (afs_bread_freebp) recycles the previous fake buf.
 * NOTE(review): interior lines are elided in this view.
 */
208 afs_bread(vp, lbn, bpp)
213 int offset, fsbsize, error;
218 AFS_STATCNT(afs_bread);
/* Convert the logical block number to a byte offset in the file. */
219 fsbsize = vp->v_vfsp->vfs_bsize;
220 offset = lbn * fsbsize;
/* Reuse the cached fake buf if one is available, else allocate fresh. */
221 if (afs_bread_freebp) {
222 bp = afs_bread_freebp;
223 afs_bread_freebp = 0;
225 bp = (struct buf *)AFS_KALLOC(sizeof(*bp));
226 bp->b_un.b_addr = (caddr_t) AFS_KALLOC(fsbsize);
/* Build a single-segment kernel-space uio covering one block. */
229 iov.iov_base = bp->b_un.b_addr;
230 iov.iov_len = fsbsize;
231 uio.afsio_iov = &iov;
232 uio.afsio_iovcnt = 1;
233 uio.afsio_seg = AFS_UIOSYS;
234 uio.afsio_offset = offset;
235 uio.afsio_resid = fsbsize;
239 error = afs_read(VTOAFS(vp), &uio, p_cred(u.u_procp), lbn, bpp, 0);
/* On failure paths the buf goes back on the one-deep freelist. */
241 afs_bread_freebp = bp;
245 afs_bread_freebp = bp;
/* b_vp pointing at the buf itself tags it as one of our fake bufs. */
247 *(struct buf **)&bp->b_vp = bp;	/* mark as fake */
/*
 * NOTE(review): tail of afs_brelse(); the function header is elided in
 * this view.  Releases a buf obtained from afs_bread(): real (non-fake)
 * bufs go back through ufs_brelse(); a fake buf is freed outright if the
 * one-deep freelist is already occupied, otherwise it is cached there.
 */
257 AFS_STATCNT(afs_brelse);
258 if ((struct buf *)bp->b_vp != bp) {	/* not fake */
260 ufs_brelse(bp->b_vp, bp);
261 } else if (afs_bread_freebp) {
/* Freelist full: release both the data block and the buf header. */
262 AFS_KFREE(bp->b_un.b_addr, vp->v_vfsp->vfs_bsize);
263 AFS_KFREE(bp, sizeof(*bp));
265 afs_bread_freebp = bp;
/*
 * afs_bmap -- map logical block <abn> of vcache <avc> to a device block
 * number in *anbn.  AFS uses a fixed 8K logical block, so the result is
 * expressed in DEV_BSIZE (512-byte) units.
 * NOTE(review): interior lines are elided in this view.
 */
270 afs_bmap(avc, abn, anvp, anbn)
271 register struct vcache *avc;
272 afs_int32 abn, *anbn;
273 struct vcache **anvp;
275 AFS_STATCNT(afs_bmap);
279 *anbn = abn * (8192 / DEV_BSIZE);	/* in 512 byte units */
/*
 * afs_inactive -- called when the last reference to an AFS vnode goes
 * away.  Checks the reference count under the per-vnode spinlock; if
 * other references remain it just drops the lock, otherwise it hands
 * the vcache to afs_InactiveVCache() for cleanup.
 * NOTE(review): interior lines are elided in this view.
 */
283 afs_inactive(avc, acred)
284 register struct vcache *avc;
285 struct AFS_UCRED *acred;
287 struct vnode *vp = AFSTOV(avc);
/* Nothing to do once the client is shutting down. */
290 if (afs_shuttingdown)
294 * In Solaris and HPUX s800 and HP-UX10.0 they actually call us with
295 * v_count 1 on last reference!
/* Take the hashed vnode spinlock before inspecting the ref count. */
297 MP_H_SPINLOCK_USAV(vn_h_sl_pool, vp, &sv_lock, &context);
298 if (avc->vrefCount < 1)
299 osi_Panic("afs_inactive : v_count < 1\n");
302 * If more than 1 don't unmap the vnode but do decrement the ref count
305 if (vp->v_count > 0) {
306 MP_SPINUNLOCK_USAV(sv_lock, context);
/* Truly the last reference: release the lock and retire the vcache. */
309 MP_SPINUNLOCK_USAV(sv_lock, context);
310 afs_InactiveVCache(avc, acred);
/*
 * mp_afs_* -- HP-UX VOP wrapper layer.
 * Each wrapper adapts the HP-UX vnode-operation calling convention to
 * the corresponding common AFS routine (afs_open, afs_close, ...).
 * NOTE(review): the glue lines of each wrapper (locals, lock handling,
 * braces, return) are elided in this view; only the signatures and the
 * dispatch calls survive.  Parameters named unused* are ignored.
 */
316 mp_afs_open(register struct vnode **avcp, int aflags, struct AFS_UCRED *acred)
321 code = afs_open(avcp, aflags, acred);
327 mp_afs_close(register struct vnode *avcp, int aflags, struct AFS_UCRED *acred)
332 code = afs_close(avcp, aflags, acred);
338 mp_afs_rdwr(register struct vnode *avcp, struct uio *uio, enum uio_rw arw,
339 int aio, struct AFS_UCRED *acred)
/* Remember resid so a partial ENOSPC write can be reported HP-style. */
345 save_resid = uio->uio_resid;
346 code = afs_rdwr(avcp, uio, arw, aio, acred);
347 if (arw == UIO_WRITE && code == ENOSPC) {
348 /* HP clears code if any data written. */
349 uio->uio_resid = save_resid;
356 mp_afs_getattr(register struct vnode *avcp, struct vattr *attrs,
357 struct AFS_UCRED *acred, enum vsync unused1)
362 code = afs_getattr(avcp, attrs, acred);
368 mp_afs_setattr(register struct vnode *avcp, register struct vattr *attrs,
369 struct AFS_UCRED *acred, int unused1)
374 code = afs_setattr(avcp, attrs, acred);
380 mp_afs_access(register struct vnode *avcp, int mode, struct AFS_UCRED *acred)
385 code = afs_access(avcp, mode, acred);
391 mp_afs_lookup(register struct vnode *adp, char *aname,
392 register struct vnode **avcp, struct AFS_UCRED *acred,
393 struct vnode *unused1)
398 code = afs_lookup(adp, aname, avcp, acred);
404 mp_afs_create(register struct vnode *adp, char *aname, struct vattr *attrs,
405 enum vcexcl aexcl, int amode, struct vnode **avcp,
406 struct AFS_UCRED *acred)
411 code = afs_create(adp, aname, attrs, aexcl, amode, avcp, acred);
418 mp_afs_remove(register struct vnode *adp, char *aname,
419 struct AFS_UCRED *acred)
424 code = afs_remove(adp, aname, acred);
430 mp_afs_link(register struct vnode *avc, register struct vnode *adp,
431 char *aname, struct AFS_UCRED *acred)
436 code = afs_link(avc, adp, aname, acred);
442 mp_afs_rename(register struct vnode *aodp, char *aname1,
443 register struct vnode *andp, char *aname2,
444 struct AFS_UCRED *acred)
449 code = afs_rename(aodp, aname1, andp, aname2, acred);
455 mp_afs_mkdir(register struct vnode *adp, char *aname, struct vattr *attrs,
456 register struct vnode **avcp, struct AFS_UCRED *acred)
461 code = afs_mkdir(adp, aname, attrs, avcp, acred);
468 mp_afs_rmdir(register struct vnode *adp, char *aname, struct AFS_UCRED *acred)
473 code = afs_rmdir(adp, aname, acred);
480 mp_afs_readdir(register struct vnode *avc, struct uio *auio,
481 struct AFS_UCRED *acred)
486 code = afs_readdir(avc, auio, acred);
492 mp_afs_symlink(register struct vnode *adp, char *aname, struct vattr *attrs,
493 char *atargetName, struct AFS_UCRED *acred)
498 code = afs_symlink(adp, aname, attrs, atargetName, acred);
505 mp_afs_readlink(register struct vnode *avc, struct uio *auio,
506 struct AFS_UCRED *acred)
511 code = afs_readlink(avc, auio, acred);
517 mp_afs_fsync(register struct vnode *avc, struct AFS_UCRED *acred, int unused1)
522 code = afs_fsync(avc, acred);
528 mp_afs_bread(register struct vnode *avc, daddr_t lbn, struct buf **bpp,
529 struct vattr *unused1, struct ucred *unused2)
534 code = afs_bread(avc, lbn, bpp);
540 mp_afs_brelse(register struct vnode *avc, struct buf *bp)
545 code = afs_brelse(avc, bp);
552 mp_afs_inactive(register struct vnode *avc, struct AFS_UCRED *acred)
557 code = afs_inactive(avc, acred);
563 mp_afs_lockctl(struct vnode *avc, struct flock *af, int cmd,
564 struct AFS_UCRED *acred, struct file *unused1, off_t unused2,
570 code = afs_lockctl(avc, af, cmd, acred);
576 mp_afs_fid(struct vnode *avc, struct fid **fidpp)
581 code = afs_fid(avc, fidpp);
587 mp_afs_readdir2(register struct vnode *avc, struct uio *auio,
588 struct AFS_UCRED *acred)
593 code = afs_readdir2(avc, auio, acred);
/*
 * Afs_vnodeops -- the HP-UX vnode-operations vector for AFS, wiring the
 * mp_afs_* wrappers (and afs_noop/afs_badop placeholders) into the VOP
 * table.  NOTE(review): most initializer entries are elided in this view.
 */
599 struct vnodeops Afs_vnodeops = {
622 #if !defined(AFS_NONFSTRANS)
623 /* on HPUX102 the nfs translator calls afs_bread but does
624 * not call afs_brelse. Hence we see a memory leak. If the
625 * VOP_BREAD() call fails, then nfs does VOP_RDWR() to get
626 * the same data : this is the path we follow now. */
633 afs_badop,	/* pathsend */
634 afs_noop,	/* setacl */
635 afs_noop,	/* getacl */
639 afs_lockf,	/* lockf */
662 struct vnodeops *afs_ops = &Afs_vnodeops;
664 /* vnode file operations, and our own */
666 extern int vno_ioctl();
667 extern int vno_select();
668 extern int afs_closex();
669 extern int vno_close();
670 struct fileops afs_fileops = {
677 #define vtoblksz(vp) ((vp)->v_vfsp->vfs_bsize)
680 ********************************************************************
682 **** afspgin_setup_io_ranges ()
683 **** similar to: nfspgin_setup_io_ranges ()
684 ********************************************************************
/*
 * afspgin_setup_io_ranges -- similar to nfspgin_setup_io_ranges().
 * Given a faulting page index, compute the I/O range(s) to page in:
 * clamp the range to the pregion, the file size and the pagein limit,
 * then expand the fault forward/backward (single-I/O case) or build a
 * list of per-block I/Os (multi-I/O case) in vm_info.
 * Returns 0 when the caller must retry the fault.
 * NOTE(review): interior lines are elided in this view.
 */
687 afspgin_setup_io_ranges(vfspage_t * vm_info, pgcnt_t bpages, k_off_t isize,
690 pgcnt_t file_offset = VM_FILE_OFFSET(vm_info);
691 pgcnt_t minpage;	/* first page to bring in */
692 pgcnt_t maxpage;	/* one past last page to bring in */
694 pgcnt_t multio_maxpage;
697 expnd_flags_t up_reason, down_reason;
704 VM_GET_IO_INFO(vm_info, maxpagein, max_num_io);
707 * We do not go past the end of the current pregion nor past the end
708 * of the current file.
/* Clamp maxpage successively: block boundary, file size, pagein limit. */
711 maxpage = startindex + (bpages - (startindex + file_offset) % bpages);
712 maxpage = vm_reset_maxpage(vm_info, maxpage);
713 maxpage = MIN(maxpage, (pgcnt_t) btorp(isize) - file_offset);
714 maxpage = MIN(maxpage, startindex + maxpagein);
715 multio_maxpage = maxpage = vm_maxpage(vm_info, maxpage);
720 VASSERT(maxpage >= startindex);
723 * Expanding the fault will create calls to FINDENTRY() for new
724 * pages, which will obsolete "dbd", so copy what it points to
725 * and clear it to prevent using stale data.
728 prp = VM_PRP(vm_info);
729 dbdtype = DBD_TYPE(vm_info);
730 start_blk = DBD_DATA(vm_info);
733 VASSERT(dbdtype != DBD_NONE);
735 if (max_num_io == 1) {
737 * We need to set up one I/O: First we attempt to expand the
738 * I/O forward. Then we expand the I/O backwards.
741 expand_faultin_up(vm_info, dbdtype, (int)bpages, maxpage, count,
742 startindex, start_blk, &up_reason);
743 maxpage = startindex + count;
744 VASSERT(maxpage <= startindex + maxpagein);
/* Backward expansion is bounded by block alignment and the limit. */
745 minpage = startindex - (startindex + file_offset) % bpages;
746 minpage = MAX(minpage, maxpage - maxpagein);
747 VASSERT(startindex >= VM_BASE_OFFSET(vm_info));
748 minpage = vm_minpage(vm_info, minpage);
749 VASSERT(minpage <= startindex);
751 expand_faultin_down(vm_info, dbdtype, (int)bpages, minpage, count,
752 &startindex, &start_blk, &down_reason);
/* Record the single resulting I/O range. */
753 VM_SET_IO_STARTINDX(vm_info, 0, startindex);
754 VM_SET_IO_STARTBLK(vm_info, 0, start_blk);
755 VM_SET_IO_COUNT(vm_info, 0, count);
756 VM_SET_NUM_IO(vm_info, 1);
759 if (max_num_io > 1) {
761 * We need to set up multiple I/O information; beginning
762 * with the startindex, we will expand upwards. The expansion
763 * could stop for one of 2 reasons; we take the appropriate
764 * action in each of these cases:
765 * o VM reasons: abort setting up the multiple I/O
766 * information and return to our caller indicating
767 * that "retry" is required.
768 * o pagelimit: set up the next I/O info [we may have
769 * reached multio_maxpage at this point].
770 * Note that expansion involves no more than a block at a time;
771 * hence it could never stop due to "discontiguous block"
774 startindex = minpage = vm_minpage(vm_info, 0);
775 for (indx = 0; (indx < max_num_io) && (startindex < multio_maxpage);
776 indx++, startindex += count) {
/* Look up the disk block backing this page index. */
777 dbd = FINDDBD(prp->p_reg, startindex);
778 start_blk = dbd->dbd_data;
780 startindex + (bpages - (startindex + file_offset) % bpages);
781 maxpage = min(maxpage, multio_maxpage);
783 expand_faultin_up(vm_info, dbdtype, bpages, maxpage,
785 startindex, start_blk, &up_reason);
786 VM_SET_IO_STARTINDX(vm_info, indx, startindex);
787 VM_SET_IO_STARTBLK(vm_info, indx, start_blk);
788 VM_SET_IO_COUNT(vm_info, indx, count);
789 if (up_reason & VM_REASONS)
791 VASSERT(!(up_reason & NONCONTIGUOUS_BLOCK));
792 VASSERT(up_reason & PAGELIMIT);
/* Didn't cover the whole range: undo and ask the caller to retry. */
794 if (startindex < multio_maxpage) {
795 VM_MULT_IO_FAILURE(vm_info);
796 VM_REINIT_FAULT_DBDVFD(vm_info);
797 return (0);	/* retry */
800 VM_SET_NUM_IO(vm_info, indx);
804 * Tell VM where the I/O intends to start. This may be different
805 * from the faulting point.
808 VM_SET_STARTINDX(vm_info, VM_GET_IO_STARTINDX(vm_info, 0));
815 ********************************************************************
817 **** afspgin_blkflsh ()
818 **** similar to: nfspgin_blkflsh ()
819 ********************************************************************
/*
 * afspgin_blkflsh -- similar to nfspgin_blkflsh().
 * Flush any dirty buffer-cache blocks that overlap the planned pagein
 * I/O ranges so the subsequent pagein reads consistent data.  If the
 * faulting page became valid while we waited, release everything and
 * report VM_PAGE_PRESENT with the page count in *num_4k.
 * NOTE(review): interior lines are elided in this view.
 */
822 afspgin_blkflsh(vfspage_t * vm_info, struct vnode * devvp, pgcnt_t * num_4k)
825 pgcnt_t count = *num_4k;
828 int num_io = VM_GET_NUM_IO(vm_info);
831 * On this blkflush() we don't want to purge the buffer cache and we do
832 * want to wait, so the flags are '0'.
835 for (indx = 0; indx < num_io; indx++) {
837 blkflush(devvp, (daddr_t) VM_GET_IO_STARTBLK(vm_info, indx),
838 ptob(VM_GET_IO_COUNT(vm_info, indx)), 0,
/* Page may have become valid while we slept in blkflush(). */
842 if (vm_page_now_valid(vm_info, &page_count)) {
843 vm_release_memory(vm_info);
844 vm_release_structs(vm_info);
845 *num_4k = page_count;
846 return (VM_PAGE_PRESENT);
855 ********************************************************************
858 **** similar to: nfspgin_io ()
859 ********************************************************************
/*
 * afspgin_io -- similar to nfspgin_io().
 * Perform the actual pagein I/O for the ranges recorded in vm_info,
 * synchronously via syncpageio() (single range, or one call per range
 * in the multi-I/O loop), then set the pregion's next-fault hint.
 * The read-ahead machinery below is compiled out (#ifdef notdef).
 * NOTE(review): interior lines are elided in this view.
 */
862 afspgin_io(vfspage_t * vm_info, struct vnode *devvp, pgcnt_t bpages,
863 pgcnt_t maxpagein, pgcnt_t count)
867 caddr_t vaddr = VM_ADDR(vm_info);
868 caddr_t virt_addr = VM_MAPPED_ADDR(vm_info);
869 pagein_info_t *io = VM_PAGEIN_INFO(vm_info);
870 preg_t *prp = VM_PRP(vm_info);
871 int wrt = VM_WRT(vm_info);
872 space_t space = VM_SPACE(vm_info);
873 int num_io = VM_GET_NUM_IO(vm_info);
875 #ifdef notdef			/* Not used in AFS */
877 * With VM_READ_AHEAD_ALLOWED() macro, check if read-ahead should
878 * be used in this case.
880 * Unlike UFS, NFS does not start the faulting page I/O
881 * asynchronously. Why? Asynchronous requests are handled by the
882 * biod's. It doesn't make sense to queue up the faulting request
883 * behind other asynchronous requests. This is not true for UFS
884 * where the asynchronous request is immediately handled.
887 if ((VM_READ_AHEAD_ALLOWED(vm_info)) && (nfs_read_ahead_on)
888 && (NFS_DO_READ_AHEAD) && (should_do_read_ahead(prp, vaddr))) {
890 pgcnt_t max_rhead_io;
892 pgcnt_t total_rheads_allowed;
895 * Determine the maximum amount of read-ahead I/O.
897 total_rheads_allowed = maxpagein - count;
900 * If the count is less than a block, raise it to one.
902 if (total_rheads_allowed < bpages)
903 total_rheads_allowed = bpages;
905 max_rhead_io = total_rheads_allowed;
906 rhead_vaddr = VM_MAPPED_ADDR(vm_info) + (count * NBPG);
908 nfs_read_ahead(vm_info->vp, prp, wrt, space, rhead_vaddr,
912 * Set the next fault location. If read_ahead launches any
913 * I/O it will adjust it accordingly.
915 vm_info->prp->p_nextfault = vm_info->startindex + count;
918 * Now perform the faulting I/O synchronously.
923 syncpageio((swblk_t) VM_GET_IO_STARTBLK(vm_info, 0),
924 VM_MAPPED_SPACE(vm_info), VM_MAPPED_ADDR(vm_info),
925 (int)ptob(count), B_READ, devvp,
926 B_vfs_pagein | B_pagebf, VM_REGION(vm_info));
/* Multi-I/O path: issue one synchronous pagein per recorded range. */
930 virt_addr = VM_MAPPED_ADDR(vm_info);
932 for (i = 0; i < num_io; i++) {
934 * REVISIT -- investigate doing asyncpageio().
936 error |= (io[i].error =
937 syncpageio((swblk_t) VM_GET_IO_STARTBLK(vm_info, i),
938 VM_MAPPED_SPACE(vm_info), virt_addr,
939 (int)ptob(VM_GET_IO_COUNT(vm_info, i)),
940 B_READ, devvp, B_vfs_pagein | B_pagebf,
941 VM_REGION(vm_info)));
942 virt_addr += ptob(VM_GET_IO_COUNT(vm_info, i));
945 * Set the next fault location. If read_ahead launches any
946 * I/O it will adjust it accordingly.
948 vm_info->prp->p_nextfault = vm_info->startindex + count;
955 ********************************************************************
957 **** afspgin_update_dbd ()
958 **** similar to: nfspgin_update_dbd ()
959 ********************************************************************
/*
 * afspgin_update_dbd -- similar to nfspgin_update_dbd().
 * After a pagein of a writable shared mapping, walk each I/O range and
 * mark the disk-block descriptors (dbds) for the affected pages (the
 * caller uses this to switch them to DBD_FSTORE).  Aligns each range
 * back to a filesystem-block boundary before marking.
 * NOTE(review): interior lines are elided in this view.
 */
962 afspgin_update_dbd(vfspage_t * vm_info, int bsize)
965 pgcnt_t count = bsize / NBPG;
970 int num_io = VM_GET_NUM_IO(vm_info);
973 for (i = 0; i < num_io; i++) {
975 pgindx = VM_GET_IO_STARTINDX(vm_info, i);
976 off = vnodindx(VM_REGION(vm_info), pgindx);
978 blkno = VM_GET_IO_STARTBLK(vm_info, i);
980 VASSERT(bsize % NBPG == 0);
981 VASSERT(rem % NBPG == 0);
/* Back both the page index and block number up to the block start. */
983 pgindx -= (pgcnt_t) btop(rem);
984 blkno -= (daddr_t) btodb(rem);
987 * This region could start in mid-block. If so, pgindx
988 * could be less than 0, so we adjust pgindx and blkno back
989 * up so that pgindx is 0.
997 blkno += btodb(ptob(prem));
1000 for (m = 0; m < count && pgindx < VM_REGION_SIZE(vm_info);
1001 m++, pgindx++, blkno += btodb(NBPG)) {
1003 * Note: since this only changes one block, it
1004 * assumes only one block was faulted in. Currently
1005 * this is always true for remote files, and we only
1006 * get here for remote files, so everything is ok.
1008 vm_mark_dbd(vm_info, pgindx, blkno);
/*
 * afs_pagein -- HP-UX VM pagein entry point for AFS mappings.
 * Runs under the VM "empire" lock (vmemp_lockx/vmemp_unlockx), NOT the
 * AFS global lock.  Overall flow of the surviving lines: initialize the
 * pagein state (vm_pagein_init), validate the vnode and block size,
 * detect a shrunken remote file (truncate + SIGBUS), reserve memory,
 * compute the I/O ranges (afspgin_setup_io_ranges), flush overlapping
 * buffer-cache blocks (afspgin_blkflsh), perform the I/O (afspgin_io),
 * force hole allocation for writable shared mappings (vm_alloc_hole),
 * and finally update dbds and statistics before releasing resources.
 * NOTE(review): interior lines are elided in this view; only the
 * surviving lines are annotated.
 */
1014 afs_pagein(vp, prp, wrt, space, vaddr, ret_startindex)
1020 pgcnt_t *ret_startindex;
1023 pgcnt_t pgindx = *ret_startindex;
1025 struct vnode *devvp;
1027 daddr_t start_blk = 0;
1031 int shared;		/* writable memory mapped file */
1032 retval_t retval = 0;
1033 pgcnt_t ok_dbd_limit = 0;	/* last dbd that we can trust */
1034 pgcnt_t bpages;		/* number of pages per block */
1036 vfspage_t *vm_info = NULL;
1043 int change_to_fstore = 0;	/* need to change dbds to DBD_FSTORE */
1044 int flush_start_blk = 0;
1045 int flush_end_blk = 0;
1049 AFS_STATCNT(afs_pagein);
1050 vmemp_lockx();		/* lock down VM empire */
1052 /* Initialize the VM info structure */
1054 vm_pagein_init(&vm_info, prp, pgindx, space, vaddr, wrt, 0,
1057 /* Check to see if we slept and the page was faulted in. */
1059 vm_release_structs(vm_info);
1063 vp = VM_GET_PAGEIN_VNODE(vm_info);
1064 VASSERT(vp != NULL);
1065 shared = VM_SHARED_OBJECT(vm_info);
1066 VASSERT(DBD_TYPE(vm_info) != DBD_NONE);
1069 * Get the devvp and block size for this vnode type
1072 bsize = vp->v_vfsp->vfs_bsize;
1073 if (bsize <= 0 || (bsize & (DEV_BSIZE - 1)))
1074 osi_Panic("afs_pagein: bsize is zero or not a multiple of DEV_BSIZE");
1076 bpages = (pgcnt_t) btop(bsize);
1077 VASSERT(bpages > 0);
1078 VM_SET_FS_MAX_PAGES(vm_info, bpages);
1080 /* this trace cannot be here because the afs_global lock might not be
1081 * held at this point. We hold the vm global lock throughout
1082 * this procedure ( and not the AFS global lock )
1083 * afs_Trace4(afs_iclSetp, CM_TRACE_HPPAGEIN, ICL_TYPE_POINTER, (afs_int32) vp,
1084 * ICL_TYPE_LONG, DBD_TYPE(vm_info), ICL_TYPE_LONG, bpages,
1085 * ICL_TYPE_LONG, shared);
1087 /* Come here if we have to release the region lock before
1088 * locking pages. This can happen in memreserve() and
1093 * For remote files like ours, we want to check to see if the file has shrunk.
1094 * If so, we should invalidate any pages past the end. In the name
1095 * of efficiency, we only do this if the page we want to fault is
1096 * past the end of the file.
/* A getattr failure means the object is dead: zombie it and bail. */
1099 if (VOP_GETATTR(vp, &va, kt_cred(u.u_kthreadp), VIFSYNC) != 0) {
1100 VM_ZOMBIE_OBJECT(vm_info);
1101 vm_release_memory(vm_info);
1102 vm_release_structs(vm_info);
1106 if (vnodindx(VM_REGION(vm_info), pgindx) >= isize) {
1108 * The file has shrunk and someone is trying to access a
1109 * page past the end of the object. Shrink the object back
1110 * to its current size, send a SIGBUS to the faulting
1111 * process and return.
1113 * We must release the region lock before calling mtrunc(),
1114 * since mtrunc() locks all the regions that are using this
1117 vm_release_memory(vm_info);
1118 vm_truncate_region(vm_info, isize);
1119 vm_release_structs(vm_info);
1120 vmemp_returnx(-SIGBUS);
/* Reserve memory for the pagein; re-check validity after any sleep. */
1124 maxpagein = vm_pick_maxpagein(vm_info);
1125 if (vm_wait_for_memory(vm_info, maxpagein, 1)) {
1126 /* Check to see if we should continue faulting. */
1127 if (vm_page_now_valid(vm_info, &page_count)) {
1128 vm_release_memory(vm_info);
1129 vm_release_structs(vm_info);
1130 vmemp_returnx(page_count);
1133 if (count = vm_no_io_required(vm_info)) {
1134 /* Release any excess memory. */
1135 vm_release_memory(vm_info);
1136 vm_release_structs(vm_info);
1137 vmemp_returnx(count);
1141 * We should never have DBD_HOLE pages in a non-MMF region.
1144 VASSERT(dbd->dbd_type != DBD_HOLE);
1146 VASSERT(DBD_TYPE(vm_info) != DBD_NONE);
1148 startindex = *ret_startindex;
1151 * If the page we want is in memory already, take it
1153 if (VM_MEMORY_RESERVED(vm_info) < maxpagein) {
1154 /* pick up the rest of memory now. */
1155 if (vm_wait_for_memory(vm_info, maxpagein, 0)) {
1156 if (vm_page_now_valid(vm_info, &page_count)) {
1157 vm_release_memory(vm_info);
1158 vm_release_structs(vm_info);
1159 vmemp_returnx(page_count);
/* Compute the I/O range(s); zero return means retry the fault. */
1167 afspgin_setup_io_ranges(vm_info, bpages, isize, startindex))) {
1171 startindex = VM_GET_STARTINDX(vm_info);
1173 VASSERT(maxpagein >= count);
1176 * Release the memory we won't need.
1178 if (count < maxpagein) {
1179 vm_release_excess_memory(vm_info,
1180 (VM_MEMORY_RESERVED(vm_info) - count));
1183 retval = afspgin_blkflsh(vm_info, devvp, &count);
1185 if (retval == VM_RETRY) {
1189 if (retval == VM_PAGE_PRESENT)
1194 * The definition of krusage_cntr_t is in h/kmetric.h, which
1195 * is not shipped. Since it's just statistics, we punt and do
1196 * not update it. If it's a problem we'll need to get HP to export
1197 * an interface that we can use to increment the counter.
1200 /* It's a real fault, not a reclaim */
1202 krusage_cntr_t *temp;
1203 temp = kt_cntrp(u.u_kthreadp);
1209 * Tell VM where the I/O intends to start. This may be different
1210 * from the faulting point.
1214 * vm_prepare_io will fill the region with pages and release the
1217 vm_prepare_io(vm_info, &count);
1220 * Count may have been adjusted, check to make sure it's non-zero.
1223 if (vm_retry(vm_info)) {
1228 * Release resources and retry the fault. Release any excess
1232 vm_release_memory(vm_info);
1233 vm_release_structs(vm_info);
/* Do the actual pagein I/O. */
1237 error = afspgin_io(vm_info, devvp, bpages, maxpagein, count);
1239 if ((VM_IS_ZOMBIE(vm_info)) || (error)) {
1241 VM_ZOMBIE_OBJECT(vm_info);
1245 * For a writable memory mapped file that is remote we must
1246 * detect potential holes in the file and force allocation of
1247 * disk space on the remote system. Unfortunately, there is
1248 * no easy way to do this, so this gets a little ugly.
1250 if (shared && wrt) {
1252 * See if The user wants to write to this page. Write some
1253 * minimal amount of data back to the remote file to
1254 * force allocation of file space. We only need to
1255 * write a small amount, since holes are always at
1256 * least one filesystem block in size.
1258 error = vm_alloc_hole(vm_info);
1261 * If some sort of I/O error occurred we generate a
1262 * SIGBUS for the process that caused the write,
1263 * undo our page locks, etc and return.
1265 if ((VM_IS_ZOMBIE(vm_info)) || (error)) {
1266 VM_ZOMBIE_OBJECT(vm_info);
1272 * Change these dbds to DBD_FSTORE. We cannot do it here,
1273 * since the region must be locked, and it is not locked
1274 * at the moment. We cannot lock the region yet, as we
1275 * first have to release the page locks.
1277 change_to_fstore = 1;
1280 vm_finish_io(vm_info, count);
1283 * Acquire the lock before we play around with changing the vfd's.
1287 if (change_to_fstore)
1288 afspgin_update_dbd(vm_info, bsize);
/* Per-processor fault statistics; layout differs before/after 11.0. */
1290 #if defined(AFS_HPUX110_ENV)
1291 getppdp()->cnt.v_exfod += count;
1293 mpproc_info[getprocindex()].cnt.v_exfod += count;
1295 vmemp_unlockx();	/* free up VM empire */
1296 *ret_startindex = startindex;
1299 * In case we have any excess memory...
1301 if (VM_MEMORY_RESERVED(vm_info))
1302 vm_release_memory(vm_info);
1303 vm_release_structs(vm_info);
/* Error path: unwind the I/O and any page validations. */
1309 vm_finish_io_failed(vm_info, count);
1313 vm_undo_validation(vm_info, count);
1316 * In case we have any excess memory...
1318 if (VM_MEMORY_RESERVED(vm_info))
1319 vm_release_memory(vm_info);
1320 vm_release_structs(vm_info);
1322 vmemp_unlockx();	/* free up VM empire */
1327 afs_pageout(vp, prp, start, end, flags)
1328 struct vnode *vp; /* not used */
1334 struct vnode *filevp;
1335 struct vnode *devvp;
1340 int *piocnt; /* wakeup counter used if PAGEOUT_WAIT */
1341 struct ucred *old_cred;
1345 int inode_changed = 0;
1349 AFS_STATCNT(afs_pageout);
1351 steal = (flags & PAGEOUT_FREE);
1352 vhand = (flags & PAGEOUT_VHAND);
1353 hard = (flags & PAGEOUT_HARD);
1357 /* Initialize the VM info structure. */
1358 vm_pageout_init(&vm_info, prp, start, end, 0, 0, 0, flags);
1361 * If the region is marked "don't swap", then don't steal any pages
1362 * from it. We can, however, write dirty pages out to disk (only if
1363 * PAGEOUT_FREE is not set).
1365 if (vm_no_pageout(&vm_info)) {
1371 * If caller wants to wait until the I/O is complete.
1373 vm_setup_wait_for_io(&vm_info);
1375 filevp = VM_GET_PAGEOUT_VNODE(&vm_info); /* always page out to back store */
1376 VASSERT(filevp != NULL);
1378 memset((caddr_t) & args, 0, sizeof(fsdata_t));
1379 args.remote_down = 0; /* assume remote file servers are up */
1380 args.remote = 1; /* we are remote */
1381 args.bsize = 0; /* filled up later by afs_vm_checkpage() */
1383 if (filevp->v_fstype == VUFS) {
1385 devvp = ip->i_devvp;
1392 * If we are vhand(), and this is an NFS file, we need to
1393 * see if the NFS server is "down". If so, we decide
1394 * if we will try to talk to it again, or defer pageouts
1395 * of dirty NFS pages until a future time.
1398 if (vhand && filevp->v_fstype == VNFS && vtomi(filevp)->mi_down
1399 && vtomi(filevp)->mi_hard) {
1400 extern afs_int32 vhand_nfs_retry;
1402 * If there is still time left on our timer, we will
1403 * not talk to this server right now.
1405 if (vhand_nfs_retry > 0)
1406 args.remote_down = 1;
1412 * Initialize args. We set bsize to 0 to tell vfs_vfdcheck() that
1413 * it must get the file size and other attributes if it comes across
1416 vm_info.fs_data = (caddr_t) & args;
1418 /* this trace cannot be here because the afs_global lock might not be
1419 * held at this point. We hold the vm global lock throughout
1420 * this procedure ( and not the AFS global lock )
1421 * afs_Trace4(afs_iclSetp, CM_TRACE_HPPAGEOUT, ICL_TYPE_POINTER, (afs_int32) filevp,
1422 * ICL_TYPE_LONG, start, ICL_TYPE_LONG, end, ICL_TYPE_LONG, flags);
1434 extern int pageiodone();
1439 * Ask the VM system to find the next run of pages.
1441 vm_find_next_range(&vm_info, i, end);
1444 * It's possible that the remote file shrunk in size. Check the flags
1445 * to see if the request was beyond the end of the file. If it was,
1446 * truncate the region to the file size and continue. We could be on a
1447 * run, so after truncation we continue; there may be some I/O to write
1450 if (VM_FS_FLAGS(&vm_info) & PAGEOUT_TRUNCATE) {
1451 pgcnt_t pglen = (pgcnt_t) btorp(args.isize);
1454 * This page is past the end of the file. Unlock this page
1455 * (region_trunc will throw it away) and then call
1456 * region_trunc() to invalidate all pages past the new end of
1459 region_trunc(VM_REGION(&vm_info), pglen, pglen + 1);
1462 * remove the truncation flag.
1464 VM_UNSETFS_FLAGS(&vm_info, PAGEOUT_TRUNCATE);
1467 if (VM_NO_PAGEOUT_RUN(&vm_info))
1471 * We have a run of dirty pages [args.start...args.end].
1473 VASSERT(filevp->v_fstype != VCDFS);
1474 VASSERT((filevp->v_vfsp->vfs_flag & VFS_RDONLY) == 0);
1475 VASSERT(VM_GET_NUM_IO(&vm_info) == 1);
1478 * We will be doing an I/O on the region, let the VM system know.
1480 (void)vm_up_physio_count(&vm_info);
1483 * Okay, get set to perform the I/O.
1487 (VM_END_PAGEOUT_INDX(&vm_info) + 1) -
1488 VM_START_PAGEOUT_INDX(&vm_info);
1491 * Allocate and initialize an I/O buffer.
1494 vm_init_bp(&vm_info, bp); /* Let the VM system initialize */
1496 /* Identify this buffer for KI */
1497 bp->b_bptype = B_vfs_pageout | B_pagebf;
1500 bp->b_flags = B_CALL | B_BUSY | B_PAGEOUT; /* steal pages */
1502 bp->b_flags = B_CALL | B_BUSY; /* keep pages */
1505 * If we are vhand paging over NFS, we will wait for the I/O
1508 if (vhand && filevp->v_fstype == VNFS) {
1509 bp->b_flags &= ~B_CALL;
1511 bp->b_iodone = (int (*)())pageiodone;
1515 * Make sure we do not write past the end of the file.
1517 nbytes = ptob(npages);
1518 start = vnodindx(VM_REGION(&vm_info), vm_info.start);
1519 if (start + nbytes > args.isize) {
1522 * The amount we are off better not be bigger than a
1525 if (start + nbytes - args.isize >= args.bsize) {
1526 osi_Panic("afs_pageout: remainder too large");
1530 * Reset the size of the I/O as necessary. For remote
1531 * files, we set the size to the exact number of bytes to
1532 * the end of the file. For local files, we round this up
1533 * to the nearest DEV_BSIZE chunk since disk I/O must always
1534 * be in multiples of DEV_BSIZE. In this case, we do not
1535 * bother to zero out the data past the "real" end of the
1536 * file, this is done when the data is read (either through
1537 * mmap() or by normal file system access).
1540 nbytes = args.isize - start;
1542 nbytes = roundup(args.isize - start, DEV_BSIZE);
1546 * Now get ready to perform the I/O
1548 if (!vm_protect_pageout(&vm_info, npages)) {
1550 vm_undo_invalidation(&vm_info, vm_info.start, vm_info.end);
1551 vm_finish_io_failed(&vm_info, npages);
1556 * If this is an NFS write by vhand(), we will not be calling
1557 * pageiodone(). asyncpageio() increments parolemem for us
1558 * if bp->b_iodone is pageiodone, so we must do it manually
1559 * if pageiodone() will not be called automatically.
1561 if (!(bp->b_flags & B_CALL) && steal) {
1562 register ulong_t context;
1564 SPINLOCK_USAV(pfdat_lock, context);
1565 parolemem += btorp(nbytes);
1566 SPINUNLOCK_USAV(pfdat_lock, context);
1568 blkflush(devvp, VM_START_PAGEOUT_BLK(&vm_info), (long)nbytes,
1569 (BX_NOBUFWAIT | BX_PURGE), VM_REGION(&vm_info));
1572 * If vhand is the one paging things out, and this is an NFS
1573 * file, we need to temporarily become a different user so
1574 * that we are not trying to page over NFS as root. We use
1575 * the user credentials associated with the writable file
1576 * pointer that is in the pseudo-vas for this MMF.
1578 * NOTE: we are currently using "va_rss" to store the ucred
1579 * value in the vas (this should be fixed in 10.0).
1581 old_cred = kt_cred(u.u_kthreadp);
1583 set_kt_cred(u.u_kthreadp, filevp->v_vas->va_cred);
1586 * If root was the one who opened the mmf for write,
1587 * va_cred will be NULL. So reset kt_cred(u.u_kthreadp) to what it
1588 * was. We will page out as root, but that is the
1589 * correct thing to do in this case anyway.
1591 if (kt_cred(u.u_kthreadp) == NULL)
1592 set_kt_cred(u.u_kthreadp, old_cred);
1596 * Really do the I/O.
1599 asyncpageio(bp, VM_START_PAGEOUT_BLK(&vm_info),
1600 VM_MAPPED_SPACE(&vm_info), VM_MAPPED_ADDR(&vm_info),
1601 (int)nbytes, B_WRITE, devvp);
1603 VASSERT(error == 0);
1607 * If we are vhand paging over NFS we want to wait for the
1608 * I/O to complete and take the appropriate actions if an
1609 * error is encountered.
1612 if (waitforpageio(bp) && nfs_mi_harddown(filevp)) {
1614 * The server is down, ignore this failure, and
1615 * try again later. (rfscall() has set our retry
1618 fsdata.remote_down = 1;
1619 pageiocleanup(bp, 0);
1622 * vm_vfdcheck() has cleared the valid bit on the
1623 * vfds for these pages. We must go back and set the
1624 * valid bit, as the pages are really not gone.
1626 * NOTE: we can do this because we still hold (and have
1627 * not released) the region lock.
1630 vm_undo_invalidation(&vm_info, vm_info.start,
1634 * The I/O succeeded, or we had an error that we do
1635 * not want to defer until later. Call pageiodone()
1644 * And restore our credentials to what they were.
1646 set_kt_cred(u.u_kthreadp, old_cred);
1649 * If we reserved memory in vfs_vfdcheck(), (only for NFS) we
1650 * can now unreserve it.
1652 if (vm_info.vm_flags & PAGEOUT_RESERVED) {
1653 vm_info.vm_flags &= ~PAGEOUT_RESERVED;
1654 vm_release_malloc_memory();
1661 if (flags & PF_DEACT) {
1662 #if defined(AFS_HPUX110_ENV)
1663 getppdp()->cnt.v_pswpout += npages;
1665 mpproc_info[getprocindex()].cnt.v_pswpout += npages;
1667 /* sar_bswapout += ptod(npages);*/
1669 #if defined(AFS_HPUX110_ENV)
1670 getppdp()->cnt.v_pgout++;
1671 getppdp()->cnt.v_pgpgout += npages;
1673 mpproc_info[getprocindex()].cnt.v_pgout++;
1674 mpproc_info[getprocindex()].cnt.v_pgpgout += npages;
1680 * If time and patience have delivered enough
1681 * pages, then quit now while we are ahead.
1683 if (VM_STOP_PAGING(&vm_info))
1686 i = VM_END_PAGEOUT_INDX(&vm_info) - VM_BASE_OFFSET(&vm_info) + 1;
1689 vm_finish_pageout(&vm_info); /* update vhand's stealscan */
1694 * If we wanted to wait for the I/O to complete, sleep on piocnt.
1695 * We must decrement it by one first, and then make sure that it
1696 * is non-zero before going to sleep.
1698 vm_wait_for_io(&vm_info);
1700 if (inode_changed && !file_is_remote) {
1701 imark(ip, IUPD | ICHG);
/*
 * afs_mapdbd - map a logical file offset to a device block number for the
 * VM pagein/pageout path, via VOP_BMAP().
 *
 * filevp   - vnode of the file being mapped
 * bn       - out: block number (device blocks)
 * flags    - B_READ or B_WRITE
 * hole     - out: read-ahead hint; always 0 here ("Can't have holes")
 * startidx - out: read-ahead start page index (offset / NBPG)
 * endidx   - out: read-ahead end page index
 *
 * NOTE(review): this view of the file is missing intermediate lines
 * (error handling, locals, return); comments describe only the visible
 * statements -- confirm against the full source.
 */
1708 afs_mapdbd(filevp, offset, bn, flags, hole, startidx, endidx)
1709 struct vnode *filevp;
1711 daddr_t *bn; /* Block number. */
1712 int flags; /* B_READ or B_WRITE */
1713 int *hole; /* To be used for read-ahead. */
1714 pgcnt_t *startidx; /* To be used for read-ahead. */
1715 pgcnt_t *endidx; /* To be used for read-ahead. */
1717 daddr_t lbn, local_bn;
/* Block size rounded down to a DEV_BSIZE multiple; a zero size panics below. */
1720 long bsize = vtoblksz(filevp) & ~(DEV_BSIZE - 1);
1723 *startidx = (pgcnt_t) (offset / NBPG);
1725 *endidx = (pgcnt_t) (offset / NBPG);
1727 *hole = 0; /* Can't have holes. */
1729 osi_Panic("afs_mapdbd: zero size");
/* Split offset into logical block number + byte offset within the block. */
1731 lbn = (daddr_t) (offset / bsize);
1732 on = offset % bsize;
1734 err = VOP_BMAP(filevp, lbn, NULL, &local_bn, flags);
1738 * We can never get a bn less than zero on remote files.
1740 VASSERT(local_bn >= 0);
/* Advance to the device block containing the intra-block offset. */
1742 local_bn = local_bn + btodb(on);
/*
 * afs_vm_fscontiguous - tell VM whether the next page's disk block is
 * contiguous with the run being collected for pageout.
 * Returns (per the visible comment):
 */
1750 * 1: The blocks are contiguous.
1751 * 0: The blocks are not contiguous.
1754 afs_vm_fscontiguous(vp, args, cur_data)
/* Contiguous iff cur_data immediately follows the run's last block
 * (end-of-run block + one page's worth of device blocks).
 * NOTE(review): body lines are missing from this view. */
1759 if (cur_data == (VM_END_PAGEOUT_BLK(args) + btodb(NBPG))) {
/*
 * afs_vm_stopio - tell VM whether to stop collecting pages for this I/O.
 * Returns (per the visible comment):
 */
1768 * 1: Stop, this page is the last in the block.
1770 * Terminate requests at filesystem block boundaries
1772 afs_vm_stopio(vp, args)
1776 fsdata_t *fsdata = (fsdata_t *) args->fs_data;
/* Stop when the byte address one page past the run's last block lands
 * exactly on a filesystem-block boundary.
 * NOTE(review): surrounding lines are missing from this view. */
1778 if ((dbtob(VM_END_PAGEOUT_BLK(args)) + NBPG) % (fsdata->bsize) == 0) {
/*
 * afs_vm_checkpage - per-page callback from VM during pageout run
 * collection.  Decides whether this dirty page may join the run:
 * lazily caches file size/block size in args->fs_data, zombies the
 * region on unrecoverable getattr failure, truncates past-EOF pages,
 * and (for vhand) reserves pageout memory.
 *
 * NOTE(review): many interior lines (locals, else-arms, returns, braces)
 * are missing from this view; comments cover only visible statements.
 */
1786 * afs_vm_checkpage is called by the VM while collecting a run of
1787 * pages on a pageout. afs_vm_checkpage() is called for each page
1788 * VM wants to write to disk.
1790 afs_vm_checkpage(vp, args, pgindx, cur_data)
1796 fsdata_t *fsdata = (fsdata_t *) args->fs_data;
1798 if (fsdata->remote_down) { /* never happens for AFS */
1800 * The remote system is down.
1802 VASSERT(args->run == 0);
1806 * A dirty page. If we have not yet determined the file size and
1807 * other attributes that we need to write out pages (the block
1808 * size and ok_dbd_limit), get that information now.
1810 if (fsdata->bsize == 0) {
1814 struct vnode *filevp;
1816 * Get the various attributes about the file. Store them
1817 * in args for the next time around.
1821 bsize = vtoblksz(filevp);
1822 args->maxpgs = (pgcnt_t) btop(bsize);
/* VIFSYNC getattr; failure handling below depends on mount type. */
1824 if (VOP_GETATTR(filevp, &va, kt_cred(u.u_kthreadp), VIFSYNC) != 0) {
1826 * The VOP_GETATTR() failed.
1827 * we are vhand, and this is a hard mount, we will
1828 * skip dirty pages for a while and try again later.
1830 if (args->vm_flags & PAGEOUT_VHAND) {
1831 VASSERT(args->run == 0);
1835 * This is a "soft" mount, or some other error was
1836 * returned from the server. Mark this region
1837 * as a zombie, and free this dirty page.
1839 VM_ZOMBIE_OBJECT(args);
1842 * The caller will see r_zomb and remove the page
/* Cache the attributes for subsequent pages of this run. */
1848 fsdata->isize = isize;
1849 fsdata->bsize = bsize;
1853 * See if the file has shrunk (this could have happened
1854 * asynchronously because of NFS or DUX). If so, invalidate
1855 * all of the pages past the end of the file. This is only
1856 * needed for remote files, as local files are truncated
1860 if (vnodindx(VM_REGION(args), pgindx) > fsdata->isize) {
1862 * This page is past the end of the file. Unlock this page
1863 * (region_trunc will throw it away) and then call region_trunc()
1864 * to invalidate all pages past the new end of the file.
1866 VM_SETFS_FLAGS(args, PAGEOUT_TRUNCATE);
/* vhand pageout: reserve malloc memory once per pageout, recorded via
 * PAGEOUT_RESERVED so it can be released later. */
1870 if ((args->vm_flags & PAGEOUT_VHAND)
1871 && (!(args->vm_flags & PAGEOUT_RESERVED))
1872 && (!(VM_IS_ZOMBIE(args)))) {
1873 VASSERT(args->run == 0);
1874 if (vm_reserve_malloc_memory(NFS_PAGEOUT_MEM)) {
1876 * Got enough memory to pageout. Mark the fact that we did
1877 * a sysprocmemreserve(), so that we can sysprocmemunreserve() it
1878 * later (in remote_pageout()).
1880 args->vm_flags |= PAGEOUT_RESERVED;
1883 * We do not have enough memory to do this pageout. By
1884 * definition, we do not yet have a run, so we just unlock
1885 * this page and tell foreach_valid() to continue scanning.
1886 * If we come across another dirty page, we will try to
1887 * reserve memory again. That is okay, in fact some memory
1888 * may have freed up (as earlier pageouts complete under
/*
 * NOTE(review): function header is missing from this view -- presumably
 * the body of afs_swapfs_len (declared extern at the top of the file);
 * confirm against the full source.  Clamps a buffer's transfer length so
 * an I/O starting mid filesystem-block does not cross a block boundary.
 */
1905 fs_bsize = vtoblksz(bp->b_vp);
1907 * Check to see if we are starting mid block. If so, then
1908 * we must return the remainder of the block or less depending
1911 bnrem = bp->b_offset % fs_bsize;
/* Mid-block start: at most the rest of this block; else a whole block. */
1913 max_size = fs_bsize - bnrem;
1915 max_size = fs_bsize;
1918 if (bp->b_bcount > max_size) {
/* Request fits within the limit: return it unchanged. */
1921 return (bp->b_bcount);
/*
 * afs_mmap - mmap entry point for AFS vnodes.  The visible fragment
 * validates that the filesystem block size is page-aligned.
 * NOTE(review): parameter declarations and most of the body are missing
 * from this view.
 */
1925 afs_mmap(vp, off, size_bytes, access)
1928 #if defined(AFS_HPUX1111_ENV)
1935 long bsize = vtoblksz(vp);
/* Block size must be a multiple of the page size for mapping. */
1937 if (bsize % NBPG != 0) {
/*
 * afs_cachelimit - report a cache limit location for a length 'len' on
 * this vnode.  Converts bytes to pages (+1 slop).
 * NOTE(review): body lines are missing from this view.
 */
1944 afs_cachelimit(vp, len, location)
1950 * Disk addresses are logical, not physical, so fragments are
1953 *location = btorp(len) + 1;
/*
 * afs_unmap - unmap entry point for AFS vnodes; signature mirrors
 * afs_mmap above.  NOTE(review): body is missing from this view.
 */
1963 afs_unmap(vp, off, size_bytes, access)
1966 #if defined(AFS_HPUX1111_ENV)
/*
 * afs_read_ahead - VM read-ahead hook; the visible fragment logs and
 * (per the message) returns 0, i.e. no read-ahead is set up here.
 * NOTE(review): body is missing from this view.
 */
1977 afs_read_ahead(vp, prp, wrt, space, vaddr, rhead_cnt)
1985 printf("afs_read_ahead returning 0 \n");
/*
 * afs_prealloc - preallocation hook; the visible fragment logs and (per
 * the message) fails with ENOSPC.  NOTE(review): body is missing from
 * this view.
 */
1990 afs_prealloc(vp, size, ignore_minfree, reserved)
1992 /* DEE on 11.22 following is off_t */
1997 printf("afs_prealloc returning ENOSPC\n");
/*
 * afs_ioctl - ioctl entry point for AFS vnodes.  Only VICEIOCTL calls
 * (command group 'V') are handled: the struct afs_ioctl argument is
 * copied in and dispatched to HandleIoctl().
 * NOTE(review): interior lines (error checks, other command groups,
 * return) are missing from this view.
 */
2002 afs_ioctl(vp, com, data, flag, cred)
2010 struct afs_ioctl afsioctl, *ai;
2012 AFS_STATCNT(afs_ioctl);
2014 /* The call must be a VICEIOCTL call */
2015 if (((com >> 8) & 0xff) == 'V') {
2017 /* AFS_COPYIN returns error 14. Copy data in instead */
2018 AFS_COPYIN(data, (caddr_t) & afsioctl, sizeof(afsioctl), error);
/* Fallback path: treat 'data' as a kernel pointer and copy the
 * struct afs_ioctl fields by hand. */
2022 ai = (struct afs_ioctl *)data;
2023 afsioctl.in = ai->in;
2024 afsioctl.out = ai->out;
2025 afsioctl.in_size = ai->in_size;
2026 afsioctl.out_size = ai->out_size;
2027 error = HandleIoctl(VTOAFS(vp), com, &afsioctl);
2033 #if defined(AFS_HPUX1111_ENV)
2034 /* looks like even if appl is 32 bit, we need to round to 8 bytes */
2035 /* This had no effect, it must not be being used */
2037 #define roundtoint(x) (((x) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
2038 #define reclen(dp) roundtoint(((dp)->d_namlen + 1 + (sizeof(u_long)) +\
2039 sizeof(u_int) + 2 * sizeof(u_short)))
2042 #define roundtoint(x) (((x) + (sizeof(int) - 1)) & ~(sizeof(int) - 1))
2043 #define reclen(dp) roundtoint(((dp)->d_namlen + 1 + (sizeof(u_long)) +\
2044 2 * sizeof(u_short)))
/*
 * afs_readdir - read a directory, converting entries from the internal
 * __dirent32 format (produced by mp_afs_readdir2 into a kernel buffer)
 * to the caller's struct dirent format, then copying out with
 * AFS_UIOMOVE and advancing uiop->uio_offset to the last entry used.
 *
 * NOTE(review): interior lines (locals, error bailouts, loop close) are
 * missing from this view; comments cover only visible statements.
 */
2048 afs_readdir(vp, uiop, cred)
2055 caddr_t ibuf, obuf, ibufend, obufend;
2056 struct __dirent32 *idp;
2058 int count, outcount;
2060 uint64_t tmp_offset;
2062 count = uiop->uio_resid;
2063 /* Allocate temporary space for format conversion */
2064 ibuf = kmem_alloc(2 * count); /* overkill - fix later */
2065 obuf = kmem_alloc(count + sizeof(struct dirent));
/* Build a kernel-space uio over ibuf for the internal readdir. */
2066 aiov.iov_base = ibuf;
2067 aiov.iov_len = count;
2068 auio.uio_iov = &aiov;
2069 auio.uio_iovcnt = 1;
2070 offset = auio.uio_offset = uiop->uio_offset;
2071 auio.uio_seg = UIOSEG_KERNEL;
2072 auio.uio_resid = count;
2073 auio.uio_fpflags = 0;
2075 u.u_error = mp_afs_readdir2(vp, &auio, cred);
2079 /* Convert entries from __dirent32 to dirent format */
2081 for (idp = (struct __dirent32 *)ibuf, odp =
2082 (struct dirent *)obuf, ibufend =
2083 ibuf + (count - auio.uio_resid), obufend = obuf + count;
2084 (caddr_t) idp < ibufend;
2085 idp = (struct __dirent32 *)((caddr_t) idp + idp->__d_reclen), odp =
2086 (struct dirent *)((caddr_t) odp + odp->d_reclen)) {
2087 odp->d_ino = idp->__d_ino;
2088 odp->d_namlen = idp->__d_namlen;
2089 (void)strcpy(odp->d_name, idp->__d_name);
2090 odp->d_reclen = reclen(odp);
/* Stop before overflowing the output buffer. */
2091 if ((caddr_t) odp + odp->d_reclen > obufend)
2093 /* record offset *after* we're sure to use this entry */
/* memcpy because __d_off may be unaligned for a direct 64-bit load. */
2094 memcpy((char *)&tmp_offset, (char *)&idp->__d_off, sizeof tmp_offset);
2095 offset = tmp_offset;
2098 outcount = (caddr_t) odp - obuf;
2099 AFS_UIOMOVE(obuf, outcount, UIO_READ, uiop, u.u_error);
2102 uiop->uio_offset = offset;
2104 kmem_free(ibuf, count);
2105 kmem_free(obuf, count + sizeof(struct dirent));
2110 #define roundtolong(x) (((x) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
2111 #define reclen_dirent64(dp) roundtolong(((dp)->__d_namlen + 1 + (2*sizeof(u_long)) +\
2112 2 * sizeof(u_short)))
/*
 * afs_readdir3 - like afs_readdir above, but converts the internal
 * __dirent32 entries to __dirent64 format for 64-bit-offset callers.
 *
 * NOTE(review): interior lines (locals, error bailouts, loop close) are
 * missing from this view; comments cover only visible statements.
 */
2115 afs_readdir3(vp, uiop, cred)
2122 caddr_t ibuf, obuf, ibufend, obufend;
2123 struct __dirent32 *idp;
2124 struct __dirent64 *odp;
2125 int count, outcount;
2128 count = uiop->uio_resid;
2129 /* Allocate temporary space for format conversion */
2130 ibuf = kmem_alloc(2 * count); /* overkill - fix later */
2131 obuf = kmem_alloc(count + sizeof(struct __dirent64));
/* Kernel-space uio over ibuf for the internal readdir. */
2132 aiov.iov_base = ibuf;
2133 aiov.iov_len = count;
2134 auio.uio_iov = &aiov;
2135 auio.uio_iovcnt = 1;
2136 offset = auio.uio_offset = uiop->uio_offset;
2137 auio.uio_seg = UIOSEG_KERNEL;
2138 auio.uio_resid = count;
2139 auio.uio_fpflags = 0;
2141 u.u_error = mp_afs_readdir2(vp, &auio, cred);
2145 /* Convert entries from __dirent32 to __dirent64 format */
2147 for (idp = (struct __dirent32 *)ibuf, odp =
2148 (struct __dirent64 *)obuf, ibufend =
2149 ibuf + (count - auio.uio_resid), obufend = obuf + count;
2150 (caddr_t) idp < ibufend;
2151 idp = (struct __dirent32 *)((caddr_t) idp + idp->__d_reclen), odp =
2152 (struct __dirent64 *)((caddr_t) odp + odp->__d_reclen)) {
/* memcpy because __d_off may be unaligned for a direct 64-bit load. */
2153 memcpy((char *)&odp->__d_off, (char *)&idp->__d_off,
2154 sizeof odp->__d_off);
2155 odp->__d_ino = idp->__d_ino;
2156 odp->__d_namlen = idp->__d_namlen;
2157 (void)strcpy(odp->__d_name, idp->__d_name);
2158 odp->__d_reclen = reclen_dirent64(odp);
/* Stop before overflowing the output buffer. */
2159 if ((caddr_t) odp + odp->__d_reclen > obufend)
2161 /* record offset *after* we're sure to use this entry */
2162 offset = odp->__d_off;
2165 outcount = (caddr_t) odp - obuf;
2166 AFS_UIOMOVE(obuf, outcount, UIO_READ, uiop, u.u_error);
2169 uiop->uio_offset = offset;
2171 kmem_free(ibuf, count);
2172 kmem_free(obuf, count + sizeof(struct __dirent64));
2176 #define AFS_SV_SEMA_HASH 1
2177 #define AFS_SV_SEMA_HASH_DEBUG 0
2179 #if AFS_SV_SEMA_HASH
2180 /* This portion of the code was originally used to implement
2181 * thread specific storage for the semaphore save area. However,
2182 * there were some spare fields in the proc structure, this is
2183 * now being used for saving the semaphores. Hence, this portion of
2184 * the code is no longer used.
2187 /* This portion of the code implements thread specific information.
2188 * The thread id is passed in as the key. The semaphore saved area
2189 * is hashed on this key.
2192 /* why is this hash table required ?
2193 * The AFS code is written in such a way that a GLOCK() is done in
2194 * one function and the GUNLOCK() is done in another function further
2195 * down the call chain. The GLOCK() call has to save the current
2196 * semaphore status before acquiring afs_global_sema. The GUNLOCK
2197 * has to release afs_global_sema and reacquire the semaphore status
2198 * that existed before the corresponding GLOCK. If GLOCK() and
2199 * GUNLOCK() were called in the same function, the GLOCK call could
2200 * have stored the saved semaphore status in a local variable and the
2201 * corresponding GUNLOCK() call could have restored the original
2202 * status from this local variable. But this is not the case with
2203 * AFS code. Hence, we have to implement a thread specific semaphore
2204 * save area. This is implemented as a hash table. The key is the
2208 /* In order for multithreaded processes to work, the sv_sema structures
2209 * must be saved on a per-thread basis, not a per-process basis. There
2210 * is no per-thread storage available to hijack in the OS per-thread
2211 * data structures (e.g. struct user) so we revive this code.
2212 * I removed the upper limit on the memory consumption since we don't
2213 * know how many threads there will be. Now the code first checks the
2214 * freeList. If that fails it then tries garbage collecting. If that
2215 * doesn't free up anything then it allocs what it needs.
2218 #define ELEMENT sv_sema_t
2220 #define Hash(xx) ( (xx) % sizeOfHashTable )
2221 #define hashLockInit(xx) initsema(&xx,1, FILESYS_SEMA_PRI, FILESYS_SEMA_ORDER)
2222 #define hashLock(xx) MP_PSEMA(&xx)
2223 #define hashUnlock(xx) MP_VSEMA(&xx)
2225 typedef struct elem {
2232 typedef struct bucket {
2237 static int sizeOfHashTable;
2238 static Bucket *hashTable;
2240 static int currentSize = 0;
2241 static Element *freeList; /* free list */
2244 static sema_t afsHashLock = { 0 }; /* global lock for hash table */
2246 static void afsHashGarbageCollect();
2249 ** The global lock protects the global data structures,
2250 ** e.g. freeList and currentSize.
2251 ** The bucket lock protects the link list hanging off that bucket.
2252 ** The lock hierarchy : one can obtain the bucket lock while holding
2253 ** the global lock, but not vice versa.
/*
 * afsHash - allocate and initialize the per-thread semaphore-save hash
 * table ('nbuckets' buckets) and its locks.  Panics if the table was
 * already created or the allocation fails.
 * NOTE(review): some interior lines (the panic conditions' if-tests,
 * return) are missing from this view.
 */
2258 afsHash(int nbuckets)
2259 { /* allocate the hash table */
2262 #if AFS_SV_SEMA_HASH_DEBUG
2263 printf("afsHash: enter\n");
2266 sizeOfHashTable = nbuckets;
/* Account the table itself in the module's memory bookkeeping. */
2267 currentSize = nbuckets * sizeof(Bucket);
2270 osi_Panic("afs: SEMA Hashtable already created\n");
2272 hashTable = (Bucket *) AFS_KALLOC(sizeOfHashTable * sizeof(Bucket));
2274 osi_Panic("afs: cannot create SEMA Hashtable\n");
2276 /* initialize the hash table and associated locks */
2277 memset((char *)hashTable, 0, sizeOfHashTable * sizeof(Bucket));
2278 for (i = 0; i < sizeOfHashTable; i++)
2279 hashLockInit(hashTable[i].lock);
2280 hashLockInit(afsHashLock);
2282 #if AFS_SV_SEMA_HASH_DEBUG
2283 printf("afsHash: exit\n");
/*
 * afsHashInsertFind - look up 'key' (a thread id) in the semaphore-save
 * hash table, bumping its refCnt if found; otherwise create a new entry
 * (reusing the freeList, garbage-collecting, or allocating) and insert
 * it at the head of the bucket.  Returns a pointer to the entry's
 * sv_sema_t save area.
 * NOTE(review): interior lines (loop heads, if-tests, braces) are
 * missing from this view; locking order is global afsHashLock vs.
 * per-bucket locks as described in the comment block above.
 */
2288 afsHashInsertFind(KEY key)
2293 #if AFS_SV_SEMA_HASH_DEBUG
2294 printf("afsHashInsertFind: %d\n", key);
2297 osi_Panic("afs: afsHashInsertFind: no hashTable\n");
2299 index = Hash(key); /* get bucket number */
2300 hashLock(hashTable[index].lock); /* lock this bucket */
2301 ptr = hashTable[index].element;
2303 /* if it is already there */
2305 if (ptr->key == key) {
2306 ptr->refCnt++; /* hold it */
2307 hashUnlock(hashTable[index].lock);
2308 #if AFS_SV_SEMA_HASH_DEBUG
2309 printf("afsHashInsertFind: %d FOUND\n", key);
2311 return &(ptr->element);
2317 hashUnlock(hashTable[index].lock);
2319 /* if something exists in the freeList, take it from there */
2321 hashLock(afsHashLock);
2324 ptr = freeList; /* reuse entry */
2325 freeList = freeList->next;
/* freeList empty: try reclaiming zero-refCnt entries first. */
2327 afsHashGarbageCollect(); /* afsHashLock locked */
2329 ptr = freeList; /* reuse entry */
2330 freeList = freeList->next;
/* Still nothing reusable: allocate a fresh Element. */
2332 ptr = (Element *) AFS_KALLOC(sizeof(Element));
2336 currentSize += sizeof(Element); /* update memory used */
2337 hashUnlock(afsHashLock);
2340 osi_Panic("afs: SEMA Hashtable cannot create new entry\n");
2341 /* create new entry */
2343 memset((char *)&ptr->element, 0, sizeof(ptr->element));
2344 ptr->refCnt = 1; /* this guy */
2346 /* insert new entry in bucket */
2347 hashLock(hashTable[index].lock); /* lock this bucket */
2348 ptr->next = hashTable[index].element;
2349 hashTable[index].element = ptr;
2350 hashUnlock(hashTable[index].lock);
2352 #if AFS_SV_SEMA_HASH_DEBUG
2353 printf("afsHashInsertFind: %d MADE\n", key);
2356 return &(ptr->element);
/*
 * afsHashFind - look up 'key' in the semaphore-save hash table and
 * return a pointer to its save area WITHOUT changing refCnt.  Panics if
 * the entry is absent or already released -- callers must hold a
 * reference from afsHashInsertFind().
 * NOTE(review): the bucket-scan loop head is missing from this view.
 */
2360 afsHashFind(KEY key)
2365 #if AFS_SV_SEMA_HASH_DEBUG
2366 printf("afsHashFind: %d\n", key);
2369 osi_Panic("afs: afsHashFind: no hashTable\n");
2371 index = Hash(key); /* get bucket number */
2372 hashLock(hashTable[index].lock); /* lock this bucket */
2373 ptr = hashTable[index].element;
2375 /* it should be in the hash table */
2377 if (ptr->key == key) {
2378 if (ptr->refCnt <= 0)
2379 osi_Panic("afs: SEMA HashTable entry already released\n");
2380 hashUnlock(hashTable[index].lock);
2381 #if AFS_SV_SEMA_HASH_DEBUG
2382 printf("afsHashFind: %d FOUND\n", key);
2384 return &(ptr->element);
2390 hashUnlock(hashTable[index].lock);
2391 /* it better be in the hash table */
2392 osi_Panic("afs: SEMA HashTable wants non-existent entry \n");
/*
 * afsHashRelease - drop one reference on 'key''s entry.  The entry is
 * left in its bucket with refCnt decremented; zero-refCnt entries are
 * reclaimed later by afsHashGarbageCollect().  Panics if the entry is
 * absent or already released.
 * NOTE(review): the bucket-scan loop head is missing from this view.
 */
2397 afsHashRelease(KEY key)
2402 #if AFS_SV_SEMA_HASH_DEBUG
2403 printf("afsHashRelease: %d\n", key);
2406 osi_Panic("afs: afsHashRelease: no hashTable\n");
2408 index = Hash(key); /* get bucket number */
2409 hashLock(hashTable[index].lock); /* lock this bucket */
2410 ptr = hashTable[index].element;
2412 /* it should be in the hash table */
2414 if (ptr->key == key) {
2415 if (ptr->refCnt <= 0)
2416 osi_Panic("afs: SEMA HashTable entry already released\n");
2417 ptr->refCnt--; /* release this guy */
2418 hashUnlock(hashTable[index].lock);
2419 #if AFS_SV_SEMA_HASH_DEBUG
2420 printf("afsHashRelease: %d FOUND\n", key);
2428 hashUnlock(hashTable[index].lock);
2429 /* it better be in the hash table */
2430 osi_Panic("afs: SEMA HashTable deleting non-existent entry \n");
/*
 * afsHashGarbageCollect - sweep every bucket, moving zero-refCnt
 * entries to the freeList and shrinking currentSize.  Takes each bucket
 * lock in turn; panics if nothing was reclaimable ("HashTable full").
 * NOTE(review): loop heads, 'temp' declaration/assignment, and braces
 * are missing from this view.
 */
2433 /* this should be called with afsHashLock WRITE locked */
2435 afsHashGarbageCollect()
2442 osi_Panic("afs: afsHashGarbageCollect: no hashTable\n");
2444 for (index = 0; index < sizeOfHashTable; index++) {
2445 hashLock(hashTable[index].lock);
2446 ptr = hashTable[index].element; /* pick up bucket */
/* First, strip reclaimable entries off the head of the chain. */
2448 while (ptr && !ptr->refCnt) {
2449 /* insert this element into free list */
2452 ptr->next = freeList;
2455 foundFlag = 1; /* found at least one */
2456 currentSize -= sizeof(Element);
2459 hashTable[index].element = ptr;
2461 /* scan thru the remaining list */
/* Interior reclaim: unlink ptr->next when its refCnt hit zero. */
2464 if (ptr->next->refCnt == 0) {
2465 /* collect this element */
2468 ptr->next = ptr->next->next;
2469 temp->next = freeList;
2472 currentSize -= sizeof(Element);
2478 hashUnlock(hashTable[index].lock);
2482 osi_Panic("afs: SEMA HashTable full\n");
2486 #endif /* AFS_SV_SEMA_HASH */
/*
 * NOTE(review): function header is missing from this view -- presumably
 * the body of afs_hp_strategy (declared extern at the top of the file);
 * confirm against the full source.  Services a buf-based block I/O
 * request by translating it into an afs_rdwr() uio: reads zero-fill any
 * shortfall, writes go straight through.
 */
2490 register struct buf *bp;
2492 register afs_int32 code;
2494 struct iovec tiovec[1];
2495 extern caddr_t hdl_kmap_bp();
2496 register struct kthread *t = u.u_kthreadp;
2498 AFS_STATCNT(afs_hp_strategy);
2500 * hdl_kmap_bp() saves "b_bcount" and restores it in hdl_remap_bp() after
2501 * the I/O. We must save and restore the count because pageiodone()
2502 * uses b_bcount to determine how many pages to unlock.
2504 * Remap the entire range.
2509 afs_Trace4(afs_iclSetp, CM_TRACE_HPSTRAT, ICL_TYPE_POINTER, bp->b_vp,
2510 ICL_TYPE_LONG, (int)bp->b_blkno * DEV_BSIZE, ICL_TYPE_LONG,
2511 bp->b_bcount, ICL_TYPE_LONG, 0);
2513 /* Set up the uio structure */
2514 tuio.afsio_iov = tiovec;
2515 tuio.afsio_iovcnt = 1;
/* Byte offset = device block number * DEV_BSIZE. */
2516 tuio.afsio_offset = DEV_BSIZE * bp->b_blkno;
2517 tuio.afsio_seg = AFS_UIOSYS;
2518 tuio.afsio_resid = bp->b_bcount;
2519 tuio.uio_fpflags = 0;
2520 tiovec[0].iov_base = bp->b_un.b_addr;
2521 tiovec[0].iov_len = bp->b_bcount;
2524 if ((bp->b_flags & B_READ) == B_READ) {
2525 /* read b_bcount bytes into kernel address b_un.b_addr
2526 * starting at byte DEV_BSIZE * b_blkno. Bzero anything
2527 * we can't read, and finally call iodone(bp). File is
2528 * in bp->b_vp. Credentials are from u area??
2530 code = afs_rdwr(VTOAFS(bp->b_vp), &tuio, UIO_READ, 0, kt_cred(t));
/* Short read: zero the tail of the buffer that was not filled. */
2532 if (tuio.afsio_resid > 0) {
2533 privlbzero(bvtospace(bp, bp->b_un.b_addr),
2534 bp->b_un.b_addr + bp->b_bcount - tuio.afsio_resid,
2535 (size_t) tuio.afsio_resid);
/* Write path: push the whole buffer through afs_rdwr. */
2539 code = afs_rdwr(VTOAFS(bp->b_vp), &tuio, UIO_WRITE, 0, kt_cred(t));
2541 /* Remap back to the user's space */
2550 afs_pathconf(vp, name, resultp, cred)
2554 struct ucred *cred; /* unused */
2557 case _PC_LINK_MAX: /* Maximum number of links to a file */
2558 *resultp = 255; /* an unsigned short on the fileserver */
2559 break; /* a unsigned char in the client.... */
2561 case _PC_NAME_MAX: /* Max length of file name */
2565 case _PC_PATH_MAX: /* Maximum length of Path Name */
2569 case _PC_PIPE_BUF: /* Max atomic write to pipe. See fifo_vnops */
2570 case _PC_CHOWN_RESTRICTED: /* Anybody can chown? */
2571 case _PC_NO_TRUNC: /* No file name truncation on overflow? */
2572 u.u_error = EOPNOTSUPP;
2573 return (EOPNOTSUPP);
2576 case _PC_MAX_CANON: /* TTY buffer size for canonical input */
2577 /* need more work here for pty, ite buffer size, if differ */
2578 if (vp->v_type != VCHR) {
2582 *resultp = CANBSIZ; /*for tty */
2586 /* need more work here for pty, ite buffer size, if differ */
2587 if (vp->v_type != VCHR) { /* TTY buffer size */
2591 *resultp = TTYHOG; /*for tty */
2595 /* Terminal special characters can be disabled? */
2596 if (vp->v_type != VCHR) {
2604 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) {
2608 *resultp = 1; /* Synchronized IO supported for this file */
2611 case _PC_FILESIZEBITS:
2612 if (vp->v_type != VDIR)
2614 *resultp = MAX_SMALL_FILE_BITS;