2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
11 * vnodeops structure and Digital Unix specific ops and support routines.
14 #include <afsconfig.h>
15 #include "afs/param.h"
21 #include "afs/sysincludes.h" /* Standard vendor system headers */
22 #include "afsincludes.h" /* Afs-based standard headers */
23 #include "afs/afs_stats.h" /* statistics */
25 #include <vm/vnode_pager.h>
26 #include <vm/vm_map.h>
27 /* #include <vm/vm_ubc.h> */
28 #include "afs/afs_cbqueue.h"
29 #include "afs/nfsclient.h"
30 #include "afs/afs_osidnlc.h"
/*
 * Old-style (unprototyped) declarations of externally defined routines
 * referenced by this file.
 */
33 extern int afs_lookup(), afs_create(), afs_noop(), afs_open(), afs_close();
34 extern int afs_access(), afs_getattr(), afs_setattr(), afs_badop();
35 extern int afs_fsync(), afs_seek(), afs_remove(), afs_link(), afs_rename();
36 extern int afs_mkdir(), afs_rmdir(), afs_symlink(), afs_readdir();
37 extern int afs_readlink(), afs_lockctl();
38 extern int vn_pathconf_default(), seltrue();
/*
 * Forward declarations of the mp_afs_* entry points, several of which are
 * defined further down.
 * NOTE(review): mp_afs_ubcrdwr is declared twice below (once next to
 * mp_afs_setattr and once next to mp_afs_mmap); the repeat is redundant
 * but harmless.
 */
40 int mp_afs_lookup(), mp_afs_create(), mp_afs_open();
41 int mp_afs_access(), mp_afs_getattr(), mp_afs_setattr(), mp_afs_ubcrdwr();
42 int mp_afs_ubcrdwr(), mp_afs_mmap();
43 int mp_afs_fsync(), mp_afs_seek(), mp_afs_remove(), mp_afs_link();
44 int mp_afs_rename(), mp_afs_mkdir(), mp_afs_rmdir(), mp_afs_symlink();
45 int mp_afs_readdir(), mp_afs_readlink(), mp_afs_abortop(), mp_afs_inactive();
46 int mp_afs_reclaim(), mp_afs_bmap(), mp_afs_strategy(), mp_afs_print();
47 int mp_afs_page_read(), mp_afs_page_write(), mp_afs_swap(), mp_afs_bread();
48 int mp_afs_brelse(), mp_afs_lockctl(), mp_afs_syncdata(), mp_afs_close();
/*
 * Vnode operations table. Only some slots are visible in this excerpt:
 * mknod, unlock and the three extended-attribute slots go to afs_noop,
 * ioctl goes to afs_badop, and select goes to seltrue.
 */
53 struct vnodeops Afs_vnodeops = {
56 afs_noop, /* vn_mknod */
64 afs_badop, /* vn_ioctl */
65 seltrue, /* vn_select */
91 afs_noop, /* unLock */
92 afs_noop, /* get ext attrs */
93 afs_noop, /* set ext attrs */
94 afs_noop, /* del ext attrs */
/* Global pointer to the table above. */
97 struct vnodeops *afs_ops = &Afs_vnodeops;
100 /* vnode file operations, and our own */
101 extern int vn_read();
102 extern int vn_write();
103 extern int vn_ioctl();
104 extern int vn_select();
105 extern int afs_closex();
/*
 * File-operations table. Its initialisers are not visible in this
 * excerpt; presumably it combines the vn_* routines declared above with
 * afs_closex - confirm against the full table.
 */
107 struct fileops afs_fileops = {
/*
 * mp_afs_lookup: forwards adp and ndp unchanged to afs_lookup() and
 * captures the status in code. The rest of the body is not visible in
 * this excerpt.
 */
116 mp_afs_lookup(adp, ndp)
118 struct nameidata *ndp;
122 code = afs_lookup(adp, ndp);
/*
 * mp_afs_create: forwards ndp and attrs unchanged to afs_create() and
 * captures the status in code. The rest of the body is not visible in
 * this excerpt.
 */
127 mp_afs_create(ndp, attrs)
128 struct nameidata *ndp;
133 code = afs_create(ndp, attrs);
/*
 * mp_afs_open: forwards the vcache handle (pointer to pointer), the open
 * flags and the credentials unchanged to afs_open() and captures the
 * status in code. The rest of the body is not visible in this excerpt.
 */
138 mp_afs_open(avcp, aflags, acred)
139 struct vcache **avcp;
141 struct AFS_UCRED *acred;
145 code = afs_open(avcp, aflags, acred);
/*
 * mp_afs_access: forwards avc, amode and acred unchanged to afs_access()
 * and captures the status in code. The rest of the body is not visible
 * in this excerpt.
 */
150 mp_afs_access(avc, amode, acred)
153 struct AFS_UCRED *acred;
157 code = afs_access(avc, amode, acred);
/*
 * mp_afs_close: forwards avc, flags and cred unchanged to afs_close() and
 * captures the status in code. The rest of the body is not visible in
 * this excerpt.
 */
162 mp_afs_close(avc, flags, cred)
169 code = afs_close(avc, flags, cred);
/*
 * mp_afs_getattr: forwards avc, attrs and acred unchanged to
 * afs_getattr() and captures the status in code. The rest of the body is
 * not visible in this excerpt.
 */
174 mp_afs_getattr(avc, attrs, acred)
177 struct AFS_UCRED *acred;
181 code = afs_getattr(avc, attrs, acred);
/*
 * mp_afs_setattr: forwards avc, attrs and acred unchanged to
 * afs_setattr() and captures the status in code. The rest of the body is
 * not visible in this excerpt.
 */
186 mp_afs_setattr(avc, attrs, acred)
189 struct AFS_UCRED *acred;
193 code = afs_setattr(avc, attrs, acred);
/*
 * mp_afs_fsync: forwards avc, fflags, acred and waitfor unchanged to
 * afs_fsync() and captures the status in code. The rest of the body is
 * not visible in this excerpt.
 */
198 mp_afs_fsync(avc, fflags, acred, waitfor)
201 struct AFS_UCRED *acred;
206 code = afs_fsync(avc, fflags, acred, waitfor);
/*
 * NOTE(review): the header line of this routine is not visible in this
 * excerpt; presumably it is mp_afs_remove. The visible part forwards ndp
 * to afs_remove() and captures the status in code.
 */
212 struct nameidata *ndp;
216 code = afs_remove(ndp);
/*
 * mp_afs_link: forwards avc and ndp unchanged to afs_link() and captures
 * the status in code. The rest of the body is not visible in this
 * excerpt.
 */
221 mp_afs_link(avc, ndp)
223 struct nameidata *ndp;
227 code = afs_link(avc, ndp);
/*
 * mp_afs_rename: forwards the two nameidata pointers (fndp, tndp)
 * unchanged to afs_rename() and captures the status in code. The rest of
 * the body is not visible in this excerpt.
 */
232 mp_afs_rename(fndp, tndp)
233 struct nameidata *fndp, *tndp;
237 code = afs_rename(fndp, tndp);
/*
 * mp_afs_mkdir: forwards ndp and attrs unchanged to afs_mkdir() and
 * captures the status in code. The rest of the body is not visible in
 * this excerpt.
 */
242 mp_afs_mkdir(ndp, attrs)
243 struct nameidata *ndp;
248 code = afs_mkdir(ndp, attrs);
/*
 * NOTE(review): the header line of this routine is not visible in this
 * excerpt; presumably it is mp_afs_rmdir. The visible part forwards ndp
 * to afs_rmdir() and captures the status in code.
 */
254 struct nameidata *ndp;
258 code = afs_rmdir(ndp);
/*
 * mp_afs_symlink: forwards ndp, attrs and the target name string
 * unchanged to afs_symlink() and captures the status in code. The rest
 * of the body is not visible in this excerpt.
 */
263 mp_afs_symlink(ndp, attrs, atargetName)
264 struct nameidata *ndp;
266 register char *atargetName;
270 code = afs_symlink(ndp, attrs, atargetName);
/*
 * mp_afs_readdir: forwards avc, auio, acred and eofp unchanged to
 * afs_readdir() and captures the status in code. The rest of the body is
 * not visible in this excerpt.
 */
275 mp_afs_readdir(avc, auio, acred, eofp)
278 struct AFS_UCRED *acred;
283 code = afs_readdir(avc, auio, acred, eofp);
/*
 * mp_afs_readlink: forwards avc, auio and acred unchanged to
 * afs_readlink() and captures the status in code. The rest of the body
 * is not visible in this excerpt.
 */
288 mp_afs_readlink(avc, auio, acred)
291 struct AFS_UCRED *acred;
295 code = afs_readlink(avc, auio, acred);
/*
 * mp_afs_lockctl: forwards all six arguments (avc, af, flag, acred, clid,
 * offset) unchanged to afs_lockctl() and captures the status in code.
 * The rest of the body is not visible in this excerpt.
 */
300 mp_afs_lockctl(avc, af, flag, acred, clid, offset)
303 struct AFS_UCRED *acred;
310 code = afs_lockctl(avc, af, flag, acred, clid, offset);
/*
 * NOTE(review): only this statement of the enclosing routine is visible
 * (presumably mp_afs_closex - confirm). It passes afd to afs_closex() and
 * captures the status in code.
 */
320 code = afs_closex(afd);
/*
 * mp_afs_seek: seek entry point taking the vcache, the old and new file
 * offsets (both off_t) and credentials. Its body is not visible in this
 * excerpt.
 */
325 mp_afs_seek(avc, oldoff, newoff, cred)
327 off_t oldoff, newoff;
/*
 * NOTE(review): parameter declaration of a routine whose header and body
 * are not visible in this excerpt (it sits between mp_afs_seek and
 * mp_afs_inactive; possibly mp_afs_abortop - confirm).
 */
337 struct nameidata *ndp;
/*
 * mp_afs_inactive: hands the vcache and credentials to
 * afs_InactiveVCache(). No result of that call is captured in the
 * visible code.
 */
342 mp_afs_inactive(avc, acred)
343 register struct vcache *avc;
344 struct AFS_UCRED *acred;
347 afs_InactiveVCache(avc, acred);
/*
 * mp_afs_page_read: performs the read through afs_rdwr() with UIO_READ,
 * traces the result together with avc->states, and wires the vcache with
 * afs_Wire() when it is not wired yet.
 *
 * avc   - vcache being read
 * uio   - describes the transfer
 * acred - credentials for the read, also used to initialise the request
 *
 * The error branch and the return statement are not visible in this
 * excerpt.
 */
364 mp_afs_page_read(avc, uio, acred)
370 struct vrequest treq;
373 error = afs_rdwr(avc, uio, UIO_READ, 0, acred);
374 afs_Trace3(afs_iclSetp, CM_TRACE_PAGE_READ, ICL_TYPE_POINTER, avc,
375 ICL_TYPE_INT32, error, ICL_TYPE_INT32, avc->states);
/*
 * Test only the CWired bit. Comparing the whole states word with zero
 * skipped afs_Wire() whenever any unrelated state flag (CDirty, CMAPPED,
 * ...) happened to be set, so the vcache was almost never wired.
 */
378 } else if ((avc->states & CWired) == 0) {
379 afs_InitReq(&treq, acred);
/* afs_Wire() is called with the vcache write lock held. */
380 ObtainWriteLock(&avc->lock, 161);
381 afs_Wire(avc, &treq);
382 ReleaseWriteLock(&avc->lock);
/*
 * mp_afs_page_write: performs the write through afs_rdwr() with
 * UIO_WRITE and traces the result together with avc->states. The pager
 * and offset arguments are not used in the visible lines; the rest of
 * the body is not visible in this excerpt.
 */
389 mp_afs_page_write(avc, uio, acred, pager, offset)
393 memory_object_t pager;
399 error = afs_rdwr(avc, uio, UIO_WRITE, 0, acred);
400 afs_Trace3(afs_iclSetp, CM_TRACE_PAGE_WRITE, ICL_TYPE_POINTER, avc,
401 ICL_TYPE_INT32, error, ICL_TYPE_INT32, avc->states);
/*
 * mp_afs_ubcrdwr: page-at-a-time read or write of a regular file through
 * the UBC page interfaces (ubc_lookup, ubc_bufalloc, ubc_page_release,
 * ubc_dirty_kluster).
 *
 * avc    - vcache being read or written; anything but VREG gets EISDIR
 * uio    - transfer description; uio_rw selects read or write
 * ioflag - IO_APPEND moves a write to the current file length; IO_SYNC
 *          adds an afs_fsync() after a successful write that is not an
 *          NFS translator request
 * cred   - credentials used for the request, the I/O and the prefetch
 *
 * Visible flow: verify the vcache, call osi_FlushPages(), then per page
 * look it up in the UBC, fill it through afs_ustrategy() when it is not
 * cached (or zero it for a write that need not read it), uiomove() the
 * data, and for writes push the dirty page back out. avc->lock is
 * released around ubc_lookup(), afs_ustrategy() and uiomove() and taken
 * again afterwards. On EDQUOT or ENOSPC the saved resid is restored and
 * osi_ReleaseVM() is called. The final status goes through
 * afs_CheckCode().
 */
411 mp_afs_ubcrdwr(avc, uio, ioflag, cred)
417 register afs_int32 code;
419 afs_int32 fileBase, size, cnt = 0;
421 register afs_int32 tsize;
422 register afs_int32 pageOffset;
424 struct vrequest treq;
425 int rw = uio->uio_rw;
429 afs_int32 save_resid;
435 afs_InitReq(&treq, cred);
/* NFS translator reads get an explicit PRSFS_READ access check first. */
436 if (AFS_NFSXLATORREQ(cred) && rw == UIO_READ) {
438 (avc, PRSFS_READ, &treq,
439 CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
444 afs_Trace4(afs_iclSetp, CM_TRACE_VMRW, ICL_TYPE_POINTER, avc,
445 ICL_TYPE_INT32, (rw == UIO_WRITE ? 1 : 0), ICL_TYPE_LONG,
446 uio->uio_offset, ICL_TYPE_LONG, uio->uio_resid);
447 code = afs_VerifyVCache(avc, &treq);
449 code = afs_CheckCode(code, &treq, 35);
453 if (vType(avc) != VREG) {
455 return EISDIR; /* can't read or write other things */
457 osi_FlushPages(avc); /* hold bozon lock, but not basic vnode lock */
458 ObtainWriteLock(&avc->lock, 162);
459 /* adjust parameters when appending files */
460 if ((ioflag & IO_APPEND) && uio->uio_rw == UIO_WRITE)
461 uio->uio_offset = avc->m.Length; /* write at EOF position */
462 if (uio->uio_rw == UIO_WRITE) {
463 avc->states |= CDirty;
467 * before starting any I/O, we must ensure that the file is big enough
468 * to hold the results (since afs_putpage will be called to force
471 size = uio->afsio_resid + uio->afsio_offset; /* new file size */
472 if (size > avc->m.Length)
473 avc->m.Length = size; /* file grew */
474 avc->m.Date = osi_Time(); /* Set file date (for ranlib) */
/* cnt: whole pages in the request, used by the flush test near the end. */
475 if (uio->afsio_resid > PAGE_SIZE)
476 cnt = uio->afsio_resid / PAGE_SIZE;
477 save_resid = uio->afsio_resid;
482 * compute the amount of data to move into this block,
483 * based on uio->afsio_resid.
485 size = uio->afsio_resid; /* transfer size */
486 fileBase = uio->afsio_offset; /* start file position */
487 pageBase = fileBase & ~(PAGE_SIZE - 1); /* file position of the page */
488 pageOffset = fileBase & (PAGE_SIZE - 1); /* start offset within page */
489 tsize = PAGE_SIZE - pageOffset; /* amount left in this page */
491 * we'll read tsize bytes,
492 * but first must make sure tsize isn't too big
495 tsize = size; /* don't read past end of request */
496 eof = 0; /* flag telling us if we hit the EOF on the read */
497 if (uio->uio_rw == UIO_READ) { /* we're doing a read operation */
498 /* don't read past EOF */
499 if (tsize + fileBase > avc->m.Length) {
500 tsize = avc->m.Length - fileBase;
501 eof = 1; /* we did hit the EOF */
503 tsize = 0; /* better safe than sorry */
507 break; /* nothing to transfer, we're done */
509 /* Purge dirty chunks of file if there are too many dirty chunks.
510 * Inside the write loop, we only do this at a chunk boundary.
511 * Clean up partial chunk if necessary at end of loop.
513 if (uio->uio_rw == UIO_WRITE && counter > 0
514 && AFS_CHUNKOFFSET(fileBase) == 0) {
515 code = afs_DoPartialWrite(avc, &treq);
516 avc->states |= CDirty;
524 ReleaseWriteLock(&avc->lock);
527 ubc_lookup(((struct vnode *)avc)->v_object, pageBase, PAGE_SIZE,
528 PAGE_SIZE, &page, &flags);
530 ObtainWriteLock(&avc->lock, 163);
535 if (flags & B_NOCACHE) {
537 * No page found. We should not read the page in if
538 * 1. the write starts on a page edge (ie, pageoffset == 0)
540 * 1. we will fill the page (ie, size == PAGESIZE), or
541 * 2. we are writing past eof
543 if ((uio->uio_rw == UIO_WRITE)
546 && (size == PAGE_SIZE || fileBase >= avc->m.Length)))) {
547 struct vnode *vp = (struct vnode *)avc;
548 /* we're doing a write operation past eof; no need to read it */
551 ubc_page_zero(page, 0, PAGE_SIZE);
552 ubc_page_release(page, B_DONE);
555 /* page wasn't cached, read it in. */
559 bp = ubc_bufalloc(page, 1, PAGE_SIZE, 1, B_READ);
562 bp->b_vp = (struct vnode *)avc;
563 bp->b_blkno = btodb(pageBase);
564 ReleaseWriteLock(&avc->lock);
565 code = afs_ustrategy(bp, cred); /* do the I/O */
566 ObtainWriteLock(&avc->lock, 164);
572 ubc_page_release(page, 0);
580 data = (char *)page->pg_addr; /* DUX 4.0D */
582 data = (char *)PHYS_TO_KSEG(page->pg_phys_addr); /* DUX 4.0E */
584 ReleaseWriteLock(&avc->lock); /* uiomove may page fault */
586 code = uiomove(data + pageOffset, tsize, uio);
587 ubc_unload(page, pageOffset, page_size);
588 if (uio->uio_rw == UIO_WRITE) {
591 /* Mark the page dirty and release it to avoid a deadlock
592 * in ubc_dirty_kluster when more than one process writes
593 * this page at the same time. */
594 toffset = page->pg_offset;
596 ubc_page_release(page, flags);
603 /* We released the page, so we can get a null page
604 * list if another thread calls the strategy routine.
606 pl = ubc_dirty_kluster(((struct vnode *)avc)->v_object, NULL,
607 toffset, 0, B_WANTED, FALSE, &kpcnt);
609 bp = ubc_bufalloc(pl, 1, PAGE_SIZE, 1, B_WRITE);
611 bp->b_vp = (struct vnode *)avc;
612 bp->b_blkno = btodb(pageBase);
614 code = afs_ustrategy(bp, cred); /* do the I/O */
619 ObtainWriteLock(&avc->lock, 415);
625 ubc_page_release(page, flags);
628 ObtainWriteLock(&avc->lock, 165);
630 * If reading at a chunk boundary, start prefetch of next chunk.
632 if (uio->uio_rw == UIO_READ
633 && (counter == 0 || AFS_CHUNKOFFSET(fileBase) == 0)) {
634 tdc = afs_FindDCache(avc, fileBase);
636 if (!(tdc->mflags & DFNextStarted))
637 afs_PrefetchChunk(avc, tdc, cred, &treq);
646 afs_FakeClose(avc, cred);
647 if (uio->uio_rw == UIO_WRITE && code == 0 && (avc->states & CDirty)) {
648 code = afs_DoPartialWrite(avc, &treq);
650 ReleaseWriteLock(&avc->lock);
/*
 * Dirty pages are flushed here when DO_FLUSH is set, or when newpage is
 * clear and the request covered fewer than 10 whole pages.
 */
651 if (DO_FLUSH || (!newpage && (cnt < 10))) {
653 ubc_flush_dirty(((struct vnode *)avc)->v_object, flags);
657 ObtainSharedLock(&avc->lock, 409);
660 code = avc->vc_error;
663 /* This is required since we may still have dirty pages after the write.
664 * I could just let close do the right thing, but stat's before the close
665 * return the wrong length.
667 if (code == EDQUOT || code == ENOSPC) {
668 uio->uio_resid = save_resid;
669 UpgradeSToWLock(&avc->lock, 410);
670 osi_ReleaseVM(avc, cred);
671 ConvertWToSLock(&avc->lock);
673 ReleaseSharedLock(&avc->lock);
675 if (!code && (ioflag & IO_SYNC) && (uio->uio_rw == UIO_WRITE)
676 && !AFS_NFSXLATORREQ(cred)) {
677 code = afs_fsync(avc, 0, cred, 0);
680 code = afs_CheckCode(code, &treq, 36);
687 * Now for some bad news. Since we artificially hold on to vnodes by doing
688 an extra VNHOLD in afs_NewVCache(), there is no way for us to know
689 * when we need to flush the pages when a program exits. Particularly
690 * if it closes the file after mapping it R/W.
/*
 * mp_afs_mmap: maps the file behind avc into the address map `map`.
 * Verifies the vcache, calls osi_FlushPages(), marks the vcache CMAPPED
 * under the write lock, fills a vp_mmap_args from the arguments and hands
 * the v_object of the vnode to u_vp_create(). Status values from
 * afs_VerifyVCache() and u_vp_create() are passed through
 * afs_CheckCode(). Several lines of the body are not visible in this
 * excerpt.
 */
694 mp_afs_mmap(avc, offset, map, addrp, len, prot, maxprot, flags, cred)
695 register struct vcache *avc;
705 struct vp_mmap_args args;
706 register struct vp_mmap_args *ap = &args;
707 struct vnode *vp = (struct vnode *)avc;
709 struct vrequest treq;
711 extern kern_return_t u_vp_create();
715 afs_InitReq(&treq, cred);
716 code = afs_VerifyVCache(avc, &treq);
718 code = afs_CheckCode(code, &treq, 37);
722 osi_FlushPages(avc); /* ensure old pages are gone */
723 ObtainWriteLock(&avc->lock, 166);
724 avc->states |= CMAPPED;
725 ReleaseWriteLock(&avc->lock);
726 ap->a_offset = offset;
729 ap->a_prot = prot, ap->a_maxprot = maxprot;
/* The argument block is passed by address, cast to vm_offset_t. */
732 code = u_vp_create(map, vp->v_object, (vm_offset_t) ap);
734 code = afs_CheckCode(code, &treq, 38);
/*
 * mp_afs_getpage: get-page routine installed in afs_ubcops.
 * After verifying the vcache it walks the pages covering the request.
 * Each page is looked up with ubc_lookup(); a page reported with
 * B_NOCACHE is either zeroed (write request whose end reaches or passes
 * the current file length) or read in through afs_ustrategy(). Requests
 * without B_READ dirty the page, and protp[i] is set to VM_PROT_WRITE
 * when protp is given and the page is not flagged B_DIRTY. avc->lock is
 * released around ubc_lookup() and afs_ustrategy(). The final status goes
 * through afs_CheckCode().
 */
741 mp_afs_getpage(vop, offset, len, protp, pl, plsz, mape, addr, rw, cred)
753 register afs_int32 code;
754 struct vrequest treq;
/* pages: len rounded up to whole pages (assumes page_shift matches PAGE_SIZE - TODO confirm). */
756 int i, pages = (len + PAGE_SIZE - 1) >> page_shift;
760 struct vcache *avc = VTOAFS(vop->vu_vp);
762 /* first, obtain the proper lock for the VM system */
765 afs_InitReq(&treq, cred);
766 code = afs_VerifyVCache(avc, &treq);
769 code = afs_CheckCode(code, &treq, 39); /* failed to get it */
774 /* clean all dirty pages for this vnode */
776 ubc_flush_dirty(vop, 0);
779 ObtainWriteLock(&avc->lock, 167);
780 afs_Trace4(afs_iclSetp, CM_TRACE_PAGEIN, ICL_TYPE_POINTER, avc,
781 ICL_TYPE_LONG, offset, ICL_TYPE_LONG, len, ICL_TYPE_INT32,
783 for (i = 0; i < pages; i++) {
785 off = offset + PAGE_SIZE * i;
789 ReleaseWriteLock(&avc->lock);
792 ubc_lookup(((struct vnode *)avc)->v_object, off, PAGE_SIZE,
793 PAGE_SIZE, pagep, &flags);
795 ObtainWriteLock(&avc->lock, 168);
799 if (flags & B_NOCACHE) { /* if (page) */
/* NOTE(review): this test uses the end of the whole request (offset + len), not the per-page off. */
800 if ((rw & B_WRITE) && (offset + len >= avc->m.Length)) {
801 struct vnode *vp = (struct vnode *)avc;
802 /* we're doing a write operation past eof; no need to read it */
804 ubc_page_zero(*pagep, 0, PAGE_SIZE);
805 ubc_page_release(*pagep, B_DONE);
808 /* page wasn't cached, read it in. */
812 bp = ubc_bufalloc(*pagep, 1, PAGE_SIZE, 1, B_READ);
815 bp->b_vp = (struct vnode *)avc;
816 bp->b_blkno = btodb(off);
817 ReleaseWriteLock(&avc->lock);
818 code = afs_ustrategy(bp, cred); /* do the I/O */
819 ObtainWriteLock(&avc->lock, 169);
825 ubc_page_release(pl[i], 0);
831 if ((rw & B_READ) == 0) {
833 ubc_page_dirty(pl[i]);
836 if (protp && (flags & B_DIRTY) == 0) {
837 protp[i] = VM_PROT_WRITE;
/* Presumably terminates the returned page list - confirm against the full loop. */
842 pl[i] = VM_PAGE_NULL;
843 ReleaseWriteLock(&avc->lock);
844 afs_Trace3(afs_iclSetp, CM_TRACE_PAGEINDONE, ICL_TYPE_INT32, code,
845 ICL_TYPE_POINTER, *pagep, ICL_TYPE_INT32, flags);
846 code = afs_CheckCode(code, &treq, 40);
/*
 * mp_afs_putpage: put-page routine installed in afs_ubcops.
 * When the vnode has VXLOCK set, every page in pl is released with
 * B_DONE | B_DIRTY and its slot cleared. Otherwise each page is wrapped
 * in a B_WRITE buf and written through afs_ustrategy(), with avc->lock
 * released around the I/O, and its slot is then set to VM_PAGE_NULL.
 * Several lines of the body are not visible in this excerpt.
 */
853 mp_afs_putpage(vop, pl, pcnt, flags, cred)
860 register afs_int32 code = 0;
861 struct vcache *avc = VTOAFS(vop->vu_vp);
862 struct vnode *vp = (struct vnode *)avc;
866 afs_Trace4(afs_iclSetp, CM_TRACE_PAGEOUT, ICL_TYPE_POINTER, avc,
867 ICL_TYPE_INT32, pcnt, ICL_TYPE_INT32, vp->v_flag,
868 ICL_TYPE_INT32, flags);
872 if (vp->v_flag & VXLOCK) {
874 for (i = 0; i < pcnt; i++) {
875 ubc_page_release(pl[i], B_DONE | B_DIRTY);
876 pl[i] = VM_PAGE_NULL;
885 /* first, obtain the proper lock for the VM system */
886 ObtainWriteLock(&avc->lock, 170);
887 for (i = 0; i < pcnt; i++) {
888 vm_page_t page = pl[i];
893 bp = ubc_bufalloc(page, 1, PAGE_SIZE, 1, B_WRITE);
896 bp->b_vp = (struct vnode *)avc;
/* The block number is derived from the offset recorded in the page. */
897 bp->b_blkno = btodb(page->pg_offset);
898 ReleaseWriteLock(&avc->lock);
899 code = afs_ustrategy(bp, cred); /* do the I/O */
900 ObtainWriteLock(&avc->lock, 171);
907 pl[i] = VM_PAGE_NULL;
911 ReleaseWriteLock(&avc->lock);
912 afs_Trace2(afs_iclSetp, CM_TRACE_PAGEOUTDONE, ICL_TYPE_INT32, code,
913 ICL_TYPE_INT32, avc->m.Length);
/*
 * mp_afs_swap: swap entry point taking the vcache, a swap operation and
 * an argument pointer. Its body is not visible in this excerpt.
 */
920 mp_afs_swap(avc, swapop, argp)
/*
 * mp_afs_syncdata: sync-data entry point. Requests that come from the NFS
 * translator (AFS_NFSXLATORREQ(cred)) are special-cased first; what
 * happens in either case is not visible in this excerpt.
 */
929 mp_afs_syncdata(avc, flag, offset, length, cred)
936 /* NFS V3 makes this call, ignore it. We'll sync the data in afs_fsync. */
937 if (AFS_NFSXLATORREQ(cred))
/*
 * One-entry cache of a fake buf: mp_afs_bread() takes it when set, and
 * mp_afs_bread() and mp_afs_brelse() both store a buf back into it.
 */
943 /* a freelist of one */
944 struct buf *afs_bread_freebp = 0;
947 * Only rfs_read calls this, and it only looks at bp->b_un.b_addr.
948 * Thus we can use fake bufs (ie not from the real buffer pool).
/*
 * mp_afs_bread: reads one filesystem block (vfs_bsize bytes starting at
 * lbn * vfs_bsize) through afs_read() into a buf.
 * The buf comes from afs_bread_freebp when that is set; otherwise a buf
 * and a data area of fsbsize bytes are allocated with AFS_KALLOC. The
 * buf is marked as fake by making its b_vp field point at the buf
 * itself, which is what mp_afs_brelse() tests for.
 * NOTE(review): offset is an int, so lbn * fsbsize may overflow for
 * large block numbers - confirm the intended range.
 * NOTE(review): the AFS_KALLOC results are used without a check in the
 * visible lines.
 */
950 mp_afs_bread(vp, lbn, bpp, cred)
956 int offset, fsbsize, error;
962 AFS_STATCNT(afs_bread);
963 fsbsize = vp->v_vfsp->vfs_bsize;
964 offset = lbn * fsbsize;
965 if (afs_bread_freebp) {
966 bp = afs_bread_freebp;
967 afs_bread_freebp = 0;
969 bp = (struct buf *)AFS_KALLOC(sizeof(*bp));
970 bp->b_un.b_addr = (caddr_t) AFS_KALLOC(fsbsize);
/* Describe the data area of the buf as a single kernel-space iovec. */
973 iov.iov_base = bp->b_un.b_addr;
974 iov.iov_len = fsbsize;
975 uio.afsio_iov = &iov;
976 uio.afsio_iovcnt = 1;
977 uio.afsio_seg = AFS_UIOSYS;
978 uio.afsio_offset = offset;
979 uio.afsio_resid = fsbsize;
981 error = afs_read(VTOAFS(vp), &uio, cred, lbn, bpp, 0);
/* The buf is put back in the cache on two paths; their conditions are not visible here. */
983 afs_bread_freebp = bp;
988 afs_bread_freebp = bp;
990 *(struct buf **)&bp->b_vp = bp; /* mark as fake */
/*
 * mp_afs_brelse: releases a buf obtained from mp_afs_bread().
 * A buf whose b_vp does not point at the buf itself is not a fake buf
 * and takes the first branch (its body is not visible in this excerpt).
 * A fake buf is freed, data area first, when afs_bread_freebp already
 * holds one; otherwise it is stored in afs_bread_freebp for reuse.
 */
998 mp_afs_brelse(vp, bp)
1003 AFS_STATCNT(afs_brelse);
1004 if ((struct buf *)bp->b_vp != bp) { /* not fake */
1006 } else if (afs_bread_freebp) {
1007 AFS_KFREE(bp->b_un.b_addr, vp->v_vfsp->vfs_bsize);
1008 AFS_KFREE(bp, sizeof(*bp));
1010 afs_bread_freebp = bp;
/*
 * mp_afs_bmap: block-map entry point. The visible code converts the
 * logical block number abn, counted in 8192-byte blocks, into DEV_BSIZE
 * units and stores the result through anbn. How anvp is filled in is not
 * visible in this excerpt.
 */
1016 mp_afs_bmap(avc, abn, anvp, anbn)
1017 register struct vcache *avc;
1018 afs_int32 abn, *anbn;
1019 struct vcache **anvp;
1022 AFS_STATCNT(afs_bmap);
1026 *anbn = abn * (8192 / DEV_BSIZE); /* in 512 byte units */
/*
 * mp_afs_strategy: strategy entry point. Passes the buf to
 * afs_osi_MapStrategy() together with afs_ustrategy as the routine to
 * run, and captures the status in code. The rest of the body is not
 * visible in this excerpt.
 */
1033 mp_afs_strategy(abp)
1034 register struct buf *abp;
1036 register afs_int32 code;
1039 AFS_STATCNT(afs_strategy);
1040 code = afs_osi_MapStrategy(afs_ustrategy, abp);
/* Installed as the refer-vnode slot of afs_ubcops; body not visible in this excerpt. */
1046 mp_afs_refer(vm_ubc_object_t vop)
/* Installed as the release-vnode slot of afs_ubcops; body not visible in this excerpt. */
1052 mp_afs_release(vm_ubc_object_t vop)
/* Installed as the writability-check slot of afs_ubcops; body not visible in this excerpt. */
1058 mp_afs_write_check(vm_ubc_object_t vop, vm_page_t pp)
/*
 * UBC operations table: the five mp_afs_* routines that handle vnode
 * reference and release, page get and put, and the writability check.
 */
1065 struct vfs_ubcops afs_ubcops = {
1066 mp_afs_refer, /* refer vnode */
1067 mp_afs_release, /* release vnode */
1068 mp_afs_getpage, /* get page */
1069 mp_afs_putpage, /* put page */
1070 mp_afs_write_check, /* check writability */
1075 * Cover function for lookup name using OSF equivalent, namei()
1077 * Note, the result vnode (ni_vp) in the namei data structure remains
1078 * locked after return.
1080 lookupname(namep, seg, follow, dvpp, cvpp)
1081 char *namep; /* path name */
1082 int seg; /* address space containing name */
1083 int follow; /* follow symbolic links */
1084 struct vnode **dvpp; /* result, containing parent vnode */
1085 struct vnode **cvpp; /* result, containing final component vnode */
1087 /* Should I use free-bee in u-area? */
1088 struct nameidata *ndp = &u.u_nd;
1091 ndp->ni_nameiop = ((follow) ? (LOOKUP | FOLLOW) : (LOOKUP));
1092 ndp->ni_segflg = seg;
1093 ndp->ni_dirp = namep;
1096 *dvpp = ndp->ni_dvp;