2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
11 * vnodeops structure and Digital Unix specific ops and support routines.
14 #include "../afs/param.h" /* Should be always first */
16 #include "../afs/sysincludes.h" /* Standard vendor system headers */
17 #include "../afs/afsincludes.h" /* Afs-based standard headers */
18 #include "../afs/afs_stats.h" /* statistics */
20 #include <vm/vnode_pager.h>
21 #include <vm/vm_map.h>
22 /* #include <vm/vm_ubc.h> */
23 #include "../afs/afs_cbqueue.h"
24 #include "../afs/nfsclient.h"
25 #include "../afs/afs_osidnlc.h"
/*
 * Old-style (unprototyped) declarations of routines that are declared
 * extern here rather than defined in this file.  Most of the afs_*
 * operations listed are the ones the mp_afs_* wrappers in this file call;
 * afs_noop, afs_badop and seltrue are also installed directly as entries
 * of Afs_vnodeops.
 */
28 extern int afs_lookup(), afs_create(), afs_noop(), afs_open(), afs_close();
29 extern int afs_access(), afs_getattr(), afs_setattr(), afs_badop();
30 extern int afs_fsync(), afs_seek(), afs_remove(), afs_link(), afs_rename();
31 extern int afs_mkdir(), afs_rmdir(), afs_symlink(), afs_readdir();
32 extern int afs_readlink(), afs_lockctl();
33 extern int vn_pathconf_default(), seltrue();
/*
 * Forward declarations (old-style, no parameter lists) of the mp_afs_*
 * entry points, many of which are defined later in this file.
 * NOTE(review): mp_afs_ubcrdwr is named in two consecutive declarations
 * below; the repetition is redundant and one of them could be dropped.
 */
35 int mp_afs_lookup(), mp_afs_create(), mp_afs_open();
36 int mp_afs_access(), mp_afs_getattr(), mp_afs_setattr(), mp_afs_ubcrdwr();
37 int mp_afs_ubcrdwr(), mp_afs_mmap();
38 int mp_afs_fsync(), mp_afs_seek(), mp_afs_remove(), mp_afs_link();
39 int mp_afs_rename(), mp_afs_mkdir(), mp_afs_rmdir(), mp_afs_symlink();
40 int mp_afs_readdir(), mp_afs_readlink(), mp_afs_abortop(), mp_afs_inactive();
41 int mp_afs_reclaim(), mp_afs_bmap(), mp_afs_strategy(), mp_afs_print();
42 int mp_afs_page_read(), mp_afs_page_write(), mp_afs_swap(), mp_afs_bread();
43 int mp_afs_brelse(), mp_afs_lockctl(), mp_afs_syncdata(), mp_afs_close();
48 struct vnodeops Afs_vnodeops = {
51 afs_noop, /* vn_mknod */
59 afs_badop, /* vn_ioctl */
60 seltrue, /* vn_select */
86 afs_noop, /* unLock */
87 afs_noop, /* get ext attrs */
88 afs_noop, /* set ext attrs */
89 afs_noop, /* del ext attrs */
92 struct vnodeops *afs_ops = &Afs_vnodeops;
95 /* vnode file operations, and our own */
97 extern int vn_write();
98 extern int vn_ioctl();
99 extern int vn_select();
100 extern int afs_closex();
102 struct fileops afs_fileops = {
111 mp_afs_lookup(adp, ndp)
113 struct nameidata *ndp;
117 code = afs_lookup(adp, ndp);
122 mp_afs_create(ndp, attrs)
123 struct nameidata *ndp;
128 code = afs_create(ndp, attrs);
133 mp_afs_open(avcp, aflags, acred)
134 struct vcache **avcp;
136 struct AFS_UCRED *acred;
140 code = afs_open(avcp, aflags, acred);
145 mp_afs_access(avc, amode, acred)
148 struct AFS_UCRED *acred;
152 code = afs_access(avc, amode, acred);
157 mp_afs_close(avc, flags, cred)
164 code = afs_close(avc, flags, cred);
169 mp_afs_getattr(avc, attrs, acred)
172 struct AFS_UCRED *acred;
176 code = afs_getattr(avc, attrs, acred);
181 mp_afs_setattr(avc, attrs, acred)
184 struct AFS_UCRED *acred;
188 code = afs_setattr(avc, attrs, acred);
193 mp_afs_fsync(avc, fflags, acred, waitfor)
196 struct AFS_UCRED *acred;
201 code = afs_fsync(avc, fflags, acred, waitfor);
207 struct nameidata *ndp;
211 code = afs_remove(ndp);
216 mp_afs_link(avc, ndp)
218 struct nameidata *ndp;
222 code = afs_link(avc, ndp);
227 mp_afs_rename(fndp, tndp)
228 struct nameidata *fndp, *tndp;
232 code = afs_rename(fndp, tndp);
237 mp_afs_mkdir(ndp, attrs)
238 struct nameidata *ndp;
243 code = afs_mkdir(ndp, attrs);
249 struct nameidata *ndp;
253 code = afs_rmdir(ndp);
258 mp_afs_symlink(ndp, attrs, atargetName)
259 struct nameidata *ndp;
261 register char *atargetName;
265 code = afs_symlink(ndp, attrs, atargetName);
270 mp_afs_readdir(avc, auio, acred, eofp)
273 struct AFS_UCRED *acred;
278 code = afs_readdir(avc, auio, acred, eofp);
283 mp_afs_readlink(avc, auio, acred)
286 struct AFS_UCRED *acred;
290 code = afs_readlink(avc, auio, acred);
295 mp_afs_lockctl(avc, af, flag, acred, clid, offset)
298 struct AFS_UCRED *acred;
305 code = afs_lockctl(avc, af, flag, acred, clid, offset);
315 code = afs_closex(afd);
320 mp_afs_seek(avc, oldoff, newoff, cred)
322 off_t oldoff, newoff;
325 if ((int) newoff < 0)
332 struct nameidata *ndp;
337 mp_afs_inactive(avc, acred)
338 register struct vcache *avc;
339 struct AFS_UCRED *acred;
342 afs_InactiveVCache(avc, acred);
359 mp_afs_page_read(avc, uio, acred)
365 struct vrequest treq;
368 error = afs_rdwr(avc, uio, UIO_READ, 0, acred);
369 afs_Trace3(afs_iclSetp, CM_TRACE_PAGE_READ, ICL_TYPE_POINTER, avc,
370 ICL_TYPE_INT32, error, ICL_TYPE_INT32, avc->states);
373 } else if ((avc->states) == 0) {
374 afs_InitReq(&treq, acred);
375 ObtainWriteLock(&avc->lock,161);
376 afs_Wire(avc, &treq);
377 ReleaseWriteLock(&avc->lock);
384 mp_afs_page_write(avc, uio, acred, pager, offset)
388 memory_object_t pager;
394 error = afs_rdwr(avc, uio, UIO_WRITE, 0, acred);
395 afs_Trace3(afs_iclSetp, CM_TRACE_PAGE_WRITE, ICL_TYPE_POINTER, avc,
396 ICL_TYPE_INT32, error, ICL_TYPE_INT32, avc->states);
406 mp_afs_ubcrdwr(avc, uio, ioflag, cred)
412 register afs_int32 code;
414 afs_int32 fileBase, size, cnt=0;
416 register afs_int32 tsize;
417 register afs_int32 pageOffset;
419 struct vrequest treq;
420 int rw = uio->uio_rw;
424 afs_int32 save_resid;
430 afs_InitReq(&treq, cred);
431 if (AFS_NFSXLATORREQ(cred) && rw == UIO_READ) {
432 if (!afs_AccessOK(avc, PRSFS_READ, &treq,
433 CHECK_MODE_BITS|CMB_ALLOW_EXEC_AS_READ)) {
438 afs_Trace4(afs_iclSetp, CM_TRACE_VMRW, ICL_TYPE_POINTER, avc,
439 ICL_TYPE_INT32, (rw==UIO_WRITE? 1 : 0),
440 ICL_TYPE_LONG, uio->uio_offset,
441 ICL_TYPE_LONG, uio->uio_resid);
442 code = afs_VerifyVCache(avc, &treq);
444 code = afs_CheckCode(code, &treq, 35);
448 if (vType(avc) != VREG) {
450 return EISDIR; /* can't read or write other things */
452 afs_BozonLock(&avc->pvnLock, avc);
453 osi_FlushPages(avc); /* hold bozon lock, but not basic vnode lock */
454 ObtainWriteLock(&avc->lock,162);
455 /* adjust parameters when appending files */
456 if ((ioflag & IO_APPEND) && uio->uio_rw == UIO_WRITE)
457 uio->uio_offset = avc->m.Length; /* write at EOF position */
458 if (uio->uio_rw == UIO_WRITE) {
459 avc->states |= CDirty;
463 * before starting any I/O, we must ensure that the file is big enough
464 * to hold the results (since afs_putpage will be called to force
467 size = uio->afsio_resid + uio->afsio_offset; /* new file size */
468 if (size > avc->m.Length) avc->m.Length = size; /* file grew */
469 avc->m.Date = osi_Time(); /* Set file date (for ranlib) */
470 if (uio->afsio_resid > PAGE_SIZE)
471 cnt = uio->afsio_resid / PAGE_SIZE;
472 save_resid = uio->afsio_resid;
477 * compute the amount of data to move into this block,
478 * based on uio->afsio_resid.
480 size = uio->afsio_resid; /* transfer size */
481 fileBase = uio->afsio_offset; /* start file position */
482 pageBase = fileBase & ~(PAGE_SIZE-1); /* file position of the page */
483 pageOffset = fileBase & (PAGE_SIZE-1); /* start offset within page */
484 tsize = PAGE_SIZE-pageOffset; /* amount left in this page */
486 * we'll read tsize bytes,
487 * but first must make sure tsize isn't too big
489 if (tsize > size) tsize = size; /* don't read past end of request */
490 eof = 0; /* flag telling us if we hit the EOF on the read */
491 if (uio->uio_rw == UIO_READ) { /* we're doing a read operation */
492 /* don't read past EOF */
493 if (tsize + fileBase > avc->m.Length) {
494 tsize = avc->m.Length - fileBase;
495 eof = 1; /* we did hit the EOF */
496 if (tsize < 0) tsize = 0; /* better safe than sorry */
499 if (tsize <= 0) break; /* nothing to transfer, we're done */
501 /* Purge dirty chunks of file if there are too many dirty chunks.
502 * Inside the write loop, we only do this at a chunk boundary.
503 * Clean up partial chunk if necessary at end of loop.
505 if (uio->uio_rw == UIO_WRITE && counter > 0
506 && AFS_CHUNKOFFSET(fileBase) == 0) {
507 code = afs_DoPartialWrite(avc, &treq);
508 avc->states |= CDirty;
516 ReleaseWriteLock(&avc->lock);
518 code = ubc_lookup(((struct vnode *)avc)->v_object, pageBase,
519 PAGE_SIZE, PAGE_SIZE, &page, &flags);
521 ObtainWriteLock(&avc->lock,163);
526 if (flags & B_NOCACHE) {
528 No page found. We should not read the page in if
529 1. the write starts on a page edge (ie, pageoffset == 0)
531 1. we will fill the page (ie, size == PAGESIZE), or
532 2. we are writing past eof
534 if ((uio->uio_rw == UIO_WRITE) &&
535 ((pageOffset == 0 && (size == PAGE_SIZE || fileBase >= avc->m.Length)))) {
536 struct vnode *vp = (struct vnode *)avc;
537 /* we're doing a write operation past eof; no need to read it */
540 ubc_page_zero(page, 0, PAGE_SIZE);
541 ubc_page_release(page, B_DONE);
544 /* page wasn't cached, read it in. */
548 bp = ubc_bufalloc(page, 1, PAGE_SIZE, 1, B_READ);
551 bp->b_vp = (struct vnode *)avc;
552 bp->b_blkno = btodb(pageBase);
553 ReleaseWriteLock(&avc->lock);
554 code = afs_ustrategy(bp, cred); /* do the I/O */
555 ObtainWriteLock(&avc->lock,164);
561 ubc_page_release(page, 0);
569 data = (char *)page->pg_addr; /* DUX 4.0D */
571 data = (char *)PHYS_TO_KSEG(page->pg_phys_addr); /* DUX 4.0E */
573 ReleaseWriteLock(&avc->lock); /* uiomove may page fault */
575 code = uiomove(data+pageOffset, tsize, uio);
576 ubc_unload(page, pageOffset, page_size);
577 if (uio->uio_rw == UIO_WRITE) {
580 /* Mark the page dirty and release it to avoid a deadlock
581 * in ubc_dirty_kluster when more than one process writes
582 * this page at the same time. */
583 toffset = page->pg_offset;
585 ubc_page_release(page, flags);
592 /* We released the page, so we can get a null page
593 * list if another thread calls the strategy routine.
595 pl = ubc_dirty_kluster(((struct vnode *)avc)->v_object,
596 NULL, toffset, 0, B_WANTED, FALSE, &kpcnt);
598 bp = ubc_bufalloc(pl, 1, PAGE_SIZE, 1, B_WRITE);
600 bp->b_vp = (struct vnode *)avc;
601 bp->b_blkno = btodb(pageBase);
603 code = afs_ustrategy(bp, cred); /* do the I/O */
608 ObtainWriteLock(&avc->lock,415);
614 ubc_page_release(page, flags);
617 ObtainWriteLock(&avc->lock,165);
619 * If reading at a chunk boundary, start prefetch of next chunk.
621 if (uio->uio_rw == UIO_READ
622 && (counter == 0 || AFS_CHUNKOFFSET(fileBase) == 0)) {
623 tdc = afs_FindDCache(avc, fileBase);
625 if (!(tdc->flags & DFNextStarted))
626 afs_PrefetchChunk(avc, tdc, cred, &treq);
634 afs_FakeClose(avc, cred);
635 if (uio->uio_rw == UIO_WRITE && code == 0 && (avc->states & CDirty)) {
636 code = afs_DoPartialWrite(avc, &treq);
638 ReleaseWriteLock(&avc->lock);
639 afs_BozonUnlock(&avc->pvnLock, avc);
640 if (DO_FLUSH || (!newpage && (cnt < 10))) {
642 ubc_flush_dirty(((struct vnode *)avc)->v_object, flags);
646 ObtainSharedLock(&avc->lock, 409);
649 code = avc->vc_error;
652 /* This is required since we may still have dirty pages after the write.
653 * I could just let close do the right thing, but stat's before the close
654 * return the wrong length.
656 if (code == EDQUOT || code == ENOSPC) {
657 uio->uio_resid = save_resid;
658 UpgradeSToWLock(&avc->lock, 410);
659 osi_ReleaseVM(avc, cred);
660 ConvertWToSLock(&avc->lock);
662 ReleaseSharedLock(&avc->lock);
664 if (!code && (ioflag & IO_SYNC) && (uio->uio_rw == UIO_WRITE)
665 && !AFS_NFSXLATORREQ(cred)) {
666 code = afs_fsync(avc, 0, cred, 0);
669 code = afs_CheckCode(code, &treq, 36);
676 * Now for some bad news. Since we artificially hold on to vnodes by doing
677 * an extra VNHOLD in afs_NewVCache(), there is no way for us to know
678 * when we need to flush the pages when a program exits. Particularly
679 * if it closes the file after mapping it R/W.
683 mp_afs_mmap(avc, offset, map, addrp, len, prot, maxprot, flags, cred)
684 register struct vcache *avc;
694 struct vp_mmap_args args;
695 register struct vp_mmap_args *ap = &args;
696 struct vnode *vp = (struct vnode *)avc;
698 struct vrequest treq;
700 extern kern_return_t u_vp_create();
704 afs_InitReq(&treq, cred);
705 code = afs_VerifyVCache(avc, &treq);
707 code = afs_CheckCode(code, &treq, 37);
711 afs_BozonLock(&avc->pvnLock, avc);
712 osi_FlushPages(avc); /* ensure old pages are gone */
713 afs_BozonUnlock(&avc->pvnLock, avc);
714 ObtainWriteLock(&avc->lock,166);
715 avc->states |= CMAPPED;
716 ReleaseWriteLock(&avc->lock);
717 ap->a_offset = offset;
721 ap->a_maxprot = maxprot;
724 code = u_vp_create(map, vp->v_object, (vm_offset_t) ap);
726 code = afs_CheckCode(code, &treq, 38);
732 int mp_afs_getpage(vop, offset, len, protp, pl, plsz, mape, addr, rw, cred)
744 register afs_int32 code;
745 struct vrequest treq;
747 int i, pages = (len + PAGE_SIZE - 1) >> page_shift;
751 struct vcache *avc = (struct vcache *)vop->vu_vp;
753 /* first, obtain the proper lock for the VM system */
756 afs_InitReq(&treq, cred);
757 code = afs_VerifyVCache(avc, &treq);
760 code = afs_CheckCode(code, &treq, 39); /* failed to get it */
765 /* clean all dirty pages for this vnode */
767 ubc_flush_dirty(vop,0);
770 afs_BozonLock(&avc->pvnLock, avc);
771 ObtainWriteLock(&avc->lock,167);
772 afs_Trace4(afs_iclSetp, CM_TRACE_PAGEIN, ICL_TYPE_POINTER, avc,
773 ICL_TYPE_LONG, offset, ICL_TYPE_LONG, len,
774 ICL_TYPE_INT32, (int) rw);
775 for (i = 0; i < pages; i++) {
777 off = offset + PAGE_SIZE * i;
778 if (protp) protp[i] = 0;
780 ReleaseWriteLock(&avc->lock);
782 code = ubc_lookup(((struct vnode *)avc)->v_object, off,
783 PAGE_SIZE, PAGE_SIZE, pagep, &flags);
785 ObtainWriteLock(&avc->lock,168);
789 if(flags & B_NOCACHE) { /* if (page) */
790 if ((rw & B_WRITE) && (offset+len >= avc->m.Length)) {
791 struct vnode *vp = (struct vnode *)avc;
792 /* we're doing a write operation past eof; no need to read it */
794 ubc_page_zero(*pagep, 0, PAGE_SIZE);
795 ubc_page_release(*pagep, B_DONE);
798 /* page wasn't cached, read it in. */
802 bp = ubc_bufalloc(*pagep, 1, PAGE_SIZE, 1, B_READ);
805 bp->b_vp = (struct vnode *)avc;
806 bp->b_blkno = btodb(off);
807 ReleaseWriteLock(&avc->lock);
808 code = afs_ustrategy(bp, cred); /* do the I/O */
809 ObtainWriteLock(&avc->lock,169);
815 ubc_page_release(pl[i], 0);
821 if ((rw & B_READ) == 0) {
823 ubc_page_dirty(pl[i]);
826 if (protp && (flags & B_DIRTY) == 0) {
827 protp[i] = VM_PROT_WRITE;
832 pl[i] = VM_PAGE_NULL;
833 ReleaseWriteLock(&avc->lock);
834 afs_BozonUnlock(&avc->pvnLock, avc);
835 afs_Trace3(afs_iclSetp, CM_TRACE_PAGEINDONE, ICL_TYPE_INT32, code,
836 ICL_TYPE_POINTER, *pagep, ICL_TYPE_INT32, flags);
837 code = afs_CheckCode(code, &treq, 40);
843 int mp_afs_putpage(vop, pl, pcnt, flags, cred)
850 register afs_int32 code=0;
851 struct vcache *avc = (struct vcache *)vop->vu_vp;
852 struct vnode *vp = (struct vnode *)avc;
856 afs_Trace4(afs_iclSetp, CM_TRACE_PAGEOUT, ICL_TYPE_POINTER, avc,
857 ICL_TYPE_INT32, pcnt, ICL_TYPE_INT32, vp->v_flag,
858 ICL_TYPE_INT32, flags);
862 if (vp->v_flag & VXLOCK) {
864 for (i = 0; i < pcnt; i++) {
865 ubc_page_release(pl[i], B_DONE|B_DIRTY);
866 pl[i] = VM_PAGE_NULL;
875 /* first, obtain the proper lock for the VM system */
876 afs_BozonLock(&avc->pvnLock, avc);
877 ObtainWriteLock(&avc->lock,170);
878 for (i = 0; i < pcnt; i++) {
879 vm_page_t page = pl[i];
884 bp = ubc_bufalloc(page, 1, PAGE_SIZE, 1, B_WRITE);
887 bp->b_vp = (struct vnode *)avc;
888 bp->b_blkno = btodb(page->pg_offset);
889 ReleaseWriteLock(&avc->lock);
890 code = afs_ustrategy(bp, cred); /* do the I/O */
891 ObtainWriteLock(&avc->lock,171);
898 pl[i] = VM_PAGE_NULL;
902 ReleaseWriteLock(&avc->lock);
903 afs_BozonUnlock(&avc->pvnLock, avc);
904 afs_Trace2(afs_iclSetp, CM_TRACE_PAGEOUTDONE, ICL_TYPE_INT32, code,
905 ICL_TYPE_INT32, avc->m.Length);
911 int mp_afs_swap(avc, swapop, argp)
919 int mp_afs_syncdata(avc, flag, offset, length, cred)
926 /* NFS V3 makes this call, ignore it. We'll sync the data in afs_fsync. */
927 if (AFS_NFSXLATORREQ(cred))
933 /* a freelist of one */
/*
 * Single cached spare buf.  mp_afs_bread() takes it (and resets this
 * pointer to 0) when it is non-null; mp_afs_bread() and mp_afs_brelse()
 * assign a buf back to it on some of their paths.
 */
934 struct buf *afs_bread_freebp = 0;
937 * Only rfs_read calls this, and it only looks at bp->b_un.b_addr.
938 * Thus we can use fake bufs (ie not from the real buffer pool).
940 mp_afs_bread(vp, lbn, bpp, cred)
946 int offset, fsbsize, error;
952 AFS_STATCNT(afs_bread);
953 fsbsize = vp->v_vfsp->vfs_bsize;
954 offset = lbn * fsbsize;
955 if (afs_bread_freebp) {
956 bp = afs_bread_freebp;
957 afs_bread_freebp = 0;
959 bp = (struct buf *) AFS_KALLOC(sizeof(*bp));
960 bp->b_un.b_addr = (caddr_t) AFS_KALLOC(fsbsize);
963 iov.iov_base = bp->b_un.b_addr;
964 iov.iov_len = fsbsize;
965 uio.afsio_iov = &iov;
966 uio.afsio_iovcnt = 1;
967 uio.afsio_seg = AFS_UIOSYS;
968 uio.afsio_offset = offset;
969 uio.afsio_resid = fsbsize;
971 error = afs_read((struct vcache *)vp, &uio, cred, lbn, bpp, 0);
973 afs_bread_freebp = bp;
978 afs_bread_freebp = bp;
980 *(struct buf **)&bp->b_vp = bp; /* mark as fake */
988 mp_afs_brelse(vp, bp)
993 AFS_STATCNT(afs_brelse);
994 if ((struct buf *)bp->b_vp != bp) { /* not fake */
996 } else if (afs_bread_freebp) {
997 AFS_KFREE(bp->b_un.b_addr, vp->v_vfsp->vfs_bsize);
998 AFS_KFREE(bp, sizeof(*bp));
1000 afs_bread_freebp = bp;
1006 mp_afs_bmap(avc, abn, anvp, anbn)
1007 register struct vcache *avc;
1008 afs_int32 abn, *anbn;
1009 struct vcache **anvp;
1012 AFS_STATCNT(afs_bmap);
1016 *anbn = abn * (8192 / DEV_BSIZE); /* in 512 byte units */
1023 mp_afs_strategy (abp)
1024 register struct buf *abp;
1026 register afs_int32 code;
1029 AFS_STATCNT(afs_strategy);
1030 code = afs_osi_MapStrategy(afs_ustrategy, abp);
1036 mp_afs_refer(vm_ubc_object_t vop)
1042 mp_afs_release(vm_ubc_object_t vop)
1048 mp_afs_write_check(vm_ubc_object_t vop, vm_page_t pp)
/*
 * vfs_ubcops table for AFS: wires the refer, release, get-page, put-page
 * and write-check callbacks to the mp_afs_* routines defined earlier in
 * this file.
 */
1055 struct vfs_ubcops afs_ubcops = {
1056 mp_afs_refer, /* refer vnode */
1057 mp_afs_release, /* release vnode */
1058 mp_afs_getpage, /* get page */
1059 mp_afs_putpage, /* put page */
1060 mp_afs_write_check, /* check writability */
1065 * Cover function for lookup name using OSF equivalent, namei()
1067 * Note, the result vnode (ni_vp) in the namei data structure remains
1068 * locked after return.
1070 lookupname(namep, seg, follow, dvpp, cvpp)
1071 char *namep; /* path name */
1072 int seg; /* address space containing name */
1073 int follow; /* follow symbolic links */
1074 struct vnode **dvpp; /* result, containing parent vnode */
1075 struct vnode **cvpp; /* result, containing final component vnode */
1077 /* Should I use free-bee in u-area? */
1078 struct nameidata *ndp = &u.u_nd;
1081 ndp->ni_nameiop = ((follow) ? (LOOKUP|FOLLOW) : (LOOKUP));
1082 ndp->ni_segflg = seg;
1083 ndp->ni_dirp = namep;
1085 if (dvpp != (struct vnode **)0)
1086 *dvpp = ndp->ni_dvp;
1087 if (cvpp != (struct vnode **)0)