src/afs/HPUX/osi_vnodeops.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 /* This is a placeholder for routines unique to the port of AFS to hp-ux*/
  11
  12 #include <afsconfig.h>
  13 #include "afs/param.h"
  14
  15 RCSID
  16     ("$Header$");
  17
  18 #include "afs/sysincludes.h"    /* Standard vendor system headers */
  19 #include "afsincludes.h"        /* Afs-based standard headers */
  20 #include "afs/afs_stats.h"      /* statistics stuff */
  21
  22 #include <sys/uio.h>
  23 #include <sys/vfs.h>
  24 #include <sys/mount.h>
  25 #include <sys/vnode.h>
  26 #include <sys/pathname.h>
  27
  28 extern struct vfsops Afs_vfsops;
  29 extern int afs_hp_strategy();
  30 extern int afs_bmap(), afs_badop(), afs_noop(), afs_lockf();
  31 extern int afs_pagein();
  32 extern int afs_pageout();
  33 extern int afs_ioctl();
  34 extern int afs_prealloc();
  35 extern int afs_mapdbd();
  36 extern int afs_mmap();
  37 extern int afs_cachelimit();
  38 extern int afs_vm_checkpage();
  39 extern int afs_vm_fscontiguous();
  40 extern int afs_vm_stopio();
  41 extern int afs_read_ahead();
  42 extern int afs_unmap();
  43 extern int afs_release();
  44 extern int afs_swapfs_len();
  45 extern int afs_readdir2();
  46 extern int afs_readdir();
  47 extern int afs_readdir3();
  48 extern int afs_pathconf();
  49 extern int afs_close();
  50
  51 #define vtoblksz(vp)    ((vp)->v_vfsp->vfs_bsize)
  52
  53 #if defined(AFS_HPUX110_ENV)
/* We no longer need to lock the VM Empire,
 * or at least that is what is claimed,
 * so we turn the vmemp_ routines into no-ops.
 * This needs to be looked at more closely.
 */
  59 #define vmemp_lockx()
  60 #undef  vmemp_returnx
  61 #define vmemp_returnx(a) return(a)
  62 #define vmemp_unlockx()
  63 #endif
  64
  65 #if !defined(AFS_HPUX110_ENV)
  66 /*
  67  * Copy an mbuf to the contiguous area pointed to by cp.
  68  * Skip <off> bytes and copy <len> bytes.
  69  * Returns the number of bytes not transferred.
  70  * The mbuf is NOT changed.
  71  */
  72 int
  73 m_cpytoc(m, off, len, cp)
  74      register struct mbuf *m;
  75      register int off, len;
  76      register caddr_t cp;
  77 {
  78     register int ml;
  79
  80     if (m == NULL || off < 0 || len < 0 || cp == NULL)
  81         osi_Panic("m_cpytoc");
  82     while (off && m)
  83         if (m->m_len <= off) {
  84             off -= m->m_len;
  85             m = m->m_next;
  86             continue;
  87         } else
  88             break;
  89     if (m == NULL)
  90         return (len);
  91
  92     ml = MIN(len, m->m_len - off);
  93     memcpy(cp, mtod(m, caddr_t) + off, (u_int) ml);
  94     cp += ml;
  95     len -= ml;
  96     m = m->m_next;
  97
  98     while (len && m) {
  99         ml = m->m_len;
 100         memcpy(cp, mtod(m, caddr_t), (u_int) ml);
 101         cp += ml;
 102         len -= ml;
 103         m = m->m_next;
 104     }
 105
 106     return (len);
 107 }
 108 #endif
 109
 110 /*
 111  *  Note that the standard Sun vnode interface doesn't haven't an vop_lockf(), so this code is
 112  * totally new.  This came about because HP-UX has lockf() implemented as
 113  * a system call while Sun has it implemented as a library (apparently).
 114  * To handle this, we have to translate the lockf() request into an
 115  * fcntl() looking request, and then translate the results back if necessary.
 116  * we call afs_lockctl() directly .
 117  */
 118 afs_lockf(vp, flag, len, cred, fp, LB, UB)
 119      struct vnode *vp;
 120      int flag;
 121      struct AFS_UCRED *cred;
 122      struct file *fp;
 123      k_off_t len, LB, UB;
 124 {
 125     /*for now, just pretend it works */
 126     struct k_flock flock;
 127     int cmd, code;
 128
 129     /*
 130      * Create a flock structure and translate the lockf request
 131      * into an appropriate looking fcntl() type request for afs_lockctl()
 132      */
 133     flock.l_whence = 0;
 134     flock.l_len = len;
 135     flock.l_start = fp->f_offset;
 136     /* convert negative lengths to positive */
 137     if (flock.l_len < 0) {
 138         flock.l_start += flock.l_len;
 139         flock.l_len = -(flock.l_len);
 140     }
 141     /*
 142      * Adjust values to look like fcntl() requests.
 143      * All locks are write locks, only F_LOCK requests
 144      * are blocking.  F_TEST has to be translated into
 145      * a get lock and then back again.
 146      */
 147     flock.l_type = F_WRLCK;
 148     cmd = F_SETLK;
 149     switch (flag) {
 150     case F_ULOCK:
 151         flock.l_type = F_UNLCK;
 152         break;
 153     case F_LOCK:
 154         cmd = F_SETLKW;
 155         break;
 156     case F_TEST:
 157         cmd = F_GETLK;
 158         break;
 159     }
 160     u.u_error = mp_afs_lockctl(vp, &flock, cmd, fp->f_cred);
 161     if (u.u_error) {
 162         return (u.u_error);     /* some other error code */
 163     }
 164     /*
 165      * if request is F_TEST, and GETLK changed
 166      * the lock type to ULOCK, then return 0, else
 167      * set errno to EACCESS and return.
 168      */
 169     if (flag == F_TEST && flock.l_type != F_UNLCK) {
 170         u.u_error = EACCES;
 171         return (u.u_error);
 172     }
 173     return (0);
 174 }
 175
 176
 177 #if defined(AFS_HPUX1122_ENV)
 178 #include "machine/vm/vmparam.h"
 179 #else
 180 #include "../machine/vmparam.h" /* For KERNELSPACE */
 181 #endif
 182 #include "h/debug.h"
 183 #include "h/types.h"
 184 #if !defined(AFS_HPUX1123_ENV)
 185         /* 11.23 is using 64 bit in many cases */
 186 #define kern_daddr_t daddr_t
 187 #endif
 188 #include "h/param.h"
 189 #include "h/vmmac.h"
 190 #include "h/time.h"
 191 #include "ufs/inode.h"
 192 #include "ufs/fs.h"
 193 #include "h/dbd.h"
 194 #if defined(AFS_HPUX1123_ENV)
 195 dbd_t       *finddbd();
 196 #endif /* AFS_HPUX1123_ENV */
 197 #include "h/vfd.h"
 198 #include "h/region.h"
 199 #include "h/pregion.h"
 200 #include "h/vmmeter.h"
 201 #include "h/user.h"
 202 #include "h/sysinfo.h"
 203 #include "h/pfdat.h"
 204 #if !defined(AFS_HPUX1123_ENV)
 205 #include "h/tuneable.h"
 206 #endif
 207 #include "h/buf.h"
 208 #include "netinet/in.h"
 209
 210 /* a freelist of one */
 211 struct buf *afs_bread_freebp = 0;
 212
 213 /*
 214  *  Only rfs_read calls this, and it only looks at bp->b_un.b_addr.
 215  *  Thus we can use fake bufs (ie not from the real buffer pool).
 216  */
 217 afs_bread(vp, lbn, bpp)
 218      struct vnode *vp;
 219      kern_daddr_t lbn;
 220      struct buf **bpp;
 221 {
 222     int offset, fsbsize, error;
 223     struct buf *bp;
 224     struct iovec iov;
 225     struct uio uio;
 226
 227     AFS_STATCNT(afs_bread);
 228     fsbsize = vp->v_vfsp->vfs_bsize;
 229     offset = lbn * fsbsize;
 230     if (afs_bread_freebp) {
 231         bp = afs_bread_freebp;
 232         afs_bread_freebp = 0;
 233     } else {
 234         bp = (struct buf *)AFS_KALLOC(sizeof(*bp));
 235         bp->b_un.b_addr = (caddr_t) AFS_KALLOC(fsbsize);
 236     }
 237
 238     iov.iov_base = bp->b_un.b_addr;
 239     iov.iov_len = fsbsize;
 240     uio.afsio_iov = &iov;
 241     uio.afsio_iovcnt = 1;
 242     uio.afsio_seg = AFS_UIOSYS;
 243     uio.afsio_offset = offset;
 244     uio.afsio_resid = fsbsize;
 245     uio.uio_fpflags = 0;
 246     *bpp = 0;
 247
 248     error = afs_read(VTOAFS(vp), &uio, p_cred(u.u_procp), lbn, bpp, 0);
 249     if (error) {
 250         afs_bread_freebp = bp;
 251         return error;
 252     }
 253     if (*bpp) {
 254         afs_bread_freebp = bp;
 255     } else {
 256         *(struct buf **)&bp->b_vp = bp; /* mark as fake */
 257         *bpp = bp;
 258     }
 259     return 0;
 260 }
 261
 262 afs_brelse(vp, bp)
 263      struct vnode *vp;
 264      struct buf *bp;
 265 {
 266     AFS_STATCNT(afs_brelse);
 267
 268     if ((struct buf *)bp->b_vp != bp) { /* not fake */
 269         ufs_brelse(bp->b_vp, bp);
 270     } else if (afs_bread_freebp) {
 271         AFS_KFREE(bp->b_un.b_addr, vp->v_vfsp->vfs_bsize);
 272         AFS_KFREE(bp, sizeof(*bp));
 273     } else {
 274         afs_bread_freebp = bp;
 275     }
 276 }
 277
 278
 279 afs_bmap(avc, abn, anvp, anbn)
 280      register struct vcache *avc;
 281      kern_daddr_t abn, *anbn;
 282      struct vcache **anvp;
 283 {
 284     AFS_STATCNT(afs_bmap);
 285     if (anvp)
 286         *anvp = avc;
 287     if (anbn)
 288         *anbn = abn * (8192 / DEV_BSIZE);       /* in 512 byte units */
 289     return 0;
 290 }
 291
 292 afs_inactive(avc, acred)
 293      register struct vcache *avc;
 294      struct AFS_UCRED *acred;
 295 {
 296     struct vnode *vp = AFSTOV(avc);
 297     ulong_t context;
 298     lock_t *sv_lock;
 299     if (afs_shuttingdown)
 300         return;
 301
 302     /*
 303      * In Solaris and HPUX s800 and HP-UX10.0 they actually call us with
 304      * v_count 1 on last reference!
 305      */
 306     MP_H_SPINLOCK_USAV(vn_h_sl_pool, vp, &sv_lock, &context);
 307     if (avc->vrefCount < 1)
 308         osi_Panic("afs_inactive : v_count < 1\n");
 309
 310     /*
 311      * If more than 1 don't unmap the vnode but do decrement the ref count
 312      */
 313     vp->v_count--;
 314     if (vp->v_count > 0) {
 315         MP_SPINUNLOCK_USAV(sv_lock, context);
 316         return 0;
 317     }
 318     MP_SPINUNLOCK_USAV(sv_lock, context);
 319     afs_InactiveVCache(avc, acred);
 320     return 0;
 321 }
 322
 323
 324 int
 325 mp_afs_open(register struct vnode **avcp, int aflags, struct AFS_UCRED *acred)
 326 {
 327     register int code;
 328
 329     AFS_GLOCK();
 330     code = afs_open(avcp, aflags, acred);
 331     AFS_GUNLOCK();
 332     return (code);
 333 }
 334
 335 int
 336 mp_afs_close(register struct vnode *avcp, int aflags, struct AFS_UCRED *acred)
 337 {
 338     register int code;
 339
 340     AFS_GLOCK();
 341     code = afs_close(avcp, aflags, acred);
 342     AFS_GUNLOCK();
 343     return (code);
 344 }
 345
 346 int
 347 mp_afs_rdwr(register struct vnode *avcp, struct uio *uio, enum uio_rw arw,
 348             int aio, struct AFS_UCRED *acred)
 349 {
 350     register int code;
 351     long save_resid;
 352
 353     AFS_GLOCK();
 354     save_resid = uio->uio_resid;
 355     code = afs_rdwr(avcp, uio, arw, aio, acred);
 356     if (arw == UIO_WRITE && code == ENOSPC) {
 357         /* HP clears code if any data written. */
 358         uio->uio_resid = save_resid;
 359     }
 360     AFS_GUNLOCK();
 361     return (code);
 362 }
 363
 364 int
 365 mp_afs_getattr(register struct vnode *avcp, struct vattr *attrs,
 366                struct AFS_UCRED *acred, enum vsync unused1)
 367 {
 368     register int code;
 369
 370     AFS_GLOCK();
 371     code = afs_getattr(avcp, attrs, acred);
 372     AFS_GUNLOCK();
 373     return (code);
 374 }
 375
 376 int
 377 mp_afs_setattr(register struct vnode *avcp, register struct vattr *attrs,
 378                struct AFS_UCRED *acred, int unused1)
 379 {
 380     register int code;
 381
 382     AFS_GLOCK();
 383     code = afs_setattr(avcp, attrs, acred);
 384     AFS_GUNLOCK();
 385     return (code);
 386 }
 387
 388 int
 389 mp_afs_access(register struct vnode *avcp, int mode, struct AFS_UCRED *acred)
 390 {
 391     register int code;
 392
 393     AFS_GLOCK();
 394     code = afs_access(avcp, mode, acred);
 395     AFS_GUNLOCK();
 396     return (code);
 397 }
 398
 399 int
 400 mp_afs_lookup(register struct vnode *adp, char *aname,
 401               register struct vnode **avcp, struct AFS_UCRED *acred,
 402               struct vnode *unused1)
 403 {
 404     register int code;
 405
 406     AFS_GLOCK();
 407     code = afs_lookup(adp, aname, avcp, acred);
 408     AFS_GUNLOCK();
 409     return (code);
 410 }
 411
 412 int
 413 mp_afs_create(register struct vnode *adp, char *aname, struct vattr *attrs,
 414               enum vcexcl aexcl, int amode, struct vnode **avcp,
 415               struct AFS_UCRED *acred)
 416 {
 417     register int code;
 418
 419     AFS_GLOCK();
 420     code = afs_create(adp, aname, attrs, aexcl, amode, avcp, acred);
 421     AFS_GUNLOCK();
 422     return (code);
 423 }
 424
 425
 426 int
 427 mp_afs_remove(register struct vnode *adp, char *aname,
 428               struct AFS_UCRED *acred)
 429 {
 430     register int code;
 431
 432     AFS_GLOCK();
 433     code = afs_remove(adp, aname, acred);
 434     AFS_GUNLOCK();
 435     return (code);
 436 }
 437
 438 int
 439 mp_afs_link(register struct vnode *avc, register struct vnode *adp,
 440             char *aname, struct AFS_UCRED *acred)
 441 {
 442     register int code;
 443
 444     AFS_GLOCK();
 445     code = afs_link(avc, adp, aname, acred);
 446     AFS_GUNLOCK();
 447     return (code);
 448 }
 449
 450 int
 451 mp_afs_rename(register struct vnode *aodp, char *aname1,
 452               register struct vnode *andp, char *aname2,
 453               struct AFS_UCRED *acred)
 454 {
 455     register int code;
 456
 457     AFS_GLOCK();
 458     code = afs_rename(aodp, aname1, andp, aname2, acred);
 459     AFS_GUNLOCK();
 460     return (code);
 461 }
 462
 463 int
 464 mp_afs_mkdir(register struct vnode *adp, char *aname, struct vattr *attrs,
 465              register struct vnode **avcp, struct AFS_UCRED *acred)
 466 {
 467     register int code;
 468
 469     AFS_GLOCK();
 470     code = afs_mkdir(adp, aname, attrs, avcp, acred);
 471     AFS_GUNLOCK();
 472     return (code);
 473 }
 474
 475
 476 int
 477 mp_afs_rmdir(register struct vnode *adp, char *aname, struct AFS_UCRED *acred)
 478 {
 479     register int code;
 480
 481     AFS_GLOCK();
 482     code = afs_rmdir(adp, aname, acred);
 483     AFS_GUNLOCK();
 484     return (code);
 485 }
 486
 487
 488 int
 489 mp_afs_readdir(register struct vnode *avc, struct uio *auio,
 490                struct AFS_UCRED *acred)
 491 {
 492     register int code;
 493
 494     AFS_GLOCK();
 495     code = afs_readdir(avc, auio, acred);
 496     AFS_GUNLOCK();
 497     return (code);
 498 }
 499
 500 int
 501 mp_afs_symlink(register struct vnode *adp, char *aname, struct vattr *attrs,
 502                char *atargetName, struct AFS_UCRED *acred)
 503 {
 504     register int code;
 505
 506     AFS_GLOCK();
 507     code = afs_symlink(adp, aname, attrs, atargetName, acred);
 508     AFS_GUNLOCK();
 509     return (code);
 510 }
 511
 512
 513 int
 514 mp_afs_readlink(register struct vnode *avc, struct uio *auio,
 515                 struct AFS_UCRED *acred)
 516 {
 517     register int code;
 518
 519     AFS_GLOCK();
 520     code = afs_readlink(avc, auio, acred);
 521     AFS_GUNLOCK();
 522     return (code);
 523 }
 524
 525 int
 526 mp_afs_fsync(register struct vnode *avc, struct AFS_UCRED *acred, int unused1)
 527 {
 528     register int code;
 529
 530     AFS_GLOCK();
 531     code = afs_fsync(avc, acred);
 532     AFS_GUNLOCK();
 533     return (code);
 534 }
 535
 536 int
 537 mp_afs_bread(register struct vnode *avc, kern_daddr_t lbn, struct buf **bpp,
 538              struct vattr *unused1, struct ucred *unused2)
 539 {
 540     register int code;
 541
 542     AFS_GLOCK();
 543     code = afs_bread(avc, lbn, bpp);
 544     AFS_GUNLOCK();
 545     return (code);
 546 }
 547
 548 int
 549 mp_afs_brelse(register struct vnode *avc, struct buf *bp)
 550 {
 551     register int code;
 552
 553     AFS_GLOCK();
 554     code = afs_brelse(avc, bp);
 555     AFS_GUNLOCK();
 556     return (code);
 557 }
 558
 559
 560 int
 561 mp_afs_inactive(register struct vnode *avc, struct AFS_UCRED *acred)
 562 {
 563     register int code;
 564
 565     AFS_GLOCK();
 566     code = afs_inactive(avc, acred);
 567     AFS_GUNLOCK();
 568     return (code);
 569 }
 570
 571 int
 572 mp_afs_lockctl(struct vnode *avc, struct flock *af, int cmd,
 573                struct AFS_UCRED *acred, struct file *unused1, off_t unused2,
 574                off_t unused3)
 575 {
 576     register int code;
 577
 578     AFS_GLOCK();
 579     code = afs_lockctl(avc, af, cmd, acred);
 580     AFS_GUNLOCK();
 581     return (code);
 582 }
 583
 584 int
 585 mp_afs_fid(struct vnode *avc, struct fid **fidpp)
 586 {
 587     register int code;
 588
 589     AFS_GLOCK();
 590     code = afs_fid(avc, fidpp);
 591     AFS_GUNLOCK();
 592     return (code);
 593 }
 594
 595 int
 596 mp_afs_readdir2(register struct vnode *avc, struct uio *auio,
 597                 struct AFS_UCRED *acred)
 598 {
 599     register int code;
 600
 601     AFS_GLOCK();
 602     code = afs_readdir2(avc, auio, acred);
 603     AFS_GUNLOCK();
 604     return (code);
 605 }
 606
 607
/*
 * Vnode operations table for AFS on HP-UX, followed by afs_ops, a pointer
 * to it.
 *
 * The initializer is positional, so entries must stay in exactly this
 * order.  mp_afs_* entries are the wrappers defined earlier in this file
 * that call the matching afs_* routine between AFS_GLOCK() and
 * AFS_GUNLOCK(); the remaining afs_* routines are installed directly.
 *
 * The two slots inside the AFS_NONFSTRANS conditional hold afs_noop unless
 * AFS_NONFSTRANS is defined, in which case they hold mp_afs_bread and
 * mp_afs_brelse; the comment inside the conditional gives the reason.
 */
  608 struct vnodeops Afs_vnodeops = {
  609     mp_afs_open,
  610     mp_afs_close,
  611     mp_afs_rdwr,
  612     afs_ioctl,
  613     afs_noop,
  614     mp_afs_getattr,
  615     mp_afs_setattr,
  616     mp_afs_access,
  617     mp_afs_lookup,
  618     mp_afs_create,
  619     mp_afs_remove,
  620     mp_afs_link,
  621     mp_afs_rename,
  622     mp_afs_mkdir,
  623     mp_afs_rmdir,
  624     afs_readdir,
  625     mp_afs_symlink,
  626     mp_afs_readlink,
  627     mp_afs_fsync,
  628     mp_afs_inactive,
  629     afs_bmap,
  630     afs_hp_strategy,
  631 #if     !defined(AFS_NONFSTRANS)
  632     /* on HPUX102 the nfs translator calls afs_bread but does
  633      * not call afs_brelse. Hence we see a memory leak. If the
  634      * VOP_BREAD() call fails, then nfs does VOP_RDWR() to get
  635      * the same data : this is the path we follow now. */
  636     afs_noop,
  637     afs_noop,
  638 #else
  639     mp_afs_bread,
  640     mp_afs_brelse,
  641 #endif
  642     afs_badop,                  /* pathsend */
  643     afs_noop,                   /* setacl */
  644     afs_noop,                   /* getacl */
  645     afs_pathconf,
  646     afs_pathconf,
  647     mp_afs_lockctl,
  648     afs_lockf,                  /* lockf */
  649     mp_afs_fid,
  650     afs_noop,                   /*fsctl */
  651     afs_badop,
  652     afs_pagein,
  653     afs_pageout,
  654     NULL,
  655     NULL,
  656     afs_prealloc,
  657     afs_mapdbd,
  658     afs_mmap,
  659     afs_cachelimit,
  660     afs_vm_checkpage,
  661     afs_vm_fscontiguous,
  662     afs_vm_stopio,
  663     afs_read_ahead,
  664     afs_release,
  665     afs_unmap,
  666     afs_swapfs_len,
  667     mp_afs_readdir2,
  668     afs_readdir3,
  669 };
  670
  671 struct vnodeops *afs_ops = &Afs_vnodeops;
 672
  673 /* vnode file operations, and our own */
/*
 * afs_fileops is a positional table of four file operations: vno_rw,
 * vno_ioctl and vno_select, plus afs_close in the last slot.
 * NOTE(review): afs_closex and vno_close are declared below but not
 * referenced by this table -- confirm whether the last slot was meant to
 * be one of them.
 */
  674 extern int vno_rw();
  675 extern int vno_ioctl();
  676 extern int vno_select();
  677 extern int afs_closex();
  678 extern int vno_close();
  679 struct fileops afs_fileops = {
  680     vno_rw,
  681     vno_ioctl,
  682     vno_select,
  683     afs_close,
  684 };
 685
 686 #define vtoblksz(vp)    ((vp)->v_vfsp->vfs_bsize)
 687
 688 /*
 689  ********************************************************************
 690  ****
 691  ****                   afspgin_setup_io_ranges ()
 692  ****    similar to:    nfspgin_setup_io_ranges ()
 693  ********************************************************************
 694  */
/*
 * Decide which page ranges a page-in should read and record them in
 * vm_info.
 *
 * vm_info     per-fault VM state; receives the I/O descriptions
 * bpages      number of pages per file system block
 * isize       file size, used to cap the range at end of file
 * startindex  page index at which the I/O setup starts
 *
 * The upper bound (maxpage) is the smallest of: the end of the block that
 * contains startindex, the limits applied by vm_reset_maxpage() and
 * vm_maxpage(), the end of the file, and startindex plus the maxpagein
 * value obtained from VM_GET_IO_INFO().  A bound of zero returns 0 at once.
 *
 * With max_num_io == 1 a single I/O is described: the range is grown
 * upward with expand_faultin_up(), then downward with
 * expand_faultin_down(), and stored as I/O number 0.
 *
 * With max_num_io > 1 one I/O is described per loop pass, starting at
 * vm_minpage(vm_info, 0) and growing upward only.  If the loop stops
 * before reaching the upper bound, VM_MULT_IO_FAILURE() and
 * VM_REINIT_FAULT_DBDVFD() are invoked and 0 is returned so the caller
 * retries; otherwise the result is maxpagein.
 *
 * Returns the page count described above, or 0 when there is nothing to
 * read or a retry is required.  Before a non-zero return the start index
 * of I/O number 0 is published with VM_SET_STARTINDX().
 *
 * NOTE(review): when max_num_io is less than 1 neither branch runs, yet
 * VM_GET_IO_STARTINDX(vm_info, 0) is still read -- confirm that case
 * cannot occur.
 */
  695 pgcnt_t
  696 afspgin_setup_io_ranges(vfspage_t * vm_info, pgcnt_t bpages, k_off_t isize,
  697                         pgcnt_t startindex)
  698 {
  699     pgcnt_t file_offset = VM_FILE_OFFSET(vm_info);
  700     pgcnt_t minpage;            /* first page to bring in */
  701     pgcnt_t maxpage;            /* one past last page to bring in */
  702     pgcnt_t maxpagein;
  703     pgcnt_t multio_maxpage;
  704     kern_daddr_t start_blk;
  705     dbd_t *dbd;
  706     expnd_flags_t up_reason, down_reason;
  707     int count = 1;
  708     int indx = 0;
  709     int max_num_io;
  710     int dbdtype;
  711     preg_t *prp;
  712
  713     VM_GET_IO_INFO(vm_info, maxpagein, max_num_io);
  714
  715     /*
  716      * We do not go past the end of the current pregion nor past the end
  717      * of the current file.
  718      */
  719
  720     maxpage = startindex + (bpages - (startindex + file_offset) % bpages);
  721     maxpage = vm_reset_maxpage(vm_info, maxpage);
  722     maxpage = MIN(maxpage, (pgcnt_t) btorp(isize) - file_offset);
  723     maxpage = MIN(maxpage, startindex + maxpagein);
  724     multio_maxpage = maxpage = vm_maxpage(vm_info, maxpage);
  725
  726     if (!maxpage)
  727         return (0);
  728
  729     VASSERT(maxpage >= startindex);
  730
  731     /*
  732      * Expanding the fault will create calls to FINDENTRY() for new
  733      * pages, which will obsolete "dbd", so copy what it points to
  734      * and clear it to prevent using stale data.
  735      */
  736
  737     prp = VM_PRP(vm_info);
  738     dbdtype = DBD_TYPE(vm_info);
  739     start_blk = DBD_DATA(vm_info);
  740     vm_info->dbd = NULL;
  741     vm_info->vfd = NULL;
  742     VASSERT(dbdtype != DBD_NONE);
  743
  744     if (max_num_io == 1) {
  745         /*
  746          * We need to set up one I/O: First we attempt to expand the
  747          * I/O forward. Then we expand the I/O backwards.
  748          */
  749         count =
  750             expand_faultin_up(vm_info, dbdtype, (int)bpages, maxpage, count,
  751                               startindex, start_blk, &up_reason);
  752         maxpage = startindex + count;
  753         VASSERT(maxpage <= startindex + maxpagein);
  754         minpage = startindex - (startindex + file_offset) % bpages;
  755         minpage = MAX(minpage, maxpage - maxpagein);
  756         VASSERT(startindex >= VM_BASE_OFFSET(vm_info));
  757         minpage = vm_minpage(vm_info, minpage);
  758         VASSERT(minpage <= startindex);
  759         count =
  760             expand_faultin_down(vm_info, dbdtype, (int)bpages, minpage, count,
  761                                 &startindex, &start_blk, &down_reason);
  762         VM_SET_IO_STARTINDX(vm_info, 0, startindex);
  763         VM_SET_IO_STARTBLK(vm_info, 0, start_blk);
  764         VM_SET_IO_COUNT(vm_info, 0, count);
  765         VM_SET_NUM_IO(vm_info, 1);
  766     }
  767
  768     if (max_num_io > 1) {
  769         /*
  770          * We need to set up multiple I/O information; beginning
  771          * with the startindex, we will expand upwards. The expansion
  772          * could stop for one of 2 reasons; we take the appropriate
  773          * action in each of these cases:
  774          *      o VM reasons: abort setting up the multiple I/O
  775          *        information and return to our caller indicating
  776          *        that "retry" is required.
  777          *      o pagelimit: set up the next I/O info [we may have
  778          *        reached multio_maxpage at this point].
  779          * Note that expansion involves no more than a block at a time;
  780          * hence it could never stop due to "discontiguous block"
  781          * reason.
  782          */
  783         startindex = minpage = vm_minpage(vm_info, 0);
  784         for (indx = 0; (indx < max_num_io) && (startindex < multio_maxpage);
  785              indx++, startindex += count) {
  786             dbd = FINDDBD(prp->p_reg, startindex);
  787             start_blk = dbd->dbd_data;
  788             maxpage =
  789                 startindex + (bpages - (startindex + file_offset) % bpages);
  790             maxpage = min(maxpage, multio_maxpage);
  791             count =
  792                 expand_faultin_up(vm_info, dbdtype, bpages, maxpage,
  793                                   1 /* count */ ,
  794                                   startindex, start_blk, &up_reason);
  795             VM_SET_IO_STARTINDX(vm_info, indx, startindex);
  796             VM_SET_IO_STARTBLK(vm_info, indx, start_blk);
  797             VM_SET_IO_COUNT(vm_info, indx, count);
  798             if (up_reason & VM_REASONS)
  799                 break;
  800             VASSERT(!(up_reason & NONCONTIGUOUS_BLOCK));
  801             VASSERT(up_reason & PAGELIMIT);
  802         }
  803         if (startindex < multio_maxpage) {
  804             VM_MULT_IO_FAILURE(vm_info);
  805             VM_REINIT_FAULT_DBDVFD(vm_info);
  806             return (0);         /* retry */
  807         }
  808         count = maxpagein;
  809         VM_SET_NUM_IO(vm_info, indx);
  810     }
  811
  812     /*
  813      * Tell VM where the I/O intends to start.  This may be different
  814      * from the faulting point.
  815      */
  816
  817     VM_SET_STARTINDX(vm_info, VM_GET_IO_STARTINDX(vm_info, 0));
  818
  819     return (count);
  820
  821 }
 822
 823 /*
 824  ********************************************************************
 825  ****
 826  ****                   afspgin_blkflsh ()
 827  ****   similar to:     nfspgin_blkflsh ()
 828  ********************************************************************
 829  */
 830 retval_t
 831 afspgin_blkflsh(vfspage_t * vm_info, struct vnode * devvp, pgcnt_t * num_4k)
 832 {
 833     int flush_reslt = 0;
 834     pgcnt_t count = *num_4k;
 835     pgcnt_t page_count;
 836     int indx = 0;
 837     int num_io = VM_GET_NUM_IO(vm_info);
 838
 839     /*
 840      * On this blkflush() we don't want to purge the buffer cache and we do
 841      * want to wait, so the flags are '0'.
 842      */
 843
 844     for (indx = 0; indx < num_io; indx++) {
 845         flush_reslt =
 846             blkflush(devvp, (kern_daddr_t) VM_GET_IO_STARTBLK(vm_info, indx),
 847                      ptob(VM_GET_IO_COUNT(vm_info, indx)), 0,
 848                      VM_REGION(vm_info));
 849         if (flush_reslt) {
 850             vm_lock(vm_info);
 851             if (vm_page_now_valid(vm_info, &page_count)) {
 852                 vm_release_memory(vm_info);
 853                 vm_release_structs(vm_info);
 854                 *num_4k = page_count;
 855                 return (VM_PAGE_PRESENT);
 856             }
 857             return (VM_RETRY);
 858         }
 859     }
 860     return (VM_DONE);
 861 }
 862
 863 /*
 864  ********************************************************************
 865  ****
 866  ****                   afspgin_io ()
 867  ****    similar to:    nfspgin_io ()
 868  ********************************************************************
 869  */
 870 int
 871 afspgin_io(vfspage_t * vm_info, struct vnode *devvp, pgcnt_t bpages,
 872            pgcnt_t maxpagein, pgcnt_t count)
 873 {
 874     int i;
 875     int error = 0;
 876     caddr_t vaddr = VM_ADDR(vm_info);
 877     caddr_t virt_addr = VM_MAPPED_ADDR(vm_info);
 878     pagein_info_t *io = VM_PAGEIN_INFO(vm_info);
 879     preg_t *prp = VM_PRP(vm_info);
 880     int wrt = VM_WRT(vm_info);
 881     space_t space = VM_SPACE(vm_info);
 882     int num_io = VM_GET_NUM_IO(vm_info);
 883
 884 #ifdef notdef                   /* Not used in AFS */
 885     /*
 886      * With VM_READ_AHEAD_ALLOWED() macro, check if read-ahead should
 887      * be used in this case.
 888      *
 889      * Unlike UFS, NFS does not start the faulting page I/O
 890      * asynchronously. Why?  Asynchronous requests are handled by the
 891      * biod's.  It doesn't make sense to queue up the faulting request
 892      * behind other asynchrnous requests.  This is not true for UFS
 893      * where the asynchrnous request is immediately handled.
 894      */
 895
 896     if ((VM_READ_AHEAD_ALLOWED(vm_info)) && (nfs_read_ahead_on)
 897         && (NFS_DO_READ_AHEAD) && (should_do_read_ahead(prp, vaddr))) {
 898
 899         pgcnt_t max_rhead_io;
 900         caddr_t rhead_vaddr;
 901         pgcnt_t total_rheads_allowed;
 902
 903         /*
 904          * Determine the maximum amount of read-ahead I/O.
 905          */
 906         total_rheads_allowed = maxpagein - count;
 907
 908         /*
 909          * If the count is less than a block, raise it to one.
 910          */
 911         if (total_rheads_allowed < bpages)
 912             total_rheads_allowed = bpages;
 913
 914         max_rhead_io = total_rheads_allowed;
 915         rhead_vaddr = VM_MAPPED_ADDR(vm_info) + (count * NBPG);
 916         error =
 917             nfs_read_ahead(vm_info->vp, prp, wrt, space, rhead_vaddr,
 918                            &max_rhead_io);
 919
 920         /*
 921          * Set the next fault location.  If read_ahead launches any
 922          * I/O it will adjust it accordingly.
 923          */
 924         vm_info->prp->p_nextfault = vm_info->startindex + count;
 925
 926         /*
 927          * Now perform the faulting I/O synchronously.
 928          */
 929         vm_unlock(vm_info);
 930
 931         error =
 932             syncpageio((swblk_t) VM_GET_IO_STARTBLK(vm_info, 0),
 933                        VM_MAPPED_SPACE(vm_info), VM_MAPPED_ADDR(vm_info),
 934                        (int)ptob(count), B_READ, devvp,
 935                        B_vfs_pagein | B_pagebf, VM_REGION(vm_info));
 936     } else
 937 #endif
 938     {
 939         virt_addr = VM_MAPPED_ADDR(vm_info);
 940         vm_unlock(vm_info);
 941         for (i = 0; i < num_io; i++) {
 942             /*
 943              * REVISIT -- investigate doing asyncpageio().
 944              */
 945             error |= (io[i].error =
 946                       syncpageio((swblk_t) VM_GET_IO_STARTBLK(vm_info, i),
 947                                  VM_MAPPED_SPACE(vm_info), virt_addr,
 948                                  (int)ptob(VM_GET_IO_COUNT(vm_info, i)),
 949                                  B_READ, devvp, B_vfs_pagein | B_pagebf,
 950                                  VM_REGION(vm_info)));
 951             virt_addr += ptob(VM_GET_IO_COUNT(vm_info, i));
 952         }
 953         /*
 954          * Set the next fault location.  If read_ahead launches any
 955          * I/O it will adjust it accordingly.
 956          */
 957         vm_info->prp->p_nextfault = vm_info->startindex + count;
 958     }
 959
 960     return (error);
 961 }
 962
 963 /*
 964  ********************************************************************
 965  ****
 966  ****                   afspgin_update_dbd ()
 967  ****    similar to:    nfspgin_update_dbd ()
 968  ********************************************************************
 969  */
 970 void
 971 afspgin_update_dbd(vfspage_t * vm_info, int bsize)
 972 {
 973     k_off_t off;
 974     pgcnt_t count = bsize / NBPG;
 975     k_off_t rem;
 976     pgcnt_t m;
 977     pgcnt_t pgindx;
 978     kern_daddr_t blkno;
 979     int num_io = VM_GET_NUM_IO(vm_info);
 980     int i;
 981
 982     for (i = 0; i < num_io; i++) {
 983
 984         pgindx = VM_GET_IO_STARTINDX(vm_info, i);
 985         off = vnodindx(VM_REGION(vm_info), pgindx);
 986         rem = off % bsize;
 987         blkno = VM_GET_IO_STARTBLK(vm_info, i);
 988
 989         VASSERT(bsize % NBPG == 0);
 990         VASSERT(rem % NBPG == 0);
 991
 992         pgindx -= (pgcnt_t) btop(rem);
 993         blkno -= (kern_daddr_t) btodb(rem);
 994
 995         /*
 996          * This region could start in mid-block.  If so, pgindx
 997          * could be less than 0, so we adjust pgindx and blkno back
 998          * up so that pgindx is 0.
 999          */
1000
1001         if (pgindx < 0) {
1002             pgcnt_t prem;
1003             prem = 0 - pgindx;
1004             pgindx = 0;
1005             count -= prem;
1006             blkno += btodb(ptob(prem));
1007         }
1008
1009         for (m = 0; m < count && pgindx < VM_REGION_SIZE(vm_info);
1010              m++, pgindx++, blkno += btodb(NBPG)) {
1011             /*
1012              * Note:  since this only changes one block, it
1013              * assumes only one block was faulted in.  Currently
1014              * this is always true for remote files, and we only
1015              * get here for remote files, so everything is ok.
1016              */
1017             vm_mark_dbd(vm_info, pgindx, blkno);
1018         }
1019     }
1020 }
1021
1022 int
1023 afs_pagein(vp, prp, wrt, space, vaddr, ret_startindex)
1024      struct vnode *vp;
1025      preg_t *prp;
1026      int wrt;
1027      space_t space;
1028      caddr_t vaddr;
1029      pgcnt_t *ret_startindex;
1030 {
1031     pgcnt_t startindex;
1032     pgcnt_t pgindx = *ret_startindex;
1033     pgcnt_t maxpagein;
1034     struct vnode *devvp;
1035     pgcnt_t count;
1036     kern_daddr_t start_blk = 0;
1037     int bsize;
1038     int error;
1039     k_off_t isize;
1040     int shared;                 /* writable memory mapped file */
1041     retval_t retval = 0;
1042     pgcnt_t ok_dbd_limit = 0;   /* last dbd that we can trust */
1043     pgcnt_t bpages;             /* number of pages per block */
1044     pgcnt_t page_count;
1045     vfspage_t *vm_info = NULL;
1046     int done;
1047
1048     struct vattr va;
1049
1050     caddr_t nvaddr;
1051     space_t nspace;
1052     int change_to_fstore = 0;   /* need to change dbds to DBD_FSTORE */
1053     int flush_start_blk = 0;
1054     int flush_end_blk = 0;
1055
1056     int i, j;
1057
1058     AFS_STATCNT(afs_pagein);
1059     vmemp_lockx();              /* lock down VM empire */
1060
1061     /* Initialize the VM info structure */
1062     done =
1063         vm_pagein_init(&vm_info, prp, pgindx, space, vaddr, wrt, 0,
1064                        LGPG_ENABLE);
1065
1066     /* Check to see if we slept and the page was falted in. */
1067     if (done) {
1068         vm_release_structs(vm_info);
1069         vmemp_returnx(1);
1070     }
1071
1072     vp = VM_GET_PAGEIN_VNODE(vm_info);
1073     VASSERT(vp != NULL);
1074     shared = VM_SHARED_OBJECT(vm_info);
1075     VASSERT(DBD_TYPE(vm_info) != DBD_NONE);
1076
1077     /*
1078      * Get the devvp and block size for this vnode type
1079      */
1080     devvp = vp;
1081     bsize = vp->v_vfsp->vfs_bsize;
1082     if (bsize <= 0 || (bsize & (DEV_BSIZE - 1)))
1083         osi_Panic("afs_pagein: bsize is zero or not a multiple of DEV_BSIZE");
1084
1085     bpages = (pgcnt_t) btop(bsize);
1086     VASSERT(bpages > 0);
1087     VM_SET_FS_MAX_PAGES(vm_info, bpages);
1088
1089     /* this trace cannot be here because the afs_global lock might not be
1090      * held at this point. We hold the vm global lock throughout
1091      * this procedure ( and not the AFS global lock )
1092      * afs_Trace4(afs_iclSetp, CM_TRACE_HPPAGEIN, ICL_TYPE_POINTER, (afs_int32) vp,
1093      * ICL_TYPE_LONG, DBD_TYPE(vm_info), ICL_TYPE_LONG, bpages,
1094      * ICL_TYPE_LONG, shared);
1095      */
1096     /* Come here if we have to release the region lock before
1097      * locking pages.  This can happen in memreserve() and
1098      * blkflush().
1099      */
1100   retry:
1101     /*
1102      * For remote files like ours, we want to check to see if the file has shrunk.
1103      * If so, we should invalidate any pages past the end.  In the name
1104      * of efficiency, we only do this if the page we want to fault is
1105      * past the end of the file.
1106      */
1107     {
1108         if (VOP_GETATTR(vp, &va, kt_cred(u.u_kthreadp), VIFSYNC) != 0) {
1109             VM_ZOMBIE_OBJECT(vm_info);
1110             vm_release_memory(vm_info);
1111             vm_release_structs(vm_info);
1112             vmemp_returnx(0);
1113         }
1114         isize = va.va_size;
1115         if (vnodindx(VM_REGION(vm_info), pgindx) >= isize) {
1116             /*
1117              * The file has shrunk and someone is trying to access a
1118              * page past the end of the object.  Shrink the object back
1119              * to its currrent size, send a SIGBUS to the faulting
1120              * process and return.
1121              *
1122              * We must release the region lock before calling mtrunc(),
1123              * since mtrunc() locks all the regions that are using this
1124              * file.
1125              */
1126             vm_release_memory(vm_info);
1127             vm_truncate_region(vm_info, isize);
1128             vm_release_structs(vm_info);
1129             vmemp_returnx(-SIGBUS);
1130         }
1131     }
1132
1133     maxpagein = vm_pick_maxpagein(vm_info);
1134     if (vm_wait_for_memory(vm_info, maxpagein, 1)) {
1135         /* Check to see if we should continue faulting.  */
1136         if (vm_page_now_valid(vm_info, &page_count)) {
1137             vm_release_memory(vm_info);
1138             vm_release_structs(vm_info);
1139             vmemp_returnx(page_count);
1140         }
1141     }
1142     if (count = vm_no_io_required(vm_info)) {
1143         /* Release any excess memory.  */
1144         vm_release_memory(vm_info);
1145         vm_release_structs(vm_info);
1146         vmemp_returnx(count);
1147     }
1148 #ifdef OSDEBUG
1149     /*
1150      * We should never have DBD_HOLE pages in a non-MMF region.
1151      */
1152     if (!shared)
1153         VASSERT(dbd->dbd_type != DBD_HOLE);
1154 #endif
1155     VASSERT(DBD_TYPE(vm_info) != DBD_NONE);
1156
1157     startindex = *ret_startindex;
1158
1159     /*
1160      * If the page we want is in memory already, take it
1161      */
1162     if (VM_MEMORY_RESERVED(vm_info) < maxpagein) {
1163         /* pick up the rest of memory now.  */
1164         if (vm_wait_for_memory(vm_info, maxpagein, 0)) {
1165             if (vm_page_now_valid(vm_info, &page_count)) {
1166                 vm_release_memory(vm_info);
1167                 vm_release_structs(vm_info);
1168                 vmemp_returnx(page_count);
1169             }
1170             goto retry;
1171         }
1172     }
1173
1174     if (!
1175         (count =
1176          afspgin_setup_io_ranges(vm_info, bpages, isize, startindex))) {
1177         goto retry;
1178     }
1179
1180     startindex = VM_GET_STARTINDX(vm_info);
1181
1182     VASSERT(maxpagein >= count);
1183
1184     /*
1185      * Release the memory we won't need.
1186      */
1187     if (count < maxpagein) {
1188         vm_release_excess_memory(vm_info,
1189                                  (VM_MEMORY_RESERVED(vm_info) - count));
1190     }
1191
1192     retval = afspgin_blkflsh(vm_info, devvp, &count);
1193
1194     if (retval == VM_RETRY) {
1195         goto retry;
1196     }
1197
1198     if (retval == VM_PAGE_PRESENT)
1199         return (count);
1200
1201 #if 0
1202     /*
1203      * The definition of krusage_cntr_t is in h/kmetric.h, which
1204      * is not shipped.  Since it's just statistics, we punt and do
1205      * not update it.  If it's a problem we'll need to get HP to export
1206      * an interface that we can use to increment the counter.
1207      */
1208
1209     /* It's a real fault, not a reclaim */
1210     {
1211         krusage_cntr_t *temp;
1212         temp = kt_cntrp(u.u_kthreadp);
1213         temp->krc_majflt++;
1214     }
1215 #endif
1216
1217     /*
1218      * Tell VM where the I/O intends to start.  This may be different
1219      * from the faulting point.
1220      */
1221
1222     /*
1223      * vm_prepare_io will fill the region with pages and release the
1224      * region lock.
1225      */
1226     vm_prepare_io(vm_info, &count);
1227
1228     /*
1229      * Count may have been adjusted, check to make sure it's non-zero.
1230      */
1231     if (count == 0) {
1232         if (vm_retry(vm_info)) {
1233             goto retry;
1234         }
1235
1236         /*
1237          * Release resources and retry the fault.  Release any excess
1238          * memory.
1239          */
1240
1241         vm_release_memory(vm_info);
1242         vm_release_structs(vm_info);
1243         vmemp_returnx(0);
1244     }
1245
1246     error = afspgin_io(vm_info, devvp, bpages, maxpagein, count);
1247
1248     if ((VM_IS_ZOMBIE(vm_info)) || (error)) {
1249         retval = -SIGBUS;
1250         VM_ZOMBIE_OBJECT(vm_info);
1251         goto backout;
1252     }
1253     /*
1254      * For a writable memory mapped file that is remote we must
1255      * detect potential holes in the file and force allocation of
1256      * disk space on the remote system.  Unfortunately, there is
1257      * no easy way to do this, so this gets a little ugly.
1258      */
1259     if (shared && wrt) {
1260         /*
1261          * See if The user wants to write to this page.  Write some
1262          * minimal amount of data back to the remote file to
1263          * force allocation of file space.  We only need to
1264          * write a small amount, since holes are always at
1265          * least one filesystem block in size.
1266          */
1267         error = vm_alloc_hole(vm_info);
1268
1269         /*
1270          * If some sort of I/O error occurred we generate a
1271          * SIGBUS for the process that caused the write,
1272          * undo our page locks, etc and return.
1273          */
1274         if ((VM_IS_ZOMBIE(vm_info)) || (error)) {
1275             VM_ZOMBIE_OBJECT(vm_info);
1276             retval = -SIGBUS;
1277             goto backout;
1278         }
1279
1280         /*
1281          * Change these dbds to DBD_FSTORE.  We cannot do it here,
1282          * since the region must be locked, and it is not locked
1283          * at the moment.  We cannot lock the region yet, as we
1284          * first have to release the page locks.
1285          */
1286         change_to_fstore = 1;
1287     }
1288
1289     vm_finish_io(vm_info, count);
1290
1291     /*
1292      * Acquire the lock before we play around with changing the vfd's.
1293      */
1294     vm_lock(vm_info);
1295
1296     if (change_to_fstore)
1297         afspgin_update_dbd(vm_info, bsize);
1298
1299 #if defined(AFS_HPUX110_ENV)
1300     getppdp()->cnt.v_exfod += count;
1301 #else
1302     mpproc_info[getprocindex()].cnt.v_exfod += count;
1303 #endif
1304     vmemp_unlockx();            /* free up VM empire */
1305     *ret_startindex = startindex;
1306
1307     /*
1308      * In case we have any excess memory...
1309      */
1310     if (VM_MEMORY_RESERVED(vm_info))
1311         vm_release_memory(vm_info);
1312     vm_release_structs(vm_info);
1313
1314     return count;
1315
1316   backout:
1317
1318     vm_finish_io_failed(vm_info, count);
1319
1320     vm_lock(vm_info);
1321
1322     vm_undo_validation(vm_info, count);
1323
1324     /*
1325      * In case we have any excess memory...
1326      */
1327     if (VM_MEMORY_RESERVED(vm_info))
1328         vm_release_memory(vm_info);
1329     vm_release_structs(vm_info);
1330
1331     vmemp_unlockx();            /* free up VM empire */
1332     return retval;
1333 }
1334
/*
 * afs_pageout
 *
 * Page-out entry point: write runs of pages from a region back to the
 * file and, when PAGEOUT_FREE is set, let the pages be stolen.
 *
 * Parameters:
 *   vp    - not used; the vnode to write to is taken from
 *           VM_GET_PAGEOUT_VNODE().
 *   prp   - pregion whose pages are being written.
 *   start - first page index to consider.
 *   end   - last page index to consider (inclusive; the loop runs while
 *           i <= end).
 *   flags - PAGEOUT_* flags.  PAGEOUT_FREE, PAGEOUT_VHAND and
 *           PAGEOUT_HARD are decoded into locals here; PF_DEACT is
 *           tested when updating statistics.
 *
 * Returns 0 on every path.  The result of asyncpageio() is only checked
 * by VASSERT.
 *
 * The VM empire lock is held from vmemp_lockx() until just before
 * vm_wait_for_io(); no afs_Trace calls are made because the AFS global
 * lock might not be held.
 */
int
afs_pageout(vp, prp, start, end, flags)
     struct vnode *vp;          /* not used */
     preg_t *prp;
     pgcnt_t start;
     pgcnt_t end;
     int flags;
{
    struct vnode *filevp;
    struct vnode *devvp;
    pgcnt_t i;
    int steal;
    int vhand;
    int hard;                   /* assigned below but never read here */
    int *piocnt;                /* wakeup counter used if PAGEOUT_WAIT;
                                 * not referenced by name in this function */
    struct ucred *old_cred;
    vfspage_t vm_info;
    fsdata_t args;

    int inode_changed = 0;
    int file_is_remote;
    struct inode *ip;           /* only set (and only used) for VUFS files */

    AFS_STATCNT(afs_pageout);

    steal = (flags & PAGEOUT_FREE);
    vhand = (flags & PAGEOUT_VHAND);
    hard = (flags & PAGEOUT_HARD);

    vmemp_lockx();

    /*  Initialize the VM info structure.  */
    vm_pageout_init(&vm_info, prp, start, end, 0, 0, 0, flags);

    /*
     * If the region is marked "don't swap", then don't steal any pages
     * from it.  We can, however, write dirty pages out to disk (only if
     * PAGEOUT_FREE is not set).
     */
    if (vm_no_pageout(&vm_info)) {
        vmemp_unlockx();
        return (0);
    }

    /*
     * If caller wants to wait until the I/O is complete.
     */
    vm_setup_wait_for_io(&vm_info);

    filevp = VM_GET_PAGEOUT_VNODE(&vm_info);    /* always page out to back store */
    VASSERT(filevp != NULL);

    memset((caddr_t) & args, 0, sizeof(fsdata_t));
    args.remote_down = 0;       /* assume remote file servers are up */
    args.remote = 1;            /* we are remote */
    args.bsize = 0;             /* filled up later by afs_vm_checkpage() */

    if (filevp->v_fstype == VUFS) {
        /* Local UFS file: I/O goes to the inode's device vnode. */
        ip = VTOI(filevp);
        devvp = ip->i_devvp;
        file_is_remote = 0;
    } else {
        /* Anything else is treated as remote; I/O goes to the file vnode. */
        file_is_remote = 1;
        devvp = filevp;

        /*
         * If we are vhand(), and this is an NFS file, we need to
         * see if the NFS server is "down".  If so, we decide
         * if we will try to talk to it again, or defer pageouts
         * of dirty NFS pages until a future time.
         * (The code below is compiled out.)
         */
#ifdef  notdef
        if (vhand && filevp->v_fstype == VNFS && vtomi(filevp)->mi_down
            && vtomi(filevp)->mi_hard) {
            extern afs_int32 vhand_nfs_retry;
            /*
             * If there is still time left on our timer, we will
             * not talk to this server right now.
             */
            if (vhand_nfs_retry > 0)
                args.remote_down = 1;
        }
#endif
    }

    /*
     * Initialize args.  We set bsize to 0 to tell vfs_vfdcheck() that
     * it must get the file size and other attributes if it comes across
     * a dirty page.
     */
    vm_info.fs_data = (caddr_t) & args;

    /* this trace cannot be here because the afs_global lock might not be
     * held at this point. We hold the vm global lock throughout
     * this procedure ( and not the AFS global lock )
     * afs_Trace4(afs_iclSetp, CM_TRACE_HPPAGEOUT, ICL_TYPE_POINTER, (afs_int32) filevp,
     * ICL_TYPE_LONG, start, ICL_TYPE_LONG, end, ICL_TYPE_LONG, flags);
     */

    i = start;

    /* One iteration per run of pages found between i and end. */
    while (i <= end) {
        struct buf *bp;
        k_off_t start;          /* byte offset of the run; shadows the
                                 * 'start' parameter inside this loop */
        pgcnt_t npages;
        k_off_t nbytes;
        int error;

        extern int pageiodone();
        space_t nspace;         /* not used */
        caddr_t nvaddr;         /* not used */

        /*
         * Ask the VM system to find the next run of pages.
         */
        vm_find_next_range(&vm_info, i, end);

        /*
         * It's possible that the remote file shrunk in size.  Check the flags
         * to see if the request was beyond the end of the file.  If it was,
         * truncate the region to the file size and continue.  We could be on a
         * run so after truncation continue, there may be some I/O to write
         * out.
         */
        if (VM_FS_FLAGS(&vm_info) & PAGEOUT_TRUNCATE) {
            pgcnt_t pglen = (pgcnt_t) btorp(args.isize);

            /*
             * This page is past the end of the file.  Unlock this page
             * (region_trunc will throw it away) and then call
             * region_trunc() to invalidate all pages past the new end of
             * the file.
             */
            region_trunc(VM_REGION(&vm_info), pglen, pglen + 1);

            /*
             * remove the truncation flag.
             */
            VM_UNSETFS_FLAGS(&vm_info, PAGEOUT_TRUNCATE);
        }

        if (VM_NO_PAGEOUT_RUN(&vm_info))
            break;

        /*
         * We have a run of dirty pages [args.start...args.end].
         */
        VASSERT(filevp->v_fstype != VCDFS);
        VASSERT((filevp->v_vfsp->vfs_flag & VFS_RDONLY) == 0);
        VASSERT(VM_GET_NUM_IO(&vm_info) == 1);

        /*
         * We will be doing an I/O on the region, let the VM system know.
         */
        (void)vm_up_physio_count(&vm_info);

        /*
         * Okay, get set to perform the I/O.
         */
        inode_changed = 1;
        npages =
            (VM_END_PAGEOUT_INDX(&vm_info) + 1) -
            VM_START_PAGEOUT_INDX(&vm_info);

        /*
         * Allocate and initialize an I/O buffer.
         */
        bp = bswalloc();
        vm_init_bp(&vm_info, bp);       /* Let the VM system initialize */

        /* Identify this buffer for KI */
        bp->b_bptype = B_vfs_pageout | B_pagebf;

        if (steal)
            bp->b_flags = B_CALL | B_BUSY | B_PAGEOUT;  /* steal pages */
        else
            bp->b_flags = B_CALL | B_BUSY;      /* keep pages */

        /*
         * If we are vhand paging over NFS, we will wait for the I/O
         * to complete.
         */
        if (vhand && filevp->v_fstype == VNFS) {
            bp->b_flags &= ~B_CALL;
        } else {
            bp->b_iodone = (int (*)())pageiodone;
        }

        /*
         * Make sure we do not write past the end of the file.
         */
        nbytes = ptob(npages);
        start = vnodindx(VM_REGION(&vm_info), vm_info.start);
        if (start + nbytes > args.isize) {
#ifdef OSDEBUG
            /*
             * The amount we are off better not be bigger than a
             * filesystem block.
             */
            if (start + nbytes - args.isize >= args.bsize) {
                osi_Panic("afs_pageout: remainder too large");
            }
#endif
            /*
             * Reset the size of the I/O as necessary.  For remote
             * files, we set the size to the exact number of bytes to
             * the end of the file.  For local files, we round this up
             * to the nearest DEV_BSIZE chunk since disk I/O must always
             * be in multiples of DEV_BSIZE.  In this case, we do not
             * bother to zero out the data past the "real" end of the
             * file, this is done when the data is read (either through
             * mmap() or by normal file system access).
             */
            if (file_is_remote)
                nbytes = args.isize - start;
            else
                nbytes = roundup(args.isize - start, DEV_BSIZE);
        }

        /*
         * Now get ready to perform the I/O.  If the pages cannot be
         * protected, undo the invalidation, fail the I/O, free the
         * buffer and stop processing further runs.
         */
        if (!vm_protect_pageout(&vm_info, npages)) {
            VASSERT(vhand);
            vm_undo_invalidation(&vm_info, vm_info.start, vm_info.end);
            vm_finish_io_failed(&vm_info, npages);
            bswfree(bp);
            break;
        }
        /*
         * If this is an NFS write by vhand(), we will not be calling
         * pageiodone().  asyncpageio() increments parolemem for us
         * if bp->b_iodone is pageiodone, so we must do it manually
         * if pageiodone() will not be called automatically.
         */
        if (!(bp->b_flags & B_CALL) && steal) {
            register ulong_t context;

            SPINLOCK_USAV(pfdat_lock, context);
            parolemem += btorp(nbytes);
            SPINUNLOCK_USAV(pfdat_lock, context);
        }
        blkflush(devvp, VM_START_PAGEOUT_BLK(&vm_info), (long)nbytes,
                 (BX_NOBUFWAIT | BX_PURGE), VM_REGION(&vm_info));

        /*
         * If vhand is the one paging things out, and this is an NFS
         * file, we need to temporarily become a different user so
         * that we are not trying to page over NFS as root.  We use
         * the user credentials associated with the writable file
         * pointer that is in the pseudo-vas for this MMF.
         *
         * NOTE: we are currently using "va_rss" to store the ucred
         *       value in the vas (this should be fixed in 10.0).
         */
        old_cred = kt_cred(u.u_kthreadp);
        if (vhand) {
#if defined(AFS_HPUX1123_ENV)
                /*
                 * DEE - 1123 does not have the vas.h, and it looks
                 * we should never be called with a NFS type file anyway.
                 * so where did this come from? Was it copied from NFS?
                 * I assume it was, so we will add an assert for now
                 * and see if the code runs at all.
                 */
                VASSERT(filevp->v_fstype != VNFS);
#else
            set_kt_cred(u.u_kthreadp, filevp->v_vas->va_cred);

            /*
             * If root was the one who opened the mmf for write,
             * va_cred will be NULL.  So reset kt_cred(u.u_kthreadp) to what it
             * was.  We will page out as root, but that is the
             * correct thing to do in this case anyway.
             */
            if (kt_cred(u.u_kthreadp) == NULL)
                set_kt_cred(u.u_kthreadp, old_cred);
#endif
        }

        /*
         * Really do the I/O.
         */
        error =
            asyncpageio(bp, VM_START_PAGEOUT_BLK(&vm_info),
                        VM_MAPPED_SPACE(&vm_info), VM_MAPPED_ADDR(&vm_info),
                        (int)nbytes, B_WRITE, devvp);

        /* The error is only checked here; it is not returned to the caller. */
        VASSERT(error == 0);

#ifdef  notdef
        /*
         * If we are vhand paging over NFS we want to wait for the
         * I/O to complete and take the appropriate actions if an
         * error is encountered.
         */
        if (vhand) {
            if (waitforpageio(bp) && nfs_mi_harddown(filevp)) {
                /*
                 * The server is down, ignore this failure, and
                 * try again later. (rfscall() has set our retry
                 * timer).
                 */
                fsdata.remote_down = 1;
                pageiocleanup(bp, 0);

                /*
                 * vm_vfdcheck() has cleared the valid bit on the
                 * vfds for these pages.  We must go back and set the
                 * valid bit, as the pages are really not gone.
                 *
                 * NOTE: we can do this because we still hold (and have
                 * not released) the region lock.
                 */
                if (steal)
                    vm_undo_invalidation(&vm_info, vm_info.start,
                                         vm_info.end);
            } else {
                /*
                 * The I/O succeeded, or we had an error that we do
                 * not want to defer until later.  Call pageiodone()
                 * to handle things.
                 */
                pageiodone(bp);
            }
        }
#endif

        /*
         * And restore our credentials to what they were.
         */
        set_kt_cred(u.u_kthreadp, old_cred);

        /*
         * If we reserved memory in vfs_vfdcheck(), (only for NFS) we
         * can now unreserve it.
         */
        if (vm_info.vm_flags & PAGEOUT_RESERVED) {
            vm_info.vm_flags &= ~PAGEOUT_RESERVED;
            vm_release_malloc_memory();
        }

        /*
         * Update statistics
         */
        if (steal) {
            if (flags & PF_DEACT) {
#if defined(AFS_HPUX110_ENV)
                getppdp()->cnt.v_pswpout += npages;
#else
                mpproc_info[getprocindex()].cnt.v_pswpout += npages;
#endif
/*              sar_bswapout += ptod(npages);*/
            } else if (vhand) {
#if defined(AFS_HPUX110_ENV)
                getppdp()->cnt.v_pgout++;
                getppdp()->cnt.v_pgpgout += npages;
#else
                mpproc_info[getprocindex()].cnt.v_pgout++;
                mpproc_info[getprocindex()].cnt.v_pgpgout += npages;
#endif
            }
        }

        /*
         * If time and patience have delivered enough
         * pages, then quit now while we are ahead.
         */
        if (VM_STOP_PAGING(&vm_info))
            break;

        /* Resume the scan just past the run we have written. */
        i = VM_END_PAGEOUT_INDX(&vm_info) - VM_BASE_OFFSET(&vm_info) + 1;
    }

    vm_finish_pageout(&vm_info);        /* update vhand's stealscan */

    vmemp_unlockx();

    /*
     * If we wanted to wait for the I/O to complete, sleep on piocnt.
     * We must decrement it by one first, and then make sure that it
     * is non-zero before going to sleep.
     */
    vm_wait_for_io(&vm_info);

    /* Only local (VUFS) files have an inode to mark and update. */
    if (inode_changed && !file_is_remote) {
        imark(ip, IUPD | ICHG);
        iupdat(ip, 0, 0);
    }
    return 0;
}
1726
1727 int
1728 afs_mapdbd(filevp, offset, bn, flags, hole, startidx, endidx)
1729      struct vnode *filevp;
1730      off_t offset;
1731      kern_daddr_t *bn;          /* Block number. */
1732      int flags;                 /* B_READ or B_WRITE */
1733      int *hole;                 /* To be used for read-ahead. */
1734      pgcnt_t *startidx;         /* To be used for read-ahead. */
1735      pgcnt_t *endidx;           /* To be used for read-ahead. */
1736 {
1737     kern_daddr_t lbn, local_bn;
1738     int on;
1739     int err;
1740     long bsize = vtoblksz(filevp) & ~(DEV_BSIZE - 1);
1741
1742     if (startidx)
1743         *startidx = (pgcnt_t) (offset / NBPG);
1744     if (endidx)
1745         *endidx = (pgcnt_t) (offset / NBPG);
1746     if (hole)
1747         *hole = 0;              /* Can't have holes. */
1748     if (bsize <= 0)
1749         osi_Panic("afs_mapdbd: zero size");
1750
1751     lbn = (kern_daddr_t) (offset / bsize);
1752     on = offset % bsize;
1753
1754     err = VOP_BMAP(filevp, lbn, NULL, &local_bn, flags);
1755     VASSERT(err == 0);
1756
1757     /*
1758      * We can never get a bn less than zero on remote files.
1759      */
1760     VASSERT(local_bn >= 0);
1761
1762     local_bn = local_bn + btodb(on);
1763     *bn = local_bn;
1764
1765     return (0);
1766 }
1767
1768 /*
1769  * Return values:
1770  *      1: The blocks are contiguous.
1771  *      0: The blocks are not contiguous.
1772  */
1773 int
1774 afs_vm_fscontiguous(vp, args, cur_data)
1775      struct vnode *vp;
1776      vfspage_t *args;
1777      u_int cur_data;
1778 {
1779     if (cur_data == (VM_END_PAGEOUT_BLK(args) + btodb(NBPG))) {
1780         return (1);
1781     } else {
1782         return (0);
1783     }
1784 }
1785
1786 /*
1787  * Return values:
1788  *      1: Stop, this page is the last in the block.
1789  *      0: Continue on
1790  * Terminate requests at filesystem block boundaries
1791  */
1792 afs_vm_stopio(vp, args)
1793      struct vnode *vp;
1794      vfspage_t *args;
1795 {
1796     fsdata_t *fsdata = (fsdata_t *) args->fs_data;
1797
1798 #if defined(AFS_HPUX1123_ENV)
1799         uint64_t tmpdb;
1800         tmpdb = VM_END_PAGEOUT_BLK(args);
1801
1802         if ((dbtob(tmpdb) + NBPG) % (fsdata->bsize) == 0)
1803 #else
1804     if ((dbtob(VM_END_PAGEOUT_BLK(args)) + NBPG) % (fsdata->bsize) == 0)
1805 #endif /* AFS_HPUX1123_ENV */
1806         {
1807         return (1);
1808     } else {
1809         return (0);
1810     }
1811 }
1812
1813 /*
1814  *      afs_vm_checkpage is called by the VM while collecting a run of
1815  *      pages on a pageout.  afs_vm_checkpage() is called for each page
1816  *      VM wants to write to disk.
1817  */
1818 afs_vm_checkpage(vp, args, pgindx, cur_data)
1819      struct vnode *vp;
1820      vfspage_t *args;
1821      pgcnt_t pgindx;
1822      int cur_data;
1823 {
1824     fsdata_t *fsdata = (fsdata_t *) args->fs_data;
1825
1826     if (fsdata->remote_down) {  /* never happens for AFS */
1827         /*
1828          * The remote system is down.
1829          */
1830         VASSERT(args->run == 0);
1831         return 1;
1832     }
1833     /*
1834      * A dirty page.  If we have not yet determined the file size and
1835      * other attributes that we need to write out pages (the block
1836      * size and ok_dbd_limit), get that information now.
1837      */
1838     if (fsdata->bsize == 0) {
1839         k_off_t isize;
1840         long bsize;
1841         struct vattr va;
1842         struct vnode *filevp;
1843         /*
1844          * Get the various attributes about the file.  Store them
1845          * in args for the next time around.
1846          */
1847         filevp = args->vp;
1848
1849         bsize = vtoblksz(filevp);
1850         args->maxpgs = (pgcnt_t) btop(bsize);
1851
1852         if (VOP_GETATTR(filevp, &va, kt_cred(u.u_kthreadp), VIFSYNC) != 0) {
1853             /*
1854              * The VOP_GETATTR() failed.
1855              * we are vhand, and this is a hard mount, we will
1856              * skip dirty pages for a while and try again later.
1857              */
1858             if (args->vm_flags & PAGEOUT_VHAND) {
1859                 VASSERT(args->run == 0);
1860                 return 1;
1861             }
1862             /*
1863              * This is a "soft" mount, or some other error was
1864              * returned from the server.  Mark this region
1865              * as a zombie, and free this dirty page.
1866              */
1867             VM_ZOMBIE_OBJECT(args);
1868
1869             /*
1870              * The caller will see r_zomb and remove the page
1871              * appropriately.
1872              */
1873             return (1);
1874         }
1875         isize = va.va_size;
1876         fsdata->isize = isize;
1877         fsdata->bsize = bsize;
1878         fsdata->remote = 1;
1879     }
1880     /*
1881      * See if the file has shrunk (this could have happened
1882      * asynchronously because of NFS or DUX).  If so, invalidate
1883      * all of the pages past the end of the file. This is only
1884      * needed for remote files, as local files are truncated
1885      * synchronously.
1886      */
1887
1888     if (vnodindx(VM_REGION(args), pgindx) > fsdata->isize) {
1889         /*
1890          * This page is past the end of the file.  Unlock this page
1891          * (region_trunc will throw it away) and then call region_trunc()
1892          * to invalidate all pages past the new end of the file.
1893          */
1894         VM_SETFS_FLAGS(args, PAGEOUT_TRUNCATE);
1895         return (1);
1896     }
1897 #ifdef notdef
1898     if ((args->vm_flags & PAGEOUT_VHAND)
1899         && (!(args->vm_flags & PAGEOUT_RESERVED))
1900         && (!(VM_IS_ZOMBIE(args)))) {
1901         VASSERT(args->run == 0);
1902         if (vm_reserve_malloc_memory(NFS_PAGEOUT_MEM)) {
1903             /*
1904              * Got enough memory to pageout.  Mark the fact that we did
1905              * a sysprocmemreserve(), so that we can sysprocmemunreserve() it
1906              * later (in remote_pageout()).
1907              */
1908             args->vm_flags |= PAGEOUT_RESERVED;
1909         } else {
1910             /*
1911              * We do not have enough memory to do this pageout.  By
1912              * definition, we do not yet have a run, so we just unlock
1913              * this page and tell foreach_valid() to continue scanning.
1914              * If we come across another dirty page, we will try to
1915              * reserve memory again.  That is okay, in fact some memory
1916              * may have freed up (as earlier pageouts complete under
1917              * interrupt).
1918              */
1919             return 1;
1920         }
1921     }
1922 #endif
1923     return (0);
1924 }
1925
1926 afs_swapfs_len(bp)
1927      struct buf *bp;
1928 {
1929     long fs_bsize;
1930     long max_size;
1931     long bnrem;
1932
1933     fs_bsize = vtoblksz(bp->b_vp);
1934     /*
1935      * Check to see if we are starting mid block.  If so, then
1936      * we must return the remainder of the block or less depending
1937      * on the length.
1938      */
1939     bnrem = bp->b_offset % fs_bsize;
1940     if (bnrem) {
1941         max_size = fs_bsize - bnrem;
1942     } else {
1943         max_size = fs_bsize;
1944     }
1945
1946     if (bp->b_bcount > max_size) {
1947         return (max_size);
1948     } else {
1949         return (bp->b_bcount);
1950     }
1951 }
1952
1953 afs_mmap(vp, off, size_bytes, access)
1954      struct vnode *vp;
1955      u_int off;
1956 #if defined(AFS_HPUX1111_ENV)
1957      u_long size_bytes;
1958 #else
1959      u_int size_bytes;
1960 #endif
1961      int access;
1962 {
1963     long bsize = vtoblksz(vp);
1964
1965     if (bsize % NBPG != 0) {
1966         return (EINVAL);
1967     }
1968
1969     return (0);
1970 }
1971
1972 afs_cachelimit(vp, len, location)
1973      struct vnode *vp;
1974      k_off_t len;
1975      int *location;
1976 {
1977     /*
1978      * Disk addresses are logical, not physical, so fragments are
1979      * transparent.
1980      */
1981     *location = btorp(len) + 1;
1982 }
1983
/*
 * afs_release
 *
 * Release hook for AFS vnodes.  Nothing is done with vp; the call
 * always reports success (0).
 */
int
afs_release(vp)
     struct vnode *vp;
{
    return 0;
}
1989
1990 int
1991 afs_unmap(vp, off, size_bytes, access)
1992      struct vnode *vp;
1993      u_int off;
1994 #if defined(AFS_HPUX1111_ENV)
1995      u_long size_bytes;
1996 #else
1997      u_int size_bytes;
1998 #endif
1999      int access;
2000 {
2001     return 0;
2002 }
2003
2004 int
2005 afs_read_ahead(vp, prp, wrt, space, vaddr, rhead_cnt)
2006      struct vnode *vp;
2007      preg_t *prp;
2008      int wrt;
2009      space_t space;
2010      caddr_t vaddr;
2011      pgcnt_t *rhead_cnt;
2012 {
2013     printf("afs_read_ahead returning 0 \n");
2014     return 0;
2015 }
2016
2017 int
2018 afs_prealloc(vp, size, ignore_minfree, reserved)
2019      struct vnode *vp;
2020       /* DEE on 11.22 following is off_t */
2021      size_t size;
2022      int ignore_minfree;
2023      int reserved;
2024 {
2025     printf("afs_prealloc returning ENOSPC\n");
2026     return ENOSPC;
2027 }
2028
2029 int
2030 afs_ioctl(vp, com, data, flag, cred)
2031      struct vnode *vp;
2032      int com;
2033      caddr_t data;
2034      int flag;
2035      struct ucred *cred;
2036 {
2037     int error;
2038     struct afs_ioctl afsioctl, *ai;
2039
2040     AFS_STATCNT(afs_ioctl);
2041
2042     /* The call must be a VICEIOCTL call */
2043     if (((com >> 8) & 0xff) == 'V') {
2044 #ifdef notdef
2045         /* AFS_COPYIN returns error 14. Copy data in instead */
2046         AFS_COPYIN(data, (caddr_t) & afsioctl, sizeof(afsioctl), error);
2047         if (error)
2048             return (error);
2049 #endif
2050         ai = (struct afs_ioctl *)data;
2051         afsioctl.in = ai->in;
2052         afsioctl.out = ai->out;
2053         afsioctl.in_size = ai->in_size;
2054         afsioctl.out_size = ai->out_size;
2055         error = HandleIoctl(VTOAFS(vp), com, &afsioctl);
2056         return (error);
2057     }
2058     return (ENOTTY);
2059 }
2060
2061 #if defined(AFS_HPUX1111_ENV)
2062 /* looks like even if appl is 32 bit, we need to round to 8 bytes */
2063 /* This had no effect, it must not be being used */
2064
2065 #define roundtoint(x)   (((x) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
2066 #define reclen(dp)      roundtoint(((dp)->d_namlen + 1 + (sizeof(u_long)) +\
2067                                 sizeof(u_int) + 2 * sizeof(u_short)))
2068 #else
2069
2070 #define roundtoint(x)   (((x) + (sizeof(int) - 1)) & ~(sizeof(int) - 1))
2071 #define reclen(dp)      roundtoint(((dp)->d_namlen + 1 + (sizeof(u_long)) +\
2072                                 2 * sizeof(u_short)))
2073 #endif
2074
2075 int
2076 afs_readdir(vp, uiop, cred)
2077      struct vnode *vp;
2078      struct uio *uiop;
2079      struct ucred *cred;
2080 {
2081     struct uio auio;
2082     struct iovec aiov;
2083     caddr_t ibuf, obuf, ibufend, obufend;
2084     struct __dirent32 *idp;
2085     struct dirent *odp;
2086     int count, outcount;
2087     dir_off_t offset;
2088     uint64_t tmp_offset;
2089
2090     count = uiop->uio_resid;
2091     /* Allocate temporary space for format conversion */
2092     ibuf = kmem_alloc(2 * count);       /* overkill - fix later */
2093     obuf = kmem_alloc(count + sizeof(struct dirent));
2094     aiov.iov_base = ibuf;
2095     aiov.iov_len = count;
2096     auio.uio_iov = &aiov;
2097     auio.uio_iovcnt = 1;
2098     offset = auio.uio_offset = uiop->uio_offset;
2099     auio.uio_seg = UIOSEG_KERNEL;
2100     auio.uio_resid = count;
2101     auio.uio_fpflags = 0;
2102
2103     u.u_error = mp_afs_readdir2(vp, &auio, cred);
2104     if (u.u_error)
2105         goto out;
2106
2107     /* Convert entries from __dirent32 to dirent format */
2108
2109     for (idp = (struct __dirent32 *)ibuf, odp =
2110          (struct dirent *)obuf, ibufend =
2111          ibuf + (count - auio.uio_resid), obufend = obuf + count;
2112          (caddr_t) idp < ibufend;
2113          idp = (struct __dirent32 *)((caddr_t) idp + idp->__d_reclen), odp =
2114          (struct dirent *)((caddr_t) odp + odp->d_reclen)) {
2115         odp->d_ino = idp->__d_ino;
2116         odp->d_namlen = idp->__d_namlen;
2117         (void)strcpy(odp->d_name, idp->__d_name);
2118         odp->d_reclen = reclen(odp);
2119         if ((caddr_t) odp + odp->d_reclen > obufend)
2120             break;
2121         /* record offset *after* we're sure to use this entry */
2122         memcpy((char *)&tmp_offset, (char *)&idp->__d_off, sizeof tmp_offset);
2123         offset = tmp_offset;
2124     }
2125
2126     outcount = (caddr_t) odp - obuf;
2127     AFS_UIOMOVE(obuf, outcount, UIO_READ, uiop, u.u_error);
2128     if (u.u_error)
2129         goto out;
2130     uiop->uio_offset = offset;
2131   out:
2132     kmem_free(ibuf, count);
2133     kmem_free(obuf, count + sizeof(struct dirent));
2134     return u.u_error;
2135 }
2136
2137
2138 #define roundtolong(x)   (((x) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
2139 #define reclen_dirent64(dp)      roundtolong(((dp)->__d_namlen + 1 + (2*sizeof(u_long)) +\
2140                                 2 * sizeof(u_short)))
2141
2142 int
2143 afs_readdir3(vp, uiop, cred)
2144      struct vnode *vp;
2145      struct uio *uiop;
2146      struct ucred *cred;
2147 {
2148     struct uio auio;
2149     struct iovec aiov;
2150     caddr_t ibuf, obuf, ibufend, obufend;
2151     struct __dirent32 *idp;
2152     struct __dirent64 *odp;
2153     int count, outcount;
2154     dir_off_t offset;
2155
2156     count = uiop->uio_resid;
2157     /* Allocate temporary space for format conversion */
2158     ibuf = kmem_alloc(2 * count);       /* overkill - fix later */
2159     obuf = kmem_alloc(count + sizeof(struct __dirent64));
2160     aiov.iov_base = ibuf;
2161     aiov.iov_len = count;
2162     auio.uio_iov = &aiov;
2163     auio.uio_iovcnt = 1;
2164     offset = auio.uio_offset = uiop->uio_offset;
2165     auio.uio_seg = UIOSEG_KERNEL;
2166     auio.uio_resid = count;
2167     auio.uio_fpflags = 0;
2168
2169     u.u_error = mp_afs_readdir2(vp, &auio, cred);
2170     if (u.u_error)
2171         goto out;
2172
2173     /* Convert entries from __dirent32 to __dirent64 format */
2174
2175     for (idp = (struct __dirent32 *)ibuf, odp =
2176          (struct __dirent64 *)obuf, ibufend =
2177          ibuf + (count - auio.uio_resid), obufend = obuf + count;
2178          (caddr_t) idp < ibufend;
2179          idp = (struct __dirent32 *)((caddr_t) idp + idp->__d_reclen), odp =
2180          (struct __dirent64 *)((caddr_t) odp + odp->__d_reclen)) {
2181         memcpy((char *)&odp->__d_off, (char *)&idp->__d_off,
2182                sizeof odp->__d_off);
2183         odp->__d_ino = idp->__d_ino;
2184         odp->__d_namlen = idp->__d_namlen;
2185         (void)strcpy(odp->__d_name, idp->__d_name);
2186         odp->__d_reclen = reclen_dirent64(odp);
2187         if ((caddr_t) odp + odp->__d_reclen > obufend)
2188             break;
2189         /* record offset *after* we're sure to use this entry */
2190         offset = odp->__d_off;
2191     }
2192
2193     outcount = (caddr_t) odp - obuf;
2194     AFS_UIOMOVE(obuf, outcount, UIO_READ, uiop, u.u_error);
2195     if (u.u_error)
2196         goto out;
2197     uiop->uio_offset = offset;
2198   out:
2199     kmem_free(ibuf, count);
2200     kmem_free(obuf, count + sizeof(struct __dirent64));
2201     return u.u_error;
2202 }
2203
/* Non-zero: compile in the semaphore save-area hash table code below. */
#define AFS_SV_SEMA_HASH 1
/* Non-zero: make the hash table routines printf a trace of their calls. */
#define AFS_SV_SEMA_HASH_DEBUG 0
2206
2207 #if AFS_SV_SEMA_HASH
2208 /* This portion of the code was originally used to implement
2209  * thread specific storage for the semaphore save area. However,
 * there were some spare fields in the proc structure, and those are
 * now being used for saving the semaphores.  Hence, this portion of
 * the code is no longer used.
2213  */
2214
2215 /* This portion of the code implements thread specific information.
2216  * The thread id is passed in as the key. The semaphore saved area
2217  * is hashed on this key.
2218  */
2219
2220 /* why is this hash table required ?
2221  * The AFS code is written in such a way that a GLOCK() is done in
2222  * one function and the GUNLOCK() is done in another function further
2223  * down the call chain. The GLOCK() call has to save the current
2224  * semaphore status before acquiring afs_global_sema. The GUNLOCK
 * has to release afs_global_sema and reacquire the semaphore status
 * that existed before the corresponding GLOCK. If GLOCK() and
 * GUNLOCK() were called in the same function, the GLOCK call could
 * have stored the saved semaphore status in a local variable and the
2229  * corresponding GUNLOCK() call could have restored the original
2230  * status from this local variable. But this is not the case with
2231  * AFS code. Hence, we have to implement a thread specific semaphore
2232  * save area. This is implemented as a hash table. The key is the
2233  * thread id.
2234  */
2235
2236 /* In order for multithreaded processes to work, the sv_sema structures
2237  * must be saved on a per-thread basis, not a per-process basis.  There
2238  * is no per-thread storage available to hijack in the OS per-thread
2239  * data structures (e.g. struct user) so we revive this code.
2240  * I removed the upper limit on the memory consumption since we don't
2241  * know how many threads there will be.  Now the code first checks the
2242  * freeList.  If that fails it then tries garbage collecting.  If that
2243  * doesn't free up anything then it allocs what it needs.
2244  */
2245
/* Payload stored in each hash entry, and the type of the lookup key. */
#define ELEMENT         sv_sema_t
#define KEY             tid_t
/* Bucket index for a key: the key modulo the number of buckets. */
#define Hash(xx)        (  (xx) % sizeOfHashTable )
/*
 * Lock helpers.  Each takes the semaphore object itself (not a pointer)
 * and passes its address to initsema() / MP_PSEMA() / MP_VSEMA().
 */
#define hashLockInit(xx) initsema(&xx,1, FILESYS_SEMA_PRI, FILESYS_SEMA_ORDER)
#define hashLock(xx)    MP_PSEMA(&xx)
#define hashUnlock(xx)  MP_VSEMA(&xx)

/* One hash entry; lives either on a bucket chain or on freeList. */
typedef struct elem {
    struct elem *next;          /* next entry on the same chain */
    ELEMENT element;            /* the saved data handed out to callers */
    KEY key;                    /* key this entry was created for */
    int refCnt;                 /* raised by afsHashInsertFind(), lowered by
                                 * afsHashRelease(); entries at 0 are taken
                                 * by afsHashGarbageCollect() */
} Element;

/* One hash bucket: a lock and the chain of entries it protects. */
typedef struct bucket {
    sema_t lock;
    Element *element;           /* head of this bucket's chain */
} Bucket;

static int sizeOfHashTable;     /* number of buckets, set by afsHash() */
static Bucket *hashTable;       /* the bucket array, allocated by afsHash() */

/*
 * Byte count kept by the hash code: set to the size of the bucket array
 * in afsHash(), increased by sizeof(Element) for each entry put into
 * use and decreased by the same amount for each entry collected.
 */
static int currentSize = 0;
static Element *freeList;       /* free list */

#pragma align 64
static sema_t afsHashLock = { 0 };      /* global lock for hash table */

static void afsHashGarbageCollect();
2275
2276 /*
2277 ** The global lock protects the global data structures,
2278 ** e.g. freeList and currentSize.
2279 ** The bucket lock protects the link list hanging off that bucket.
2280 ** The lock hierarchy : one can obtain the bucket lock while holding
2281 ** the global lock, but not vice versa.
2282 */
2283
2284
2285 void
2286 afsHash(int nbuckets)
2287 {                               /* allocate the hash table */
2288     int i;
2289
2290 #if AFS_SV_SEMA_HASH_DEBUG
2291     printf("afsHash: enter\n");
2292 #endif
2293
2294     sizeOfHashTable = nbuckets;
2295     currentSize = nbuckets * sizeof(Bucket);
2296
2297     if (hashTable)
2298         osi_Panic("afs: SEMA Hashtable already created\n");
2299
2300     hashTable = (Bucket *) AFS_KALLOC(sizeOfHashTable * sizeof(Bucket));
2301     if (!hashTable)
2302         osi_Panic("afs: cannot create SEMA Hashtable\n");
2303
2304     /* initialize the hash table and associated locks */
2305     memset((char *)hashTable, 0, sizeOfHashTable * sizeof(Bucket));
2306     for (i = 0; i < sizeOfHashTable; i++)
2307         hashLockInit(hashTable[i].lock);
2308     hashLockInit(afsHashLock);
2309
2310 #if AFS_SV_SEMA_HASH_DEBUG
2311     printf("afsHash: exit\n");
2312 #endif
2313 }
2314
2315 ELEMENT *
2316 afsHashInsertFind(KEY key)
2317 {
2318     int index;
2319     Element *ptr;
2320
2321 #if AFS_SV_SEMA_HASH_DEBUG
2322     printf("afsHashInsertFind: %d\n", key);
2323 #endif
2324     if (!hashTable)
2325         osi_Panic("afs: afsHashInsertFind: no hashTable\n");
2326
2327     index = Hash(key);          /* get bucket number */
2328     hashLock(hashTable[index].lock);    /* lock this bucket */
2329     ptr = hashTable[index].element;
2330
2331     /* if it is already there */
2332     while (ptr) {
2333         if (ptr->key == key) {
2334             ptr->refCnt++;      /* hold it */
2335             hashUnlock(hashTable[index].lock);
2336 #if AFS_SV_SEMA_HASH_DEBUG
2337             printf("afsHashInsertFind: %d FOUND\n", key);
2338 #endif
2339             return &(ptr->element);
2340         } else {
2341             ptr = ptr->next;
2342         }
2343     }
2344
2345     hashUnlock(hashTable[index].lock);
2346
2347     /*  if something exists in the freeList, take it from there */
2348     ptr = NULL;
2349     hashLock(afsHashLock);
2350
2351     if (freeList) {
2352         ptr = freeList;         /* reuse entry */
2353         freeList = freeList->next;
2354     } else {
2355         afsHashGarbageCollect();        /* afsHashLock locked */
2356         if (freeList) {
2357             ptr = freeList;     /* reuse entry */
2358             freeList = freeList->next;
2359         } else {
2360             ptr = (Element *) AFS_KALLOC(sizeof(Element));
2361         }
2362     }
2363
2364     currentSize += sizeof(Element);     /* update memory used */
2365     hashUnlock(afsHashLock);
2366
2367     if (!ptr)
2368         osi_Panic("afs: SEMA Hashtable cannot create new entry\n");
2369     /* create new entry */
2370     ptr->key = key;
2371     memset((char *)&ptr->element, 0, sizeof(ptr->element));
2372     ptr->refCnt = 1;            /* this guy */
2373
2374     /* insert new entry in bucket */
2375     hashLock(hashTable[index].lock);    /* lock this bucket */
2376     ptr->next = hashTable[index].element;
2377     hashTable[index].element = ptr;
2378     hashUnlock(hashTable[index].lock);
2379
2380 #if AFS_SV_SEMA_HASH_DEBUG
2381     printf("afsHashInsertFind: %d MADE\n", key);
2382 #endif
2383
2384     return &(ptr->element);
2385 }
2386
2387 ELEMENT *
2388 afsHashFind(KEY key)
2389 {
2390     int index;
2391     Element *ptr;
2392
2393 #if AFS_SV_SEMA_HASH_DEBUG
2394     printf("afsHashFind: %d\n", key);
2395 #endif
2396     if (!hashTable)
2397         osi_Panic("afs: afsHashFind: no hashTable\n");
2398
2399     index = Hash(key);          /* get bucket number */
2400     hashLock(hashTable[index].lock);    /* lock this bucket */
2401     ptr = hashTable[index].element;
2402
2403     /* it should be in the hash table */
2404     while (ptr) {
2405         if (ptr->key == key) {
2406             if (ptr->refCnt <= 0)
2407                 osi_Panic("afs: SEMA HashTable entry already released\n");
2408             hashUnlock(hashTable[index].lock);
2409 #if AFS_SV_SEMA_HASH_DEBUG
2410             printf("afsHashFind: %d FOUND\n", key);
2411 #endif
2412             return &(ptr->element);
2413         } else {
2414             ptr = ptr->next;
2415         }
2416     }
2417
2418     hashUnlock(hashTable[index].lock);
2419     /* it better be in the hash table */
2420     osi_Panic("afs: SEMA HashTable wants non-existent entry \n");
2421     return 0;
2422 }
2423
2424 void
2425 afsHashRelease(KEY key)
2426 {
2427     int index;
2428     Element *ptr;
2429
2430 #if AFS_SV_SEMA_HASH_DEBUG
2431     printf("afsHashRelease: %d\n", key);
2432 #endif
2433     if (!hashTable)
2434         osi_Panic("afs: afsHashRelease: no hashTable\n");
2435
2436     index = Hash(key);          /* get bucket number */
2437     hashLock(hashTable[index].lock);    /* lock this bucket */
2438     ptr = hashTable[index].element;
2439
2440     /* it should be in the hash table */
2441     while (ptr) {
2442         if (ptr->key == key) {
2443             if (ptr->refCnt <= 0)
2444                 osi_Panic("afs: SEMA HashTable entry already released\n");
2445             ptr->refCnt--;      /* release this guy */
2446             hashUnlock(hashTable[index].lock);
2447 #if AFS_SV_SEMA_HASH_DEBUG
2448             printf("afsHashRelease: %d FOUND\n", key);
2449 #endif
2450             return;
2451         } else {
2452             ptr = ptr->next;
2453         }
2454     }
2455
2456     hashUnlock(hashTable[index].lock);
2457     /* it better be in the hash table */
2458     osi_Panic("afs: SEMA HashTable deleting non-existent entry \n");
2459 }
2460
2461 /* this should be called with afsHashLock WRITE locked */
2462 static void
2463 afsHashGarbageCollect()
2464 {
2465     int index;
2466     Element *ptr;
2467     int foundFlag = 0;
2468
2469     if (!hashTable)
2470         osi_Panic("afs: afsHashGarbageCollect: no hashTable\n");
2471
2472     for (index = 0; index < sizeOfHashTable; index++) {
2473         hashLock(hashTable[index].lock);
2474         ptr = hashTable[index].element; /* pick up bucket */
2475
2476         while (ptr && !ptr->refCnt) {
2477             /* insert this element into free list */
2478             Element *temp;
2479             temp = ptr->next;
2480             ptr->next = freeList;
2481             freeList = ptr;
2482
2483             foundFlag = 1;      /* found at least one */
2484             currentSize -= sizeof(Element);
2485             ptr = temp;
2486         }
2487         hashTable[index].element = ptr;
2488
2489         /* scan thru the remaining list */
2490         if (ptr) {
2491             while (ptr->next) {
2492                 if (ptr->next->refCnt == 0) {
2493                     /* collect this element */
2494                     Element *temp;
2495                     temp = ptr->next;
2496                     ptr->next = ptr->next->next;
2497                     temp->next = freeList;
2498                     freeList = temp;
2499                     foundFlag = 1;
2500                     currentSize -= sizeof(Element);
2501                 } else {
2502                     ptr = ptr->next;
2503                 }
2504             }
2505         }
2506         hashUnlock(hashTable[index].lock);
2507     }
2508 #if 0
2509     if (!foundFlag)
2510         osi_Panic("afs: SEMA HashTable full\n");
2511 #endif
2512 }
2513
2514 #endif /* AFS_SV_SEMA_HASH */
2515
2516
2517 afs_hp_strategy(bp)
2518      register struct buf *bp;
2519 {
2520     register afs_int32 code;
2521     struct uio tuio;
2522     struct iovec tiovec[1];
2523     extern caddr_t hdl_kmap_bp();
2524     register struct kthread *t = u.u_kthreadp;
2525
2526     AFS_STATCNT(afs_hp_strategy);
2527     /*
2528      * hdl_kmap_bp() saves "b_bcount" and restores it in hdl_remap_bp() after
2529      * the I/O.  We must save and restore the count because pageiodone()
2530      * uses b_bcount to determine how many pages to unlock.
2531      *
2532      * Remap the entire range.
2533      */
2534     hdl_kmap_bp(bp);
2535
2536     AFS_GLOCK();
2537     afs_Trace4(afs_iclSetp, CM_TRACE_HPSTRAT, ICL_TYPE_POINTER, bp->b_vp,
2538                ICL_TYPE_LONG, (int)bp->b_blkno * DEV_BSIZE, ICL_TYPE_LONG,
2539                bp->b_bcount, ICL_TYPE_LONG, 0);
2540
2541     /* Set up the uio structure */
2542     tuio.afsio_iov = tiovec;
2543     tuio.afsio_iovcnt = 1;
2544     tuio.afsio_offset = DEV_BSIZE * bp->b_blkno;
2545     tuio.afsio_seg = AFS_UIOSYS;
2546     tuio.afsio_resid = bp->b_bcount;
2547     tuio.uio_fpflags = 0;
2548     tiovec[0].iov_base = bp->b_un.b_addr;
2549     tiovec[0].iov_len = bp->b_bcount;
2550
2551     /* Do the I/O */
2552     if ((bp->b_flags & B_READ) == B_READ) {
2553         /* read b_bcount bytes into kernel address b_un.b_addr
2554          * starting at byte DEV_BSIZE * b_blkno. Bzero anything
2555          * we can't read, and finally call iodone(bp).  File is
2556          * in bp->b_vp. Credentials are from u area??
2557          */
2558         code = afs_rdwr(VTOAFS(bp->b_vp), &tuio, UIO_READ, 0, kt_cred(t));
2559         if (code == 0)
2560             if (tuio.afsio_resid > 0) {
2561                 privlbzero(bvtospace(bp, bp->b_un.b_addr),
2562                            bp->b_un.b_addr + bp->b_bcount - tuio.afsio_resid,
2563                            (size_t) tuio.afsio_resid);
2564
2565             }
2566     } else
2567         code = afs_rdwr(VTOAFS(bp->b_vp), &tuio, UIO_WRITE, 0, kt_cred(t));
2568
2569     /* Remap back to the user's space */
2570     hdl_remap_bp(bp);
2571
2572     AFS_GUNLOCK();
2573
2574     iodone(bp);
2575     return code;
2576 }
2577
2578 afs_pathconf(vp, name, resultp, cred)
2579      struct vnode *vp;
2580      int name;
2581      int *resultp;
2582      struct ucred *cred;        /* unused */
2583 {
2584     switch (name) {
2585     case _PC_LINK_MAX:          /* Maximum number of links to a file */
2586         *resultp = 255;         /* an unsigned short on the fileserver */
2587         break;                  /* a unsigned char in the client.... */
2588
2589     case _PC_NAME_MAX:          /* Max length of file name */
2590         *resultp = 255;
2591         break;
2592
2593     case _PC_PATH_MAX:          /* Maximum length of Path Name */
2594         *resultp = 1024;
2595         break;
2596
2597     case _PC_PIPE_BUF:          /* Max atomic write to pipe.  See fifo_vnops */
2598     case _PC_CHOWN_RESTRICTED:  /* Anybody can chown? */
2599     case _PC_NO_TRUNC:          /* No file name truncation on overflow? */
2600         u.u_error = EOPNOTSUPP;
2601         return (EOPNOTSUPP);
2602         break;
2603
2604     case _PC_MAX_CANON: /* TTY buffer size for canonical input */
2605         /* need more work here for pty, ite buffer size, if differ */
2606         if (vp->v_type != VCHR) {
2607             u.u_error = EINVAL;
2608             return (EINVAL);
2609         }
2610         *resultp = CANBSIZ;     /*for tty */
2611         break;
2612
2613     case _PC_MAX_INPUT:
2614         /* need more work here for pty, ite buffer size, if differ */
2615         if (vp->v_type != VCHR) {       /* TTY buffer size */
2616             u.u_error = EINVAL;
2617             return (EINVAL);
2618         }
2619         *resultp = TTYHOG;      /*for tty */
2620         break;
2621
2622     case _PC_VDISABLE:
2623         /* Terminal special characters can be disabled? */
2624         if (vp->v_type != VCHR) {
2625             u.u_error = EINVAL;
2626             return (EINVAL);
2627         }
2628         *resultp = 1;
2629         break;
2630
2631     case _PC_SYNC_IO:
2632         if ((vp->v_type != VREG) && (vp->v_type != VBLK)) {
2633             *resultp = -1;
2634             return EINVAL;
2635         }
2636         *resultp = 1;           /* Synchronized IO supported for this file */
2637         break;
2638
2639     case _PC_FILESIZEBITS:
2640         if (vp->v_type != VDIR)
2641             return (EINVAL);
2642         *resultp = MAX_SMALL_FILE_BITS;
2643         break;
2644
2645     default:
2646         return (EINVAL);
2647     }
2648
2649     return (0);
2650 }