src/rx/rx_packet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #include <afs/param.h>
  12
  13 #ifdef KERNEL
  14 # if defined(UKERNEL)
  15 #  include "afs/sysincludes.h"
  16 #  include "afsincludes.h"
  17 #  include "rx_kcommon.h"
  18 # else /* defined(UKERNEL) */
  19 #  ifdef RX_KERNEL_TRACE
  20 #   include "rx_kcommon.h"
  21 #  endif
  22 #  include "h/types.h"
  23 #  ifndef AFS_LINUX20_ENV
  24 #   include "h/systm.h"
  25 #  endif
  26 #  if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
  27 #   include "afs/sysincludes.h"
  28 #  endif
  29 #  if defined(AFS_OBSD_ENV)
  30 #   include "h/proc.h"
  31 #  endif
  32 #  include "h/socket.h"
  33 #  if !defined(AFS_SUN5_ENV) &&  !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
  34 #   if  !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
  35 #    include "sys/mount.h"              /* it gets pulled in by something later anyway */
  36 #   endif
  37 #   include "h/mbuf.h"
  38 #  endif
  39 #  include "netinet/in.h"
  40 #  include "afs/afs_osi.h"
  41 #  include "rx_kmutex.h"
  42 # endif /* defined(UKERNEL) */
  43 #else /* KERNEL */
  44 # include <roken.h>
  45 # include <assert.h>
  46 # include <afs/opr.h>
  47 # if defined(AFS_NT40_ENV)
  48 #  ifndef EWOULDBLOCK
  49 #   define EWOULDBLOCK WSAEWOULDBLOCK
  50 #  endif
  51 #  include "rx_user.h"
  52 #  include "rx_xmit_nt.h"
  53 # endif
  54 # include <lwp.h>
  55 #endif /* KERNEL */
  56
  57 #ifdef  AFS_SUN5_ENV
  58 # include <sys/sysmacros.h>
  59 #endif
  60
  61 #include "rx.h"
  62 #include "rx_clock.h"
  63 #include "rx_queue.h"
  64 #include "rx_packet.h"
  65 #include "rx_atomic.h"
  66 #include "rx_globals.h"
  67 #include "rx_internal.h"
  68 #include "rx_stats.h"
  69
  70 #include "rx_peer.h"
  71 #include "rx_conn.h"
  72 #include "rx_call.h"
  73
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
/* Packet most recently initialised by one of the rxi_MorePackets* routines.
 * Under RXDEBUG_PACKET each new packet's allNextp is set to the previous
 * value, so the packets form a chain headed here. */
static struct rx_packet *rx_mallocedP = 0;
#ifdef RXDEBUG_PACKET
/* Next packetId to assign when a packet is initialised. */
static afs_uint32       rx_packet_id = 0;
#endif

extern char cml_version_number[];

/* Low-level allocator used by rxi_AllocPackets and rxi_AllocDataBuf. */
static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_uint32 ahost, short aport,
                                afs_int32 istack);
static struct rx_packet *rxi_AllocPacketNoLock(int class);

#ifndef KERNEL
/* Only built outside the kernel. */
static void rxi_MorePacketsNoLock(int apackets);
#endif

#ifdef RX_ENABLE_TSFPQ
/* Helpers used when thread-specific free packet queues are enabled. */
static int rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first,
                                 int flush_global);
static void rxi_AdjustLocalPacketsTSFPQ(int num_keep_local,
                                        int allow_overcommit);
#else
/* Helpers used when thread-specific free packet queues are disabled. */
static void rxi_FreePacketNoLock(struct rx_packet *p);
static int rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first);
static int rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first,
                                   struct rx_queue * q);
#endif
 107
 108 /* some rules about packets:
 109  * 1.  When a packet is allocated, the final iov_buf contains room for
 110  * a security trailer, but iov_len masks that fact.  If the security
 111  * package wants to add the trailer, it may do so, and then extend
 112  * iov_len appropriately.  For this reason, packet's niovecs and
 113  * iov_len fields should be accurate before calling PreparePacket.
 114 */
 115
 116 /* Preconditions:
 117  *        all packet buffers (iov_base) are integral multiples of
 118  *        the word size.
 119  *        offset is an integral multiple of the word size.
 120  */
 121 afs_int32
 122 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
 123 {
 124     unsigned int i;
 125     size_t l;
 126     for (l = 0, i = 1; i < packet->niovecs; i++) {
 127         if (l + packet->wirevec[i].iov_len > offset) {
 128             return
 129                 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
 130                                  (offset - l)));
 131         }
 132         l += packet->wirevec[i].iov_len;
 133     }
 134
 135     return 0;
 136 }
 137
 138 /* Preconditions:
 139  *        all packet buffers (iov_base) are integral multiples of the word size.
 140  *        offset is an integral multiple of the word size.
 141  */
 142 afs_int32
 143 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
 144 {
 145     unsigned int i;
 146     size_t l;
 147     for (l = 0, i = 1; i < packet->niovecs; i++) {
 148         if (l + packet->wirevec[i].iov_len > offset) {
 149             *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
 150                              (offset - l))) = data;
 151             return 0;
 152         }
 153         l += packet->wirevec[i].iov_len;
 154     }
 155
 156     return 0;
 157 }
 158
 159 /* Preconditions:
 160  *        all packet buffers (iov_base) are integral multiples of the
 161  *        word size.
 162  *        offset is an integral multiple of the word size.
 163  * Packet Invariants:
 164  *         all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 165  */
 166 afs_int32
 167 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
 168                   char *out)
 169 {
 170     unsigned int i, j, l, r;
 171     for (l = 0, i = 1; i < packet->niovecs; i++) {
 172         if (l + packet->wirevec[i].iov_len > offset) {
 173             break;
 174         }
 175         l += packet->wirevec[i].iov_len;
 176     }
 177
 178     /* i is the iovec which contains the first little bit of data in which we
 179      * are interested.  l is the total length of everything prior to this iovec.
 180      * j is the number of bytes we can safely copy out of this iovec.
 181      * offset only applies to the first iovec.
 182      */
 183     r = resid;
 184     while ((r > 0) && (i < packet->niovecs)) {
 185         j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
 186         memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
 187         r -= j;
 188         out += j;
 189         l += packet->wirevec[i].iov_len;
 190         offset = l;
 191         i++;
 192     }
 193
 194     return (r ? (resid - r) : resid);
 195 }
 196
 197
 198 /* Preconditions:
 199  *        all packet buffers (iov_base) are integral multiples of the
 200  *        word size.
 201  *        offset is an integral multiple of the word size.
 202  */
 203 afs_int32
 204 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
 205 {
 206     unsigned int i, j, l, o, r;
 207     char *b;
 208
 209     for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
 210         if (l + packet->wirevec[i].iov_len > o) {
 211             break;
 212         }
 213         l += packet->wirevec[i].iov_len;
 214     }
 215
 216     /* i is the iovec which contains the first little bit of data in which we
 217      * are interested.  l is the total length of everything prior to this iovec.
 218      * j is the number of bytes we can safely copy out of this iovec.
 219      * offset only applies to the first iovec.
 220      */
 221     r = resid;
 222     while ((r > 0) && (i <= RX_MAXWVECS)) {
 223         if (i >= packet->niovecs)
 224             if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)     /* ++niovecs as a side-effect */
 225                 break;
 226
 227         b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
 228         j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
 229         memcpy(b, in, j);
 230         r -= j;
 231         in += j;
 232         l += packet->wirevec[i].iov_len;
 233         offset = l;
 234         i++;
 235     }
 236
 237     return (r ? (resid - r) : resid);
 238 }
 239
 240 int
 241 rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
 242 {
 243     struct rx_packet *p, *np;
 244
 245     num_pkts = AllocPacketBufs(class, num_pkts, q);
 246
 247     for (queue_Scan(q, p, np, rx_packet)) {
 248         RX_PACKET_IOV_FULLINIT(p);
 249     }
 250
 251     return num_pkts;
 252 }
 253
 254 #ifdef RX_ENABLE_TSFPQ
 255 static int
 256 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
 257 {
 258     struct rx_ts_info_t * rx_ts_info;
 259     int transfer;
 260     SPLVAR;
 261
 262     RX_TS_INFO_GET(rx_ts_info);
 263
 264     transfer = num_pkts - rx_ts_info->_FPQ.len;
 265     if (transfer > 0) {
 266         NETPRI;
 267         MUTEX_ENTER(&rx_freePktQ_lock);
 268         transfer = MAX(transfer, rx_TSFPQGlobSize);
 269         if (transfer > rx_nFreePackets) {
 270             /* alloc enough for us, plus a few globs for other threads */
 271             rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
 272         }
 273
 274         RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
 275
 276         MUTEX_EXIT(&rx_freePktQ_lock);
 277         USERPRI;
 278     }
 279
 280     RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
 281
 282     return num_pkts;
 283 }
 284 #else /* RX_ENABLE_TSFPQ */
 285 static int
 286 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
 287 {
 288     struct rx_packet *c;
 289     int i;
 290 #ifdef KERNEL
 291     int overq = 0;
 292 #endif
 293     SPLVAR;
 294
 295     NETPRI;
 296
 297     MUTEX_ENTER(&rx_freePktQ_lock);
 298
 299 #ifdef KERNEL
 300     for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
 301          num_pkts--, overq++);
 302
 303     if (overq) {
 304         rxi_NeedMorePackets = TRUE;
 305         if (rx_stats_active) {
 306             switch (class) {
 307             case RX_PACKET_CLASS_RECEIVE:
 308                 rx_atomic_inc(&rx_stats.receivePktAllocFailures);
 309                 break;
 310             case RX_PACKET_CLASS_SEND:
 311                 rx_atomic_inc(&rx_stats.sendPktAllocFailures);
 312                 break;
 313             case RX_PACKET_CLASS_SPECIAL:
 314                 rx_atomic_inc(&rx_stats.specialPktAllocFailures);
 315                 break;
 316             case RX_PACKET_CLASS_RECV_CBUF:
 317                 rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
 318                 break;
 319             case RX_PACKET_CLASS_SEND_CBUF:
 320                 rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
 321                 break;
 322             }
 323         }
 324     }
 325
 326     if (rx_nFreePackets < num_pkts)
 327         num_pkts = rx_nFreePackets;
 328
 329     if (!num_pkts) {
 330         rxi_NeedMorePackets = TRUE;
 331         goto done;
 332     }
 333 #else /* KERNEL */
 334     if (rx_nFreePackets < num_pkts) {
 335         rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), 4 * rx_initSendWindow));
 336     }
 337 #endif /* KERNEL */
 338
 339     for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
 340          i < num_pkts;
 341          i++, c=queue_Next(c, rx_packet)) {
 342         RX_FPQ_MARK_USED(c);
 343     }
 344
 345     queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);
 346
 347     rx_nFreePackets -= num_pkts;
 348
 349 #ifdef KERNEL
 350   done:
 351 #endif
 352     MUTEX_EXIT(&rx_freePktQ_lock);
 353
 354     USERPRI;
 355     return num_pkts;
 356 }
 357 #endif /* RX_ENABLE_TSFPQ */
 358
/*
 * Free every packet on a queue, along with any continuation buffers
 * still attached to those packets.
 */
 362 #ifdef RX_ENABLE_TSFPQ
 363 /* num_pkts=0 means queue length is unknown */
 364 int
 365 rxi_FreePackets(int num_pkts, struct rx_queue * q)
 366 {
 367     struct rx_ts_info_t * rx_ts_info;
 368     struct rx_packet *c, *nc;
 369     SPLVAR;
 370
 371     osi_Assert(num_pkts >= 0);
 372     RX_TS_INFO_GET(rx_ts_info);
 373
 374     if (!num_pkts) {
 375         for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
 376             rxi_FreeDataBufsTSFPQ(c, 2, 0);
 377         }
 378     } else {
 379         for (queue_Scan(q, c, nc, rx_packet)) {
 380             rxi_FreeDataBufsTSFPQ(c, 2, 0);
 381         }
 382     }
 383
 384     if (num_pkts) {
 385         RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
 386     }
 387
 388     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 389         NETPRI;
 390         MUTEX_ENTER(&rx_freePktQ_lock);
 391
 392         RX_TS_FPQ_LTOG(rx_ts_info);
 393
 394         /* Wakeup anyone waiting for packets */
 395         rxi_PacketsUnWait();
 396
 397         MUTEX_EXIT(&rx_freePktQ_lock);
 398         USERPRI;
 399     }
 400
 401     return num_pkts;
 402 }
 403 #else /* RX_ENABLE_TSFPQ */
 404 /* num_pkts=0 means queue length is unknown */
 405 int
 406 rxi_FreePackets(int num_pkts, struct rx_queue *q)
 407 {
 408     struct rx_queue cbs;
 409     struct rx_packet *p, *np;
 410     int qlen = 0;
 411     SPLVAR;
 412
 413     osi_Assert(num_pkts >= 0);
 414     queue_Init(&cbs);
 415
 416     if (!num_pkts) {
 417         for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
 418             if (p->niovecs > 2) {
 419                 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
 420             }
 421             RX_FPQ_MARK_FREE(p);
 422         }
 423         if (!num_pkts)
 424             return 0;
 425     } else {
 426         for (queue_Scan(q, p, np, rx_packet)) {
 427             if (p->niovecs > 2) {
 428                 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
 429             }
 430             RX_FPQ_MARK_FREE(p);
 431         }
 432     }
 433
 434     if (qlen) {
 435         queue_SpliceAppend(q, &cbs);
 436         qlen += num_pkts;
 437     } else
 438         qlen = num_pkts;
 439
 440     NETPRI;
 441     MUTEX_ENTER(&rx_freePktQ_lock);
 442
 443     queue_SpliceAppend(&rx_freePacketQueue, q);
 444     rx_nFreePackets += qlen;
 445
 446     /* Wakeup anyone waiting for packets */
 447     rxi_PacketsUnWait();
 448
 449     MUTEX_EXIT(&rx_freePktQ_lock);
 450     USERPRI;
 451
 452     return num_pkts;
 453 }
 454 #endif /* RX_ENABLE_TSFPQ */
 455
 456 /* this one is kind of awful.
 457  * In rxkad, the packet has been all shortened, and everything, ready for
 458  * sending.  All of a sudden, we discover we need some of that space back.
 459  * This isn't terribly general, because it knows that the packets are only
 460  * rounded up to the EBS (userdata + security header).
 461  */
 462 int
 463 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
 464 {
 465     int i;
 466     i = p->niovecs - 1;
 467     if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
 468         if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
 469             p->wirevec[i].iov_len += nb;
 470             return 0;
 471         }
 472     } else {
 473         if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
 474             p->wirevec[i].iov_len += nb;
 475             return 0;
 476         }
 477     }
 478
 479     return 0;
 480 }
 481
 482 /* get sufficient space to store nb bytes of data (or more), and hook
 483  * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 484  * returns the number of bytes >0 which it failed to come up with.
 485  * Don't need to worry about locking on packet, since only
 486  * one thread can manipulate one at a time. Locking on continution
 487  * packets is handled by AllocPacketBufs */
 488 /* MTUXXX don't need to go throught the for loop if we can trust niovecs */
 489 int
 490 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
 491 {
 492     int i, nv;
 493     struct rx_queue q;
 494     struct rx_packet *cb, *ncb;
 495
 496     /* compute the number of cbuf's we need */
 497     nv = nb / RX_CBUFFERSIZE;
 498     if ((nv * RX_CBUFFERSIZE) < nb)
 499         nv++;
 500     if ((nv + p->niovecs) > RX_MAXWVECS)
 501         nv = RX_MAXWVECS - p->niovecs;
 502     if (nv < 1)
 503         return nb;
 504
 505     /* allocate buffers */
 506     queue_Init(&q);
 507     nv = AllocPacketBufs(class, nv, &q);
 508
 509     /* setup packet iovs */
 510     for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
 511         queue_Remove(cb);
 512         p->wirevec[i].iov_base = (caddr_t) cb->localdata;
 513         p->wirevec[i].iov_len = RX_CBUFFERSIZE;
 514     }
 515
 516     nb -= (nv * RX_CBUFFERSIZE);
 517     p->length += (nv * RX_CBUFFERSIZE);
 518     p->niovecs += nv;
 519
 520     return nb;
 521 }
 522
 523 /* Add more packet buffers */
 524 #ifdef RX_ENABLE_TSFPQ
 525 void
 526 rxi_MorePackets(int apackets)
 527 {
 528     struct rx_packet *p, *e;
 529     struct rx_ts_info_t * rx_ts_info;
 530     int getme;
 531     SPLVAR;
 532
 533     getme = apackets * sizeof(struct rx_packet);
 534     p = osi_Alloc(getme);
 535     osi_Assert(p);
 536
 537     PIN(p, getme);              /* XXXXX */
 538     memset(p, 0, getme);
 539     RX_TS_INFO_GET(rx_ts_info);
 540
 541     RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
 542     /* TSFPQ patch also needs to keep track of total packets */
 543
 544     MUTEX_ENTER(&rx_packets_mutex);
 545     rx_nPackets += apackets;
 546     RX_TS_FPQ_COMPUTE_LIMITS;
 547     MUTEX_EXIT(&rx_packets_mutex);
 548
 549     for (e = p + apackets; p < e; p++) {
 550         RX_PACKET_IOV_INIT(p);
 551         p->niovecs = 2;
 552
 553         RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 554
 555         NETPRI;
 556         MUTEX_ENTER(&rx_freePktQ_lock);
 557 #ifdef RXDEBUG_PACKET
 558         p->packetId = rx_packet_id++;
 559         p->allNextp = rx_mallocedP;
 560 #endif /* RXDEBUG_PACKET */
 561         rx_mallocedP = p;
 562         MUTEX_EXIT(&rx_freePktQ_lock);
 563         USERPRI;
 564     }
 565     rx_ts_info->_FPQ.delta += apackets;
 566
 567     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 568         NETPRI;
 569         MUTEX_ENTER(&rx_freePktQ_lock);
 570
 571         RX_TS_FPQ_LTOG(rx_ts_info);
 572         rxi_NeedMorePackets = FALSE;
 573         rxi_PacketsUnWait();
 574
 575         MUTEX_EXIT(&rx_freePktQ_lock);
 576         USERPRI;
 577     }
 578 }
 579 #else /* RX_ENABLE_TSFPQ */
 580 void
 581 rxi_MorePackets(int apackets)
 582 {
 583     struct rx_packet *p, *e;
 584     int getme;
 585     SPLVAR;
 586
 587     getme = apackets * sizeof(struct rx_packet);
 588     p = osi_Alloc(getme);
 589     osi_Assert(p);
 590
 591     PIN(p, getme);              /* XXXXX */
 592     memset(p, 0, getme);
 593     NETPRI;
 594     MUTEX_ENTER(&rx_freePktQ_lock);
 595
 596     for (e = p + apackets; p < e; p++) {
 597         RX_PACKET_IOV_INIT(p);
 598 #ifdef RX_TRACK_PACKETS
 599         p->flags |= RX_PKTFLAG_FREE;
 600 #endif
 601         p->niovecs = 2;
 602
 603         queue_Append(&rx_freePacketQueue, p);
 604 #ifdef RXDEBUG_PACKET
 605         p->packetId = rx_packet_id++;
 606         p->allNextp = rx_mallocedP;
 607 #endif /* RXDEBUG_PACKET */
 608         rx_mallocedP = p;
 609     }
 610
 611     rx_nPackets += apackets;
 612     rx_nFreePackets += apackets;
 613     rxi_NeedMorePackets = FALSE;
 614     rxi_PacketsUnWait();
 615
 616     MUTEX_EXIT(&rx_freePktQ_lock);
 617     USERPRI;
 618 }
 619 #endif /* RX_ENABLE_TSFPQ */
 620
 621 #ifdef RX_ENABLE_TSFPQ
 622 void
 623 rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
 624 {
 625     struct rx_packet *p, *e;
 626     struct rx_ts_info_t * rx_ts_info;
 627     int getme;
 628     SPLVAR;
 629
 630     getme = apackets * sizeof(struct rx_packet);
 631     p = osi_Alloc(getme);
 632
 633     PIN(p, getme);              /* XXXXX */
 634     memset(p, 0, getme);
 635     RX_TS_INFO_GET(rx_ts_info);
 636
 637     RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
 638     /* TSFPQ patch also needs to keep track of total packets */
 639     MUTEX_ENTER(&rx_packets_mutex);
 640     rx_nPackets += apackets;
 641     RX_TS_FPQ_COMPUTE_LIMITS;
 642     MUTEX_EXIT(&rx_packets_mutex);
 643
 644     for (e = p + apackets; p < e; p++) {
 645         RX_PACKET_IOV_INIT(p);
 646         p->niovecs = 2;
 647         RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 648
 649         NETPRI;
 650         MUTEX_ENTER(&rx_freePktQ_lock);
 651 #ifdef RXDEBUG_PACKET
 652         p->packetId = rx_packet_id++;
 653         p->allNextp = rx_mallocedP;
 654 #endif /* RXDEBUG_PACKET */
 655         rx_mallocedP = p;
 656         MUTEX_EXIT(&rx_freePktQ_lock);
 657         USERPRI;
 658     }
 659     rx_ts_info->_FPQ.delta += apackets;
 660
 661     if (flush_global &&
 662         (num_keep_local < apackets)) {
 663         NETPRI;
 664         MUTEX_ENTER(&rx_freePktQ_lock);
 665
 666         RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
 667         rxi_NeedMorePackets = FALSE;
 668         rxi_PacketsUnWait();
 669
 670         MUTEX_EXIT(&rx_freePktQ_lock);
 671         USERPRI;
 672     }
 673 }
 674 #endif /* RX_ENABLE_TSFPQ */
 675
 676 #ifndef KERNEL
 677 /* Add more packet buffers */
 678 static void
 679 rxi_MorePacketsNoLock(int apackets)
 680 {
 681 #ifdef RX_ENABLE_TSFPQ
 682     struct rx_ts_info_t * rx_ts_info;
 683 #endif /* RX_ENABLE_TSFPQ */
 684     struct rx_packet *p, *e;
 685     int getme;
 686
 687     /* allocate enough packets that 1/4 of the packets will be able
 688      * to hold maximal amounts of data */
 689     apackets += (apackets / 4)
 690         * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
 691     do {
 692         getme = apackets * sizeof(struct rx_packet);
 693         p = osi_Alloc(getme);
 694         if (p == NULL) {
 695             apackets -= apackets / 4;
 696             osi_Assert(apackets > 0);
 697         }
 698     } while(p == NULL);
 699     memset(p, 0, getme);
 700
 701 #ifdef RX_ENABLE_TSFPQ
 702     RX_TS_INFO_GET(rx_ts_info);
 703     RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info,apackets);
 704 #endif /* RX_ENABLE_TSFPQ */
 705
 706     for (e = p + apackets; p < e; p++) {
 707         RX_PACKET_IOV_INIT(p);
 708 #ifdef RX_TRACK_PACKETS
 709         p->flags |= RX_PKTFLAG_FREE;
 710 #endif
 711         p->niovecs = 2;
 712
 713         queue_Append(&rx_freePacketQueue, p);
 714 #ifdef RXDEBUG_PACKET
 715         p->packetId = rx_packet_id++;
 716         p->allNextp = rx_mallocedP;
 717 #endif /* RXDEBUG_PACKET */
 718         rx_mallocedP = p;
 719     }
 720
 721     rx_nFreePackets += apackets;
 722     MUTEX_ENTER(&rx_packets_mutex);
 723     rx_nPackets += apackets;
 724 #ifdef RX_ENABLE_TSFPQ
 725     RX_TS_FPQ_COMPUTE_LIMITS;
 726 #endif /* RX_ENABLE_TSFPQ */
 727     MUTEX_EXIT(&rx_packets_mutex);
 728     rxi_NeedMorePackets = FALSE;
 729     rxi_PacketsUnWait();
 730 }
 731 #endif /* !KERNEL */
 732
 733 void
 734 rxi_FreeAllPackets(void)
 735 {
 736     /* must be called at proper interrupt level, etcetera */
 737     /* MTUXXX need to free all Packets */
 738     osi_Free(rx_mallocedP,
 739              (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
 740     UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
 741 }
 742
 743 #ifdef RX_ENABLE_TSFPQ
 744 static void
 745 rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
 746 {
 747     struct rx_ts_info_t * rx_ts_info;
 748     int xfer;
 749     SPLVAR;
 750
 751     RX_TS_INFO_GET(rx_ts_info);
 752
 753     if (num_keep_local != rx_ts_info->_FPQ.len) {
 754         NETPRI;
 755         MUTEX_ENTER(&rx_freePktQ_lock);
 756         if (num_keep_local < rx_ts_info->_FPQ.len) {
 757             xfer = rx_ts_info->_FPQ.len - num_keep_local;
 758             RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
 759             rxi_PacketsUnWait();
 760         } else {
 761             xfer = num_keep_local - rx_ts_info->_FPQ.len;
 762             if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
 763                 xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
 764             if (rx_nFreePackets < xfer) {
 765                 rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
 766             }
 767             RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
 768         }
 769         MUTEX_EXIT(&rx_freePktQ_lock);
 770         USERPRI;
 771     }
 772 }
 773
 774 void
 775 rxi_FlushLocalPacketsTSFPQ(void)
 776 {
 777     rxi_AdjustLocalPacketsTSFPQ(0, 0);
 778 }
 779 #endif /* RX_ENABLE_TSFPQ */
 780
 781 /* Allocate more packets iff we need more continuation buffers */
 782 /* In kernel, can't page in memory with interrupts disabled, so we
 783  * don't use the event mechanism. */
 784 void
 785 rx_CheckPackets(void)
 786 {
 787     if (rxi_NeedMorePackets) {
 788         rxi_MorePackets(rx_maxSendWindow);
 789     }
 790 }
 791
 792 /* In the packet freeing routine below, the assumption is that
 793    we want all of the packets to be used equally frequently, so that we
 794    don't get packet buffers paging out.  It would be just as valid to
 795    assume that we DO want them to page out if not many are being used.
 796    In any event, we assume the former, and append the packets to the end
 797    of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
 804
 805 /* Actually free the packet p. */
 806 #ifndef RX_ENABLE_TSFPQ
 807 static void
 808 rxi_FreePacketNoLock(struct rx_packet *p)
 809 {
 810     dpf(("Free %"AFS_PTR_FMT"\n", p));
 811
 812     RX_FPQ_MARK_FREE(p);
 813     rx_nFreePackets++;
 814     queue_Append(&rx_freePacketQueue, p);
 815 }
 816 #endif /* RX_ENABLE_TSFPQ */
 817
 818 #ifdef RX_ENABLE_TSFPQ
 819 static void
 820 rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
 821 {
 822     struct rx_ts_info_t * rx_ts_info;
 823     dpf(("Free %"AFS_PTR_FMT"\n", p));
 824
 825     RX_TS_INFO_GET(rx_ts_info);
 826     RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 827
 828     if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
 829         NETPRI;
 830         MUTEX_ENTER(&rx_freePktQ_lock);
 831
 832         RX_TS_FPQ_LTOG(rx_ts_info);
 833
 834         /* Wakeup anyone waiting for packets */
 835         rxi_PacketsUnWait();
 836
 837         MUTEX_EXIT(&rx_freePktQ_lock);
 838         USERPRI;
 839     }
 840 }
 841 #endif /* RX_ENABLE_TSFPQ */
 842
 843 /*
 844  * free continuation buffers off a packet into a queue
 845  *
 846  * [IN] p      -- packet from which continuation buffers will be freed
 847  * [IN] first  -- iovec offset of first continuation buffer to free
 848  * [IN] q      -- queue into which continuation buffers will be chained
 849  *
 850  * returns:
 851  *   number of continuation buffers freed
 852  */
 853 #ifndef RX_ENABLE_TSFPQ
 854 static int
 855 rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
 856 {
 857     struct iovec *iov;
 858     struct rx_packet * cb;
 859     int count = 0;
 860
 861     for (first = MAX(2, first); first < p->niovecs; first++, count++) {
 862         iov = &p->wirevec[first];
 863         if (!iov->iov_base)
 864             osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
 865         cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
 866         RX_FPQ_MARK_FREE(cb);
 867         queue_Append(q, cb);
 868     }
 869     p->length = 0;
 870     p->niovecs = 0;
 871
 872     return count;
 873 }
 874
 875 /*
 876  * free packet continuation buffers into the global free packet pool
 877  *
 878  * [IN] p      -- packet from which to free continuation buffers
 879  * [IN] first  -- iovec offset of first continuation buffer to free
 880  *
 881  * returns:
 882  *   zero always
 883  */
 884 static int
 885 rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
 886 {
 887     struct iovec *iov;
 888
 889     for (first = MAX(2, first); first < p->niovecs; first++) {
 890         iov = &p->wirevec[first];
 891         if (!iov->iov_base)
 892             osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
 893         rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 894     }
 895     p->length = 0;
 896     p->niovecs = 0;
 897
 898     return 0;
 899 }
 900
 901 #else
 902
 903 /*
 904  * free packet continuation buffers into the thread-local free pool
 905  *
 906  * [IN] p             -- packet from which continuation buffers will be freed
 907  * [IN] first         -- iovec offset of first continuation buffer to free
 908  *                       any value less than 2, the min number of iovecs,
 909  *                       is treated as if it is 2.
 910  * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 911  *                       global free pool before returning
 912  *
 913  * returns:
 914  *   zero always
 915  */
 916 static int
 917 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
 918 {
 919     struct iovec *iov;
 920     struct rx_ts_info_t * rx_ts_info;
 921
 922     RX_TS_INFO_GET(rx_ts_info);
 923
 924     for (first = MAX(2, first); first < p->niovecs; first++) {
 925         iov = &p->wirevec[first];
 926         if (!iov->iov_base)
 927             osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
 928         RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
 929     }
 930     p->length = 0;
 931     p->niovecs = 0;
 932
 933     if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
 934         NETPRI;
 935         MUTEX_ENTER(&rx_freePktQ_lock);
 936
 937         RX_TS_FPQ_LTOG(rx_ts_info);
 938
 939         /* Wakeup anyone waiting for packets */
 940         rxi_PacketsUnWait();
 941
 942         MUTEX_EXIT(&rx_freePktQ_lock);
 943         USERPRI;
 944     }
 945     return 0;
 946 }
 947 #endif /* RX_ENABLE_TSFPQ */
 948
 949 int rxi_nBadIovecs = 0;
 950
 951 /* rxi_RestoreDataBufs
 952  *
 953  * Restore the correct sizes to the iovecs. Called when reusing a packet
 954  * for reading off the wire.
 955  */
 956 void
 957 rxi_RestoreDataBufs(struct rx_packet *p)
 958 {
 959     unsigned int i;
 960     struct iovec *iov;
 961
 962     RX_PACKET_IOV_INIT(p);
 963
 964     for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
 965         if (!iov->iov_base) {
 966             rxi_nBadIovecs++;
 967             p->niovecs = i;
 968             break;
 969         }
 970         iov->iov_len = RX_CBUFFERSIZE;
 971     }
 972 }
 973
 974 #ifdef RX_ENABLE_TSFPQ
 975 int
 976 rxi_TrimDataBufs(struct rx_packet *p, int first)
 977 {
 978     int length;
 979     struct iovec *iov, *end;
 980     struct rx_ts_info_t * rx_ts_info;
 981     SPLVAR;
 982
 983     if (first != 1)
 984         osi_Panic("TrimDataBufs 1: first must be 1");
 985
 986     /* Skip over continuation buffers containing message data */
 987     iov = &p->wirevec[2];
 988     end = iov + (p->niovecs - 2);
 989     length = p->length - p->wirevec[1].iov_len;
 990     for (; iov < end && length > 0; iov++) {
 991         if (!iov->iov_base)
 992             osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
 993         length -= iov->iov_len;
 994     }
 995
 996     /* iov now points to the first empty data buffer. */
 997     if (iov >= end)
 998         return 0;
 999
1000     RX_TS_INFO_GET(rx_ts_info);
1001     for (; iov < end; iov++) {
1002         if (!iov->iov_base)
1003             osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1004         RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
1005         p->niovecs--;
1006     }
1007     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
1008         NETPRI;
1009         MUTEX_ENTER(&rx_freePktQ_lock);
1010
1011         RX_TS_FPQ_LTOG(rx_ts_info);
1012         rxi_PacketsUnWait();
1013
1014         MUTEX_EXIT(&rx_freePktQ_lock);
1015         USERPRI;
1016     }
1017
1018     return 0;
1019 }
1020 #else /* RX_ENABLE_TSFPQ */
1021 int
1022 rxi_TrimDataBufs(struct rx_packet *p, int first)
1023 {
1024     int length;
1025     struct iovec *iov, *end;
1026     SPLVAR;
1027
1028     if (first != 1)
1029         osi_Panic("TrimDataBufs 1: first must be 1");
1030
1031     /* Skip over continuation buffers containing message data */
1032     iov = &p->wirevec[2];
1033     end = iov + (p->niovecs - 2);
1034     length = p->length - p->wirevec[1].iov_len;
1035     for (; iov < end && length > 0; iov++) {
1036         if (!iov->iov_base)
1037             osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
1038         length -= iov->iov_len;
1039     }
1040
1041     /* iov now points to the first empty data buffer. */
1042     if (iov >= end)
1043         return 0;
1044
1045     NETPRI;
1046     MUTEX_ENTER(&rx_freePktQ_lock);
1047
1048     for (; iov < end; iov++) {
1049         if (!iov->iov_base)
1050             osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1051         rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
1052         p->niovecs--;
1053     }
1054     rxi_PacketsUnWait();
1055
1056     MUTEX_EXIT(&rx_freePktQ_lock);
1057     USERPRI;
1058
1059     return 0;
1060 }
1061 #endif /* RX_ENABLE_TSFPQ */
1062
1063 /* Free the packet p.  P is assumed not to be on any queue, i.e.
1064  * remove it yourself first if you call this routine. */
1065 #ifdef RX_ENABLE_TSFPQ
/*
 * Thread-specific free packet queue variant.
 *
 * First releases p's data buffers through rxi_FreeDataBufsTSFPQ (called
 * with 2 and 0 as its second and third arguments), then returns p itself
 * through rxi_FreePacketTSFPQ with RX_TS_FPQ_FLUSH_GLOBAL.
 */
void
rxi_FreePacket(struct rx_packet *p)
{
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
}
1072 #else /* RX_ENABLE_TSFPQ */
/*
 * Global free packet queue variant.
 *
 * Takes rx_freePktQ_lock (inside a NETPRI/USERPRI bracket), releases p's
 * data buffers with rxi_FreeDataBufsNoLock (second argument 2), frees p
 * itself with rxi_FreePacketNoLock, and wakes anyone waiting for packets
 * before dropping the lock again.
 */
void
rxi_FreePacket(struct rx_packet *p)
{
    SPLVAR;

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
}
1089 #endif /* RX_ENABLE_TSFPQ */
1090
1091 /* rxi_AllocPacket sets up p->length so it reflects the number of
1092  * bytes in the packet at this point, **not including** the header.
1093  * The header is absolutely necessary, besides, this is the way the
1094  * length field is usually used */
1095 #ifdef RX_ENABLE_TSFPQ
1096 static struct rx_packet *
1097 rxi_AllocPacketNoLock(int class)
1098 {
1099     struct rx_packet *p;
1100     struct rx_ts_info_t * rx_ts_info;
1101
1102     RX_TS_INFO_GET(rx_ts_info);
1103
1104 #ifdef KERNEL
1105     if (rxi_OverQuota(class)) {
1106         rxi_NeedMorePackets = TRUE;
1107         if (rx_stats_active) {
1108             switch (class) {
1109             case RX_PACKET_CLASS_RECEIVE:
1110                 rx_atomic_inc(rx_stats.receivePktAllocFailures);
1111                 break;
1112             case RX_PACKET_CLASS_SEND:
1113                 rx_atomic_inc(&rx_stats.sendPktAllocFailures);
1114                 break;
1115             case RX_PACKET_CLASS_SPECIAL:
1116                 rx_atomic_inc(&rx_stats.specialPktAllocFailures);
1117                 break;
1118             case RX_PACKET_CLASS_RECV_CBUF:
1119                 rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
1120                 break;
1121             case RX_PACKET_CLASS_SEND_CBUF:
1122                 rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
1123                 break;
1124             }
1125         }
1126         return (struct rx_packet *)0;
1127     }
1128 #endif /* KERNEL */
1129
1130     if (rx_stats_active)
1131         rx_atomic_inc(&rx_stats.packetRequests);
1132     if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1133
1134 #ifdef KERNEL
1135         if (queue_IsEmpty(&rx_freePacketQueue))
1136             osi_Panic("rxi_AllocPacket error");
1137 #else /* KERNEL */
1138         if (queue_IsEmpty(&rx_freePacketQueue))
1139             rxi_MorePacketsNoLock(rx_maxSendWindow);
1140 #endif /* KERNEL */
1141
1142
1143         RX_TS_FPQ_GTOL(rx_ts_info);
1144     }
1145
1146     RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1147
1148     dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1149
1150
1151     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1152      * order to truncate outbound packets.  In the near future, may need
1153      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1154      */
1155     RX_PACKET_IOV_FULLINIT(p);
1156     return p;
1157 }
1158 #else /* RX_ENABLE_TSFPQ */
1159 static struct rx_packet *
1160 rxi_AllocPacketNoLock(int class)
1161 {
1162     struct rx_packet *p;
1163
1164 #ifdef KERNEL
1165     if (rxi_OverQuota(class)) {
1166         rxi_NeedMorePackets = TRUE;
1167         if (rx_stats_active) {
1168             switch (class) {
1169             case RX_PACKET_CLASS_RECEIVE:
1170                 rx_atomic_inc(&rx_stats.receivePktAllocFailures);
1171                 break;
1172             case RX_PACKET_CLASS_SEND:
1173                 rx_atomic_inc(&rx_stats.sendPktAllocFailures);
1174                 break;
1175             case RX_PACKET_CLASS_SPECIAL:
1176                 rx_atomic_inc(&rx_stats.specialPktAllocFailures);
1177                 break;
1178             case RX_PACKET_CLASS_RECV_CBUF:
1179                 rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
1180                 break;
1181             case RX_PACKET_CLASS_SEND_CBUF:
1182                 rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
1183                 break;
1184             }
1185         }
1186         return (struct rx_packet *)0;
1187     }
1188 #endif /* KERNEL */
1189
1190     if (rx_stats_active)
1191         rx_atomic_inc(&rx_stats.packetRequests);
1192
1193 #ifdef KERNEL
1194     if (queue_IsEmpty(&rx_freePacketQueue))
1195         osi_Panic("rxi_AllocPacket error");
1196 #else /* KERNEL */
1197     if (queue_IsEmpty(&rx_freePacketQueue))
1198         rxi_MorePacketsNoLock(rx_maxSendWindow);
1199 #endif /* KERNEL */
1200
1201     rx_nFreePackets--;
1202     p = queue_First(&rx_freePacketQueue, rx_packet);
1203     queue_Remove(p);
1204     RX_FPQ_MARK_USED(p);
1205
1206     dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1207
1208
1209     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1210      * order to truncate outbound packets.  In the near future, may need
1211      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1212      */
1213     RX_PACKET_IOV_FULLINIT(p);
1214     return p;
1215 }
1216 #endif /* RX_ENABLE_TSFPQ */
1217
1218 #ifdef RX_ENABLE_TSFPQ
1219 static struct rx_packet *
1220 rxi_AllocPacketTSFPQ(int class, int pull_global)
1221 {
1222     struct rx_packet *p;
1223     struct rx_ts_info_t * rx_ts_info;
1224
1225     RX_TS_INFO_GET(rx_ts_info);
1226
1227     if (rx_stats_active)
1228         rx_atomic_inc(&rx_stats.packetRequests);
1229     if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
1230         MUTEX_ENTER(&rx_freePktQ_lock);
1231
1232         if (queue_IsEmpty(&rx_freePacketQueue))
1233             rxi_MorePacketsNoLock(rx_maxSendWindow);
1234
1235         RX_TS_FPQ_GTOL(rx_ts_info);
1236
1237         MUTEX_EXIT(&rx_freePktQ_lock);
1238     } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1239         return NULL;
1240     }
1241
1242     RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1243
1244     dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1245
1246     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1247      * order to truncate outbound packets.  In the near future, may need
1248      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1249      */
1250     RX_PACKET_IOV_FULLINIT(p);
1251     return p;
1252 }
1253 #endif /* RX_ENABLE_TSFPQ */
1254
1255 #ifdef RX_ENABLE_TSFPQ
/*
 * Thread-specific free packet queue variant: allocate a packet of the
 * given class through rxi_AllocPacketTSFPQ, passing
 * RX_TS_FPQ_PULL_GLOBAL as its pull_global argument, and return whatever
 * it returns.
 */
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
    return p;
}
1264 #else /* RX_ENABLE_TSFPQ */
/*
 * Global free packet queue variant: allocate a packet of the given class
 * with rxi_AllocPacketNoLock while holding rx_freePktQ_lock, and return
 * whatever it returns.
 */
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
    return p;
}
1275 #endif /* RX_ENABLE_TSFPQ */
1276
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call. It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 *
 * call: the call the packet is for; supplies the MTU, the connection's
 *       security header/trailer sizes, and the error state.
 * want: number of data bytes the caller would like the packet to hold.
 *
 * The security overhead (delta) is added to want, the sum is capped at
 * MTU minus RX_HEADER_SIZE, and extra data buffers are requested if the
 * packet is shorter than that.  On return p->length has the security
 * overhead subtracted again.
 *
 * Returns NULL if call->error is (or becomes) set before a packet is
 * obtained, or if the packet obtained cannot hold more than the security
 * overhead (it is freed in that case).
 */
struct rx_packet *
rxi_AllocSendPacket(struct rx_call *call, int want)
{
    struct rx_packet *p = (struct rx_packet *)0;
    int mud;
    unsigned delta;

    SPLVAR;
    /* mud: largest data payload that fits in one packet for this call */
    mud = call->MTU - RX_HEADER_SIZE;
    /* delta: bytes reserved for the security header and trailer */
    delta =
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    /* Fast path: try the thread-local queue only (pull_global == 0).  If
     * it is empty we fall through to the locked loop below. */
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
        want += delta;
        want = MIN(want, mud);

        /* Allocation of extra buffers is best effort; the result is
         * deliberately ignored. */
        if ((unsigned)want > p->length)
            (void)rxi_AllocDataBuf(p, (want - p->length),
                                   RX_PACKET_CLASS_SEND_CBUF);

        if (p->length > mud)
            p->length = mud;

        /* No room left for user data once security overhead is removed */
        if (delta >= p->length) {
            rxi_FreePacket(p);
            p = NULL;
        } else {
            p->length -= delta;
        }
        return p;
    }
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);

            /* Same sizing steps as the TSFPQ fast path above. */
            want += delta;
            want = MIN(want, mud);

            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);

            if (p->length > mud)
                p->length = mud;

            if (delta >= p->length) {
                rxi_FreePacket(p);
                p = NULL;
            } else {
                p->length -= delta;
            }
            break;
        }

        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
         * just wait.  */
        NETPRI;
        call->flags |= RX_CALL_WAIT_PACKETS;
        /* Hold a reference on the call while its lock is dropped for the
         * wait; it is released once the lock has been re-taken. */
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;

        /* rx_freePktQ_lock is still held here from the top of the loop. */
#ifdef  RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
        osi_rxSleep(&rx_waitingForPackets);
#endif
        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
        USERPRI;
    }

    return p;
}
1366
1367 #ifndef KERNEL
1368 #ifdef AFS_NT40_ENV
1369 /* Windows does not use file descriptors. */
1370 #define CountFDs(amax) 0
1371 #else
/* Return how many of the descriptors 0 .. amax-1 are currently in use,
 * i.e. how many of them fstat() accepts.  A non-positive amax gives 0. */
static int
CountFDs(int amax)
{
    struct stat probe;
    int fd = 0;
    int in_use = 0;

    while (fd < amax) {
        if (fstat(fd, &probe) == 0)
            in_use++;
        fd++;
    }
    return in_use;
}
1388 #endif /* AFS_NT40_ENV */
1389 #else /* KERNEL */
1390
1391 #define CountFDs(amax) amax
1392
1393 #endif /* KERNEL */
1394
1395 #if !defined(KERNEL) || defined(UKERNEL)
1396
/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded.
 *
 * Returns 1 when a packet was read and its header decoded.  Returns 0
 * when rxi_Recvmsg failed (including EWOULDBLOCK), when the datagram was
 * larger than the space offered, when the resulting length has its top
 * bit set, or (RXDEBUG builds only) when the packet was deliberately
 * dropped.  *host and *port are left in network byte order, exactly as
 * found in the sockaddr_in filled in by rxi_Recvmsg.
 */
int
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
               u_short * port)
{
    struct sockaddr_in from;
    int nbytes;
    afs_int32 rlen;
    afs_uint32 tlen, savelen;
    struct msghdr msg;
    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);    /* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;     /* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
                                 * it once in order to avoid races.  */
    /* tlen becomes the shortfall between what we advertise and what the
     * packet can currently hold.
     * NOTE(review): tlen is unsigned, so the `tlen > 0` tests below are
     * really `tlen != 0`; a packet already larger than rlen would wrap
     * here rather than go negative -- confirm this cannot happen. */
    tlen = rlen - tlen;
    if (tlen > 0) {
        /* Try to grow the packet; the return value is treated as the
         * number of bytes that could not be provided. */
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
        if (tlen > 0) {
            tlen = rlen - tlen;
        } else
            tlen = rlen;
    } else
        tlen = rlen;
    /* From here on tlen is the largest datagram we are prepared to accept. */

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    /* The truncation to u_short makes a short or failed read show up as
     * a length with the top bit set, which is rejected just below. */
    p->length = (u_short)(nbytes - RX_HEADER_SIZE);
    if (nbytes < 0 || (nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
        if (nbytes < 0 && errno == EWOULDBLOCK) {
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.noPacketOnRead);
        } else if (nbytes <= 0) {
            /* Only failed or empty reads are counted as bogus here;
             * oversized or too-short datagrams are rejected silently. */
            if (rx_stats_active) {
                rx_atomic_inc(&rx_stats.bogusPacketOnRead);
                rx_stats.bogusHost = from.sin_addr.s_addr;
            }
            dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
                 ntohs(from.sin_port), nbytes));
        }
        return 0;
    }
#ifdef RXDEBUG
    /* Debug aid: drop a configurable percentage of good packets. */
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
                && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;

        dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
              p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
              p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
              p->length));
#ifdef RX_TRIMDATABUFS
        rxi_TrimDataBufs(p, 1);
#endif
        return 0;
    }
#endif
    else {
        /* Extract packet header. */
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        /* Per-type read statistics; the type is range-checked because it
         * indexes packetsRead[]. */
        if (rx_stats_active
            && p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {

                rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
        }

#ifdef RX_TRIMDATABUFS
        /* Free any empty packet buffers at the end of this packet */
        rxi_TrimDataBufs(p, 1);
#endif
        return 1;
    }
}
1497
1498 #endif /* !KERNEL || UKERNEL */
1499
1500 /* This function splits off the first packet in a jumbo packet.
1501  * As of AFS 3.5, jumbograms contain more than one fixed size
1502  * packet, and the RX_JUMBO_PACKET flag is set in all but the
1503  * last packet header. All packets (except the last) are padded to
1504  * fall on RX_CBUFFERSIZE boundaries.
1505  * HACK: We store the length of the first n-1 packets in the
1506  * last two pad bytes. */
1507
1508 struct rx_packet *
1509 rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
1510                      int first)
1511 {
1512     struct rx_packet *np;
1513     struct rx_jumboHeader *jp;
1514     int niov, i;
1515     struct iovec *iov;
1516     int length;
1517     afs_uint32 temp;
1518
1519     /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
1520      * bytes in length. All but the first packet are preceded by
1521      * an abbreviated four byte header. The length of the last packet
1522      * is calculated from the size of the jumbogram. */
1523     length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1524
1525     if ((int)p->length < length) {
1526         dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
1527         return NULL;
1528     }
1529     niov = p->niovecs - 2;
1530     if (niov < 1) {
1531         dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
1532         return NULL;
1533     }
1534     iov = &p->wirevec[2];
1535     np = RX_CBUF_TO_PACKET(iov->iov_base, p);
1536
1537     /* Get a pointer to the abbreviated packet header */
1538     jp = (struct rx_jumboHeader *)
1539         ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
1540
1541     /* Set up the iovecs for the next packet */
1542     np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
1543     np->wirevec[0].iov_len = sizeof(struct rx_header);
1544     np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
1545     np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
1546     np->niovecs = niov + 1;
1547     for (i = 2, iov++; i <= niov; i++, iov++) {
1548         np->wirevec[i] = *iov;
1549     }
1550     np->length = p->length - length;
1551     p->length = RX_JUMBOBUFFERSIZE;
1552     p->niovecs = 2;
1553
1554     /* Convert the jumbo packet header to host byte order */
1555     temp = ntohl(*(afs_uint32 *) jp);
1556     jp->flags = (u_char) (temp >> 24);
1557     jp->cksum = (u_short) (temp);
1558
1559     /* Fill in the packet header */
1560     np->header = p->header;
1561     np->header.serial = p->header.serial + 1;
1562     np->header.seq = p->header.seq + 1;
1563     np->header.flags = jp->flags;
1564     np->header.spare = jp->cksum;
1565
1566     return np;
1567 }
1568
1569 #ifndef KERNEL
1570 /* Send a udp datagram */
1571 int
1572 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
1573             int length, int istack)
1574 {
1575     struct msghdr msg;
1576         int ret;
1577
1578     memset(&msg, 0, sizeof(msg));
1579     msg.msg_iov = dvec;
1580     msg.msg_iovlen = nvecs;
1581     msg.msg_name = addr;
1582     msg.msg_namelen = sizeof(struct sockaddr_in);
1583
1584     ret = rxi_Sendmsg(socket, &msg, 0);
1585
1586     return ret;
1587 }
1588 #elif !defined(UKERNEL)
1589 /*
1590  * message receipt is done in rxk_input or rx_put.
1591  */
1592
1593 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1594 /*
1595  * Copy an mblock to the contiguous area pointed to by cp.
1596  * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1597  * but it doesn't really.
1598  * Returns the number of bytes not transferred.
1599  * The message is NOT changed.
1600  */
1601 static int
1602 cpytoc(mblk_t * mp, int off, int len, char *cp)
1603 {
1604     int n;
1605
1606     for (; mp && len > 0; mp = mp->b_cont) {
1607         if (mp->b_datap->db_type != M_DATA) {
1608             return -1;
1609         }
1610         n = MIN(len, (mp->b_wptr - mp->b_rptr));
1611         memcpy(cp, (char *)mp->b_rptr, n);
1612         cp += n;
1613         len -= n;
1614         mp->b_rptr += n;
1615     }
1616     return (len);
1617 }
1618
/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 *
 * Copies up to <len> bytes from the chain of message blocks starting at
 * mp into the buffers described by iovs[0..niovs-1], filling each iovec
 * in turn.  <off> is ignored.  The read pointer (b_rptr) of each block
 * copied from is advanced.
 * Returns the number of bytes not yet accounted for when the loop ends,
 * or -1 if a block that is not of type M_DATA is encountered.
 */
static int
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
           int niovs)
{
    /* m: bytes copied in one memcpy, n: bytes left in the current block,
     * o: write offset in the current iovec, t: space left in the current
     * iovec, i: index of the current iovec (-1 before the first one). */
    int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
            return -1;
        }
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        len -= n;
        while (n) {
            if (!t) {
                /* Current iovec is full (or none chosen yet): move on.
                 * NOTE(review): i is only compared with niovs in the
                 * outer loop condition, so this relies on the iovecs
                 * being large enough for the data -- confirm callers
                 * guarantee that. */
                o = 0;
                i++;
                t = iovs[i].iov_len;
            }
            m = MIN(n, t);
            memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
            mp->b_rptr += m;
            o += m;
            t -= m;
            n -= m;
        }
    }
    return (len);
}
1651
1652 #define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
1653 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1654 #else
1655 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1656 static int
1657 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1658 {
1659     caddr_t p1, p2;
1660     unsigned int l1, l2, i, t;
1661
1662     if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1663         osi_Panic("m_cpytoiovec");      /* MTUXXX probably don't need this check */
1664
1665     while (off && m)
1666         if (m->m_len <= off) {
1667             off -= m->m_len;
1668             m = m->m_next;
1669             continue;
1670         } else
1671             break;
1672
1673     if (m == NULL)
1674         return len;
1675
1676     p1 = mtod(m, caddr_t) + off;
1677     l1 = m->m_len - off;
1678     i = 0;
1679     p2 = iovs[0].iov_base;
1680     l2 = iovs[0].iov_len;
1681
1682     while (len) {
1683         t = MIN(l1, MIN(l2, (unsigned int)len));
1684         memcpy(p2, p1, t);
1685         p1 += t;
1686         p2 += t;
1687         l1 -= t;
1688         l2 -= t;
1689         len -= t;
1690         if (!l1) {
1691             m = m->m_next;
1692             if (!m)
1693                 break;
1694             p1 = mtod(m, caddr_t);
1695             l1 = m->m_len;
1696         }
1697         if (!l2) {
1698             if (++i >= niovs)
1699                 break;
1700             p2 = iovs[i].iov_base;
1701             l2 = iovs[i].iov_len;
1702         }
1703
1704     }
1705
1706     return len;
1707 }
1708 #endif /* LINUX */
1709 #endif /* AFS_SUN5_ENV */
1710
1711 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
/*
 * Copy the contents of a kernel network buffer chain into a packet.
 *
 * amb      - source buffer chain (mblk_t on AFS_SUN5_ENV/AFS_HPUX110_ENV,
 *            struct mbuf elsewhere)
 * free     - callback invoked on amb once the copy has been attempted
 * hdr_len  - passed to m_cpytoiovec as the offset argument
 * data_len - passed to m_cpytoiovec as the number of bytes to copy
 * phandle  - destination packet; its wirevec/niovecs describe the
 *            buffers that are filled
 *
 * Returns whatever m_cpytoiovec returns.  amb is always handed to the
 * free callback, whatever the outcome of the copy.
 */
#if defined(AFS_NBSD_ENV)
int
rx_mb_to_packet(struct mbuf *amb, void (*free) (struct mbuf *), int hdr_len, int data_len, struct rx_packet *phandle)
#else
int
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     mblk_t *amb;
#else
     struct mbuf *amb;
#endif
     void (*free) ();
     struct rx_packet *phandle;
     int hdr_len, data_len;
#endif /* AFS_NBSD_ENV */
{
    int code;

    code =
        m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
                     phandle->niovecs);
    /* The source buffer is released here whether or not the copy was
     * complete. */
    (*free) (amb);

    return code;
}
1737 #endif /* LINUX */
1738 #endif /*KERNEL && !UKERNEL */
1739
1740
1741 /* send a response to a debug packet */
1742
1743 struct rx_packet *
1744 rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
1745                        afs_uint32 ahost, short aport, int istack)
1746 {
1747     struct rx_debugIn tin;
1748     afs_int32 tl;
1749     struct rx_serverQueueEntry *np, *nqe;
1750
1751     /*
1752      * Only respond to client-initiated Rx debug packets,
1753      * and clear the client flag in the response.
1754      */
1755     if (ap->header.flags & RX_CLIENT_INITIATED) {
1756         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1757         rxi_EncodePacketHeader(ap);
1758     } else {
1759         return ap;
1760     }
1761
1762     rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1763     /* all done with packet, now set length to the truth, so we can
1764      * reuse this packet */
1765     rx_computelen(ap, ap->length);
1766
1767     tin.type = ntohl(tin.type);
1768     tin.index = ntohl(tin.index);
1769     switch (tin.type) {
1770     case RX_DEBUGI_GETSTATS:{
1771             struct rx_debugStats tstat;
1772
1773             /* get basic stats */
1774             memset(&tstat, 0, sizeof(tstat));   /* make sure spares are zero */
1775             tstat.version = RX_DEBUGI_VERSION;
1776 #ifndef RX_ENABLE_LOCKS
1777             tstat.waitingForPackets = rx_waitingForPackets;
1778 #endif
1779             MUTEX_ENTER(&rx_serverPool_lock);
1780             tstat.nFreePackets = htonl(rx_nFreePackets);
1781             tstat.nPackets = htonl(rx_nPackets);
1782             tstat.callsExecuted = htonl(rxi_nCalls);
1783             tstat.packetReclaims = htonl(rx_packetReclaims);
1784             tstat.usedFDs = CountFDs(64);
1785             tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
1786             tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
1787             queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
1788                         tstat.idleThreads);
1789             MUTEX_EXIT(&rx_serverPool_lock);
1790             tstat.idleThreads = htonl(tstat.idleThreads);
1791             tl = sizeof(struct rx_debugStats) - ap->length;
1792             if (tl > 0)
1793                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1794
1795             if (tl <= 0) {
1796                 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1797                                (char *)&tstat);
1798                 ap->length = sizeof(struct rx_debugStats);
1799                 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1800                 rx_computelen(ap, ap->length);
1801             }
1802             break;
1803         }
1804
1805     case RX_DEBUGI_GETALLCONN:
1806     case RX_DEBUGI_GETCONN:{
1807             unsigned int i, j;
1808             struct rx_connection *tc;
1809             struct rx_call *tcall;
1810             struct rx_debugConn tconn;
1811             int all = (tin.type == RX_DEBUGI_GETALLCONN);
1812
1813
1814             tl = sizeof(struct rx_debugConn) - ap->length;
1815             if (tl > 0)
1816                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1817             if (tl > 0)
1818                 return ap;
1819
1820             memset(&tconn, 0, sizeof(tconn));   /* make sure spares are zero */
1821             /* get N'th (maybe) "interesting" connection info */
1822             for (i = 0; i < rx_hashTableSize; i++) {
1823 #if !defined(KERNEL)
1824                 /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
1826                  */
1827 #ifdef AFS_PTHREAD_ENV
1828                 pthread_yield();
1829 #else
1830                 (void)IOMGR_Poll();
1831 #endif
1832 #endif
1833                 MUTEX_ENTER(&rx_connHashTable_lock);
1834                 /* We might be slightly out of step since we are not
1835                  * locking each call, but this is only debugging output.
1836                  */
1837                 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
1838                     if ((all || rxi_IsConnInteresting(tc))
1839                         && tin.index-- <= 0) {
1840                         tconn.host = tc->peer->host;
1841                         tconn.port = tc->peer->port;
1842                         tconn.cid = htonl(tc->cid);
1843                         tconn.epoch = htonl(tc->epoch);
1844                         tconn.serial = htonl(tc->serial);
1845                         for (j = 0; j < RX_MAXCALLS; j++) {
1846                             tconn.callNumber[j] = htonl(tc->callNumber[j]);
1847                             if ((tcall = tc->call[j])) {
1848                                 tconn.callState[j] = tcall->state;
1849                                 tconn.callMode[j] = tcall->mode;
1850                                 tconn.callFlags[j] = tcall->flags;
1851                                 if (queue_IsNotEmpty(&tcall->rq))
1852                                     tconn.callOther[j] |= RX_OTHER_IN;
1853                                 if (queue_IsNotEmpty(&tcall->tq))
1854                                     tconn.callOther[j] |= RX_OTHER_OUT;
1855                             } else
1856                                 tconn.callState[j] = RX_STATE_NOTINIT;
1857                         }
1858
1859                         tconn.natMTU = htonl(tc->peer->natMTU);
1860                         tconn.error = htonl(tc->error);
1861                         tconn.flags = tc->flags;
1862                         tconn.type = tc->type;
1863                         tconn.securityIndex = tc->securityIndex;
1864                         if (tc->securityObject) {
1865                             RXS_GetStats(tc->securityObject, tc,
1866                                          &tconn.secStats);
1867 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1868 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1869                             DOHTONL(flags);
1870                             DOHTONL(expires);
1871                             DOHTONL(packetsReceived);
1872                             DOHTONL(packetsSent);
1873                             DOHTONL(bytesReceived);
1874                             DOHTONL(bytesSent);
1875                             for (i = 0;
1876                                  i <
1877                                  sizeof(tconn.secStats.spares) /
1878                                  sizeof(short); i++)
1879                                 DOHTONS(spares[i]);
1880                             for (i = 0;
1881                                  i <
1882                                  sizeof(tconn.secStats.sparel) /
1883                                  sizeof(afs_int32); i++)
1884                                 DOHTONL(sparel[i]);
1885                         }
1886
1887                         MUTEX_EXIT(&rx_connHashTable_lock);
1888                         rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1889                                        (char *)&tconn);
1890                         tl = ap->length;
1891                         ap->length = sizeof(struct rx_debugConn);
1892                         rxi_SendDebugPacket(ap, asocket, ahost, aport,
1893                                             istack);
1894                         ap->length = tl;
1895                         return ap;
1896                     }
1897                 }
1898                 MUTEX_EXIT(&rx_connHashTable_lock);
1899             }
1900             /* if we make it here, there are no interesting packets */
1901             tconn.cid = htonl(0xffffffff);      /* means end */
1902             rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1903                            (char *)&tconn);
1904             tl = ap->length;
1905             ap->length = sizeof(struct rx_debugConn);
1906             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1907             ap->length = tl;
1908             break;
1909         }
1910
1911         /*
1912          * Pass back all the peer structures we have available
1913          */
1914
1915     case RX_DEBUGI_GETPEER:{
1916             unsigned int i;
1917             struct rx_peer *tp;
1918             struct rx_debugPeer tpeer;
1919
1920
1921             tl = sizeof(struct rx_debugPeer) - ap->length;
1922             if (tl > 0)
1923                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1924             if (tl > 0)
1925                 return ap;
1926
1927             memset(&tpeer, 0, sizeof(tpeer));
1928             for (i = 0; i < rx_hashTableSize; i++) {
1929 #if !defined(KERNEL)
1930                 /* the time complexity of the algorithm used here
1931                  * exponentially increses with the number of peers.
1932                  *
1933                  * Yielding after processing each hash table entry
1934                  * and dropping rx_peerHashTable_lock.
1935                  * also increases the risk that we will miss a new
1936                  * entry - but we are willing to live with this
1937                  * limitation since this is meant for debugging only
1938                  */
1939 #ifdef AFS_PTHREAD_ENV
1940                 pthread_yield();
1941 #else
1942                 (void)IOMGR_Poll();
1943 #endif
1944 #endif
1945                 MUTEX_ENTER(&rx_peerHashTable_lock);
1946                 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1947                     if (tin.index-- <= 0) {
1948                         tp->refCount++;
1949                         MUTEX_EXIT(&rx_peerHashTable_lock);
1950
1951                         MUTEX_ENTER(&tp->peer_lock);
1952                         tpeer.host = tp->host;
1953                         tpeer.port = tp->port;
1954                         tpeer.ifMTU = htons(tp->ifMTU);
1955                         tpeer.idleWhen = htonl(tp->idleWhen);
1956                         tpeer.refCount = htons(tp->refCount);
1957                         tpeer.burstSize = 0;
1958                         tpeer.burst = 0;
1959                         tpeer.burstWait.sec = 0;
1960                         tpeer.burstWait.usec = 0;
1961                         tpeer.rtt = htonl(tp->rtt);
1962                         tpeer.rtt_dev = htonl(tp->rtt_dev);
1963                         tpeer.nSent = htonl(tp->nSent);
1964                         tpeer.reSends = htonl(tp->reSends);
1965                         tpeer.natMTU = htons(tp->natMTU);
1966                         tpeer.maxMTU = htons(tp->maxMTU);
1967                         tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1968                         tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1969                         tpeer.MTU = htons(tp->MTU);
1970                         tpeer.cwind = htons(tp->cwind);
1971                         tpeer.nDgramPackets = htons(tp->nDgramPackets);
1972                         tpeer.congestSeq = htons(tp->congestSeq);
1973                         tpeer.bytesSent.high =
1974                             htonl(tp->bytesSent >> 32);
1975                         tpeer.bytesSent.low =
1976                             htonl(tp->bytesSent & MAX_AFS_UINT32);
1977                         tpeer.bytesReceived.high =
1978                             htonl(tp->bytesReceived >> 32);
1979                         tpeer.bytesReceived.low =
1980                             htonl(tp->bytesReceived & MAX_AFS_UINT32);
1981                         MUTEX_EXIT(&tp->peer_lock);
1982
1983                         MUTEX_ENTER(&rx_peerHashTable_lock);
1984                         tp->refCount--;
1985                         MUTEX_EXIT(&rx_peerHashTable_lock);
1986
1987                         rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1988                                        (char *)&tpeer);
1989                         tl = ap->length;
1990                         ap->length = sizeof(struct rx_debugPeer);
1991                         rxi_SendDebugPacket(ap, asocket, ahost, aport,
1992                                             istack);
1993                         ap->length = tl;
1994                         return ap;
1995                     }
1996                 }
1997                 MUTEX_EXIT(&rx_peerHashTable_lock);
1998             }
1999             /* if we make it here, there are no interesting packets */
2000             tpeer.host = htonl(0xffffffff);     /* means end */
2001             rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
2002                            (char *)&tpeer);
2003             tl = ap->length;
2004             ap->length = sizeof(struct rx_debugPeer);
2005             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2006             ap->length = tl;
2007             break;
2008         }
2009
2010     case RX_DEBUGI_RXSTATS:{
2011             int i;
2012             afs_int32 *s;
2013
2014             tl = sizeof(rx_stats) - ap->length;
2015             if (tl > 0)
2016                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
2017             if (tl > 0)
2018                 return ap;
2019
2020             /* Since its all int32s convert to network order with a loop. */
2021         if (rx_stats_active)
2022             MUTEX_ENTER(&rx_stats_mutex);
2023             s = (afs_int32 *) & rx_stats;
2024             for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
2025                 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
2026
2027             tl = ap->length;
2028             ap->length = sizeof(rx_stats);
2029         if (rx_stats_active)
2030             MUTEX_EXIT(&rx_stats_mutex);
2031             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2032             ap->length = tl;
2033             break;
2034         }
2035
2036     default:
2037         /* error response packet */
2038         tin.type = htonl(RX_DEBUGI_BADTYPE);
2039         tin.index = tin.type;
2040         rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
2041         tl = ap->length;
2042         ap->length = sizeof(struct rx_debugIn);
2043         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2044         ap->length = tl;
2045         break;
2046     }
2047     return ap;
2048 }
2049
2050 struct rx_packet *
2051 rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
2052                          afs_uint32 ahost, short aport, int istack)
2053 {
2054     afs_int32 tl;
2055
2056     /*
2057      * Only respond to client-initiated version requests, and
2058      * clear that flag in the response.
2059      */
2060     if (ap->header.flags & RX_CLIENT_INITIATED) {
2061         char buf[66];
2062
2063         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2064         rxi_EncodePacketHeader(ap);
2065         memset(buf, 0, sizeof(buf));
2066         strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2067         rx_packetwrite(ap, 0, 65, buf);
2068         tl = ap->length;
2069         ap->length = 65;
2070         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2071         ap->length = tl;
2072     }
2073
2074     return ap;
2075 }
2076
2077
2078 /* send a debug packet back to the sender */
2079 static void
2080 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2081                     afs_uint32 ahost, short aport, afs_int32 istack)
2082 {
2083     struct sockaddr_in taddr;
2084     unsigned int i, nbytes, savelen = 0;
2085     int saven = 0;
2086 #ifdef KERNEL
2087     int waslocked = ISAFS_GLOCK();
2088 #endif
2089
2090     taddr.sin_family = AF_INET;
2091     taddr.sin_port = aport;
2092     taddr.sin_addr.s_addr = ahost;
2093 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2094     taddr.sin_len = sizeof(struct sockaddr_in);
2095 #endif
2096
2097     /* We need to trim the niovecs. */
2098     nbytes = apacket->length;
2099     for (i = 1; i < apacket->niovecs; i++) {
2100         if (nbytes <= apacket->wirevec[i].iov_len) {
2101             savelen = apacket->wirevec[i].iov_len;
2102             saven = apacket->niovecs;
2103             apacket->wirevec[i].iov_len = nbytes;
2104             apacket->niovecs = i + 1;   /* so condition fails because i == niovecs */
2105         } else
2106             nbytes -= apacket->wirevec[i].iov_len;
2107     }
2108 #ifdef KERNEL
2109 #ifdef RX_KERNEL_TRACE
2110     if (ICL_SETACTIVE(afs_iclSetp)) {
2111         if (!waslocked)
2112             AFS_GLOCK();
2113         afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2114                    "before osi_NetSend()");
2115         AFS_GUNLOCK();
2116     }
2117 #else
2118     if (waslocked)
2119         AFS_GUNLOCK();
2120 #endif
2121 #endif
2122     /* debug packets are not reliably delivered, hence the cast below. */
2123     (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2124                       apacket->length + RX_HEADER_SIZE, istack);
2125 #ifdef KERNEL
2126 #ifdef RX_KERNEL_TRACE
2127     if (ICL_SETACTIVE(afs_iclSetp)) {
2128         AFS_GLOCK();
2129         afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2130                    "after osi_NetSend()");
2131         if (!waslocked)
2132             AFS_GUNLOCK();
2133     }
2134 #else
2135     if (waslocked)
2136         AFS_GLOCK();
2137 #endif
2138 #endif
2139     if (saven) {                /* means we truncated the packet above. */
2140         apacket->wirevec[i - 1].iov_len = savelen;
2141         apacket->niovecs = saven;
2142     }
2143
2144 }
2145
2146 static void
2147 rxi_NetSendError(struct rx_call *call, int code)
2148 {
2149     int down = 0;
2150 #ifdef AFS_NT40_ENV
2151     if (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) {
2152         down = 1;
2153     }
2154     if (code == -WSAEHOSTUNREACH) {
2155         down = 1;
2156     }
2157 #elif defined(AFS_LINUX20_ENV)
2158     if (code == -ENETUNREACH) {
2159         down = 1;
2160     }
2161 #elif defined(AFS_DARWIN_ENV)
2162     if (code == EHOSTUNREACH) {
2163         down = 1;
2164     }
2165 #endif
2166     if (down) {
2167         call->lastReceiveTime = 0;
2168     }
2169 }
2170
2171 /* Send the packet to appropriate destination for the specified
2172  * call.  The header is first encoded and placed in the packet.
2173  */
2174 void
2175 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2176                struct rx_packet *p, int istack)
2177 {
2178 #if defined(KERNEL)
2179     int waslocked;
2180 #endif
2181     int code;
2182     struct sockaddr_in addr;
2183     struct rx_peer *peer = conn->peer;
2184     osi_socket socket;
2185 #ifdef RXDEBUG
2186     char deliveryType = 'S';
2187 #endif
2188     /* The address we're sending the packet to */
2189     memset(&addr, 0, sizeof(addr));
2190     addr.sin_family = AF_INET;
2191     addr.sin_port = peer->port;
2192     addr.sin_addr.s_addr = peer->host;
2193
2194     /* This stuff should be revamped, I think, so that most, if not
2195      * all, of the header stuff is always added here.  We could
2196      * probably do away with the encode/decode routines. XXXXX */
2197
2198     /* Stamp each packet with a unique serial number.  The serial
2199      * number is maintained on a connection basis because some types
2200      * of security may be based on the serial number of the packet,
2201      * and security is handled on a per authenticated-connection
2202      * basis. */
2203     /* Pre-increment, to guarantee no zero serial number; a zero
2204      * serial number means the packet was never sent. */
2205     MUTEX_ENTER(&conn->conn_data_lock);
2206     p->header.serial = ++conn->serial;
2207     if (p->length > conn->peer->maxPacketSize) {
2208         if ((p->header.type == RX_PACKET_TYPE_ACK) &&
2209             (p->header.flags & RX_REQUEST_ACK)) {
2210             conn->lastPingSize = p->length;
2211             conn->lastPingSizeSer = p->header.serial;
2212         } else if (p->header.seq != 0) {
2213             conn->lastPacketSize = p->length;
2214             conn->lastPacketSizeSeq = p->header.seq;
2215         }
2216     }
2217     MUTEX_EXIT(&conn->conn_data_lock);
2218     /* This is so we can adjust retransmit time-outs better in the face of
2219      * rapidly changing round-trip times.  RTO estimation is not a la Karn.
2220      */
2221     if (p->firstSerial == 0) {
2222         p->firstSerial = p->header.serial;
2223     }
2224 #ifdef RXDEBUG
2225     /* If an output tracer function is defined, call it with the packet and
2226      * network address.  Note this function may modify its arguments. */
2227     if (rx_almostSent) {
2228         int drop = (*rx_almostSent) (p, &addr);
2229         /* drop packet if return value is non-zero? */
2230         if (drop)
2231             deliveryType = 'D'; /* Drop the packet */
2232     }
2233 #endif
2234
2235     /* Get network byte order header */
2236     rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
2237                                  * touch ALL the fields */
2238
2239     /* Send the packet out on the same socket that related packets are being
2240      * received on */
2241     socket =
2242         (conn->type ==
2243          RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2244
2245 #ifdef RXDEBUG
2246     /* Possibly drop this packet,  for testing purposes */
2247     if ((deliveryType == 'D')
2248         || ((rx_intentionallyDroppedPacketsPer100 > 0)
2249             && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2250         deliveryType = 'D';     /* Drop the packet */
2251     } else {
2252         deliveryType = 'S';     /* Send the packet */
2253 #endif /* RXDEBUG */
2254
2255         /* Loop until the packet is sent.  We'd prefer just to use a
2256          * blocking socket, but unfortunately the interface doesn't
2257          * allow us to have the socket block in send mode, and not
2258          * block in receive mode */
2259 #ifdef KERNEL
2260         waslocked = ISAFS_GLOCK();
2261 #ifdef RX_KERNEL_TRACE
2262         if (ICL_SETACTIVE(afs_iclSetp)) {
2263             if (!waslocked)
2264                 AFS_GLOCK();
2265             afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2266                        "before osi_NetSend()");
2267             AFS_GUNLOCK();
2268         }
2269 #else
2270         if (waslocked)
2271             AFS_GUNLOCK();
2272 #endif
2273 #endif
2274         if ((code =
2275              osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2276                          p->length + RX_HEADER_SIZE, istack)) != 0) {
2277             /* send failed, so let's hurry up the resend, eh? */
2278             if (rx_stats_active)
2279                 rx_atomic_inc(&rx_stats.netSendFailures);
2280             p->flags &= ~RX_PKTFLAG_SENT; /* resend it very soon */
2281
2282             /* Some systems are nice and tell us right away that we cannot
2283              * reach this recipient by returning an error code.
2284              * So, when this happens let's "down" the host NOW so
2285              * we don't sit around waiting for this host to timeout later.
2286              */
2287             if (call) {
2288                 rxi_NetSendError(call, code);
2289             }
2290         }
2291 #ifdef KERNEL
2292 #ifdef RX_KERNEL_TRACE
2293         if (ICL_SETACTIVE(afs_iclSetp)) {
2294             AFS_GLOCK();
2295             afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2296                        "after osi_NetSend()");
2297             if (!waslocked)
2298                 AFS_GUNLOCK();
2299         }
2300 #else
2301         if (waslocked)
2302             AFS_GLOCK();
2303 #endif
2304 #endif
2305 #ifdef RXDEBUG
2306     }
2307     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
2308           deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2309           ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2310           p->header.seq, p->header.flags, p, p->length));
2311 #endif
2312     if (rx_stats_active) {
2313         rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
2314         MUTEX_ENTER(&peer->peer_lock);
2315         peer->bytesSent += p->length;
2316         MUTEX_EXIT(&peer->peer_lock);
2317     }
2318 }
2319
2320 /* Send a list of packets to appropriate destination for the specified
2321  * connection.  The headers are first encoded and placed in the packets.
2322  */
2323 void
2324 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2325                    struct rx_packet **list, int len, int istack)
2326 {
2327 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2328     int waslocked;
2329 #endif
2330     struct sockaddr_in addr;
2331     struct rx_peer *peer = conn->peer;
2332     osi_socket socket;
2333     struct rx_packet *p = NULL;
2334     struct iovec wirevec[RX_MAXIOVECS];
2335     int i, length, code;
2336     afs_uint32 serial;
2337     afs_uint32 temp;
2338     struct rx_jumboHeader *jp;
2339 #ifdef RXDEBUG
2340     char deliveryType = 'S';
2341 #endif
2342     /* The address we're sending the packet to */
2343     addr.sin_family = AF_INET;
2344     addr.sin_port = peer->port;
2345     addr.sin_addr.s_addr = peer->host;
2346
2347     if (len + 1 > RX_MAXIOVECS) {
2348         osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2349     }
2350
2351     /*
2352      * Stamp the packets in this jumbogram with consecutive serial numbers
2353      */
2354     MUTEX_ENTER(&conn->conn_data_lock);
2355     serial = conn->serial;
2356     conn->serial += len;
2357     for (i = 0; i < len; i++) {
2358         p = list[i];
2359         if (p->length > conn->peer->maxPacketSize) {
2360             /* a ping *or* a sequenced packet can count */
2361             if ((p->length > conn->peer->maxPacketSize)) {
2362                 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2363                      (p->header.flags & RX_REQUEST_ACK)) &&
2364                     ((i == 0) || (p->length >= conn->lastPingSize))) {
2365                     conn->lastPingSize = p->length;
2366                     conn->lastPingSizeSer = serial + i;
2367                 } else if ((p->header.seq != 0) &&
2368                            ((i == 0) || (p->length >= conn->lastPacketSize))) {
2369                     conn->lastPacketSize = p->length;
2370                     conn->lastPacketSizeSeq = p->header.seq;
2371                 }
2372             }
2373         }
2374     }
2375     MUTEX_EXIT(&conn->conn_data_lock);
2376
2377
2378     /* This stuff should be revamped, I think, so that most, if not
2379      * all, of the header stuff is always added here.  We could
2380      * probably do away with the encode/decode routines. XXXXX */
2381
2382     jp = NULL;
2383     length = RX_HEADER_SIZE;
2384     wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2385     wirevec[0].iov_len = RX_HEADER_SIZE;
2386     for (i = 0; i < len; i++) {
2387         p = list[i];
2388
2389         /* The whole 3.5 jumbogram scheme relies on packets fitting
2390          * in a single packet buffer. */
2391         if (p->niovecs > 2) {
2392             osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2393         }
2394
2395         /* Set the RX_JUMBO_PACKET flags in all but the last packets
2396          * in this chunk.  */
2397         if (i < len - 1) {
2398             if (p->length != RX_JUMBOBUFFERSIZE) {
2399                 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2400             }
2401             p->header.flags |= RX_JUMBO_PACKET;
2402             length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2403             wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2404         } else {
2405             wirevec[i + 1].iov_len = p->length;
2406             length += p->length;
2407         }
2408         wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2409         if (jp != NULL) {
2410             /* Convert jumbo packet header to network byte order */
2411             temp = (afs_uint32) (p->header.flags) << 24;
2412             temp |= (afs_uint32) (p->header.spare);
2413             *(afs_uint32 *) jp = htonl(temp);
2414         }
2415         jp = (struct rx_jumboHeader *)
2416             ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2417
2418         /* Stamp each packet with a unique serial number.  The serial
2419          * number is maintained on a connection basis because some types
2420          * of security may be based on the serial number of the packet,
2421          * and security is handled on a per authenticated-connection
2422          * basis. */
2423         /* Pre-increment, to guarantee no zero serial number; a zero
2424          * serial number means the packet was never sent. */
2425         p->header.serial = ++serial;
2426         /* This is so we can adjust retransmit time-outs better in the face of
2427          * rapidly changing round-trip times.  RTO estimation is not a la Karn.
2428          */
2429         if (p->firstSerial == 0) {
2430             p->firstSerial = p->header.serial;
2431         }
2432 #ifdef RXDEBUG
2433         /* If an output tracer function is defined, call it with the packet and
2434          * network address.  Note this function may modify its arguments. */
2435         if (rx_almostSent) {
2436             int drop = (*rx_almostSent) (p, &addr);
2437             /* drop packet if return value is non-zero? */
2438             if (drop)
2439                 deliveryType = 'D';     /* Drop the packet */
2440         }
2441 #endif
2442
2443         /* Get network byte order header */
2444         rxi_EncodePacketHeader(p);      /* XXX in the event of rexmit, etc, don't need to
2445                                          * touch ALL the fields */
2446     }
2447
2448     /* Send the packet out on the same socket that related packets are being
2449      * received on */
2450     socket =
2451         (conn->type ==
2452          RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2453
2454 #ifdef RXDEBUG
2455     /* Possibly drop this packet,  for testing purposes */
2456     if ((deliveryType == 'D')
2457         || ((rx_intentionallyDroppedPacketsPer100 > 0)
2458             && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2459         deliveryType = 'D';     /* Drop the packet */
2460     } else {
2461         deliveryType = 'S';     /* Send the packet */
2462 #endif /* RXDEBUG */
2463
2464         /* Loop until the packet is sent.  We'd prefer just to use a
2465          * blocking socket, but unfortunately the interface doesn't
2466          * allow us to have the socket block in send mode, and not
2467          * block in receive mode */
2468 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2469         waslocked = ISAFS_GLOCK();
2470         if (!istack && waslocked)
2471             AFS_GUNLOCK();
2472 #endif
2473         if ((code =
2474              osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2475                          istack)) != 0) {
2476             /* send failed, so let's hurry up the resend, eh? */
2477             if (rx_stats_active)
2478                 rx_atomic_inc(&rx_stats.netSendFailures);
2479             for (i = 0; i < len; i++) {
2480                 p = list[i];
2481                 p->flags &= ~RX_PKTFLAG_SENT;  /* resend it very soon */
2482             }
2483             /* Some systems are nice and tell us right away that we cannot
2484              * reach this recipient by returning an error code.
2485              * So, when this happens let's "down" the host NOW so
2486              * we don't sit around waiting for this host to timeout later.
2487              */
2488             if (call) {
2489                 rxi_NetSendError(call, code);
2490             }
2491         }
2492 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2493         if (!istack && waslocked)
2494             AFS_GLOCK();
2495 #endif
2496 #ifdef RXDEBUG
2497     }
2498
2499     osi_Assert(p != NULL);
2500
2501     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
2502           deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2503           ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2504           p->header.seq, p->header.flags, p, p->length));
2505
2506 #endif
2507     if (rx_stats_active) {
2508         rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
2509         MUTEX_ENTER(&peer->peer_lock);
2510         peer->bytesSent += p->length;
2511         MUTEX_EXIT(&peer->peer_lock);
2512     }
2513 }
2514
2515 /* Send a raw abort packet, without any call or connection structures */
2516 void
2517 rxi_SendRawAbort(osi_socket socket, afs_uint32 host, u_short port,
2518                  afs_int32 error, struct rx_packet *source, int istack)
2519 {
2520     struct rx_header theader;
2521     struct sockaddr_in addr;
2522     struct iovec iov[2];
2523
2524     memset(&theader, 0, sizeof(theader));
2525     theader.epoch = htonl(source->header.epoch);
2526     theader.callNumber = htonl(source->header.callNumber);
2527     theader.serial = htonl(1);
2528     theader.type = RX_PACKET_TYPE_ABORT;
2529     theader.serviceId = htons(source->header.serviceId);
2530     theader.securityIndex = source->header.securityIndex;
2531     theader.cid = htonl(source->header.cid);
2532
2533     error = htonl(error);
2534
2535     iov[0].iov_base = &theader;
2536     iov[0].iov_len = sizeof(struct rx_header);
2537     iov[1].iov_base = &error;
2538     iov[1].iov_len = sizeof(error);
2539
2540     addr.sin_family = AF_INET;
2541     addr.sin_addr.s_addr = host;
2542     addr.sin_port = port;
2543 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2544     addr.sin_len = sizeof(struct sockaddr_in);
2545 #endif
2546
2547     osi_NetSend(socket, &addr, iov, 2,
2548                 sizeof(struct rx_header) + sizeof(error), istack);
2549 }
2550
2551 /* Send a "special" packet to the peer connection.  If call is
2552  * specified, then the packet is directed to a specific call channel
2553  * associated with the connection, otherwise it is directed to the
2554  * connection only. Uses optionalPacket if it is supplied, rather than
2555  * allocating a new packet buffer.  Nbytes is the length of the data
2556  * portion of the packet.  If data is non-null, nbytes of data are
2557  * copied into the packet.  Type is the type of the packet, as defined
2558  * in rx.h.  Bug: there's a lot of duplication between this and other
2559  * routines.  This needs to be cleaned up. */
2560 struct rx_packet *
2561 rxi_SendSpecial(struct rx_call *call,
2562                 struct rx_connection *conn,
2563                 struct rx_packet *optionalPacket, int type, char *data,
2564                 int nbytes, int istack)
2565 {
2566     /* Some of the following stuff should be common code for all
2567      * packet sends (it's repeated elsewhere) */
2568     struct rx_packet *p;
2569     unsigned int i = 0;
2570     int savelen = 0, saven = 0;
2571     int channel, callNumber;
2572     if (call) {
2573         channel = call->channel;
2574         callNumber = *call->callNumber;
2575         /* BUSY packets refer to the next call on this connection */
2576         if (type == RX_PACKET_TYPE_BUSY) {
2577             callNumber++;
2578         }
2579     } else {
2580         channel = 0;
2581         callNumber = 0;
2582     }
2583     p = optionalPacket;
2584     if (!p) {
2585         p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2586         if (!p)
2587             osi_Panic("rxi_SendSpecial failure");
2588     }
2589
2590     if (nbytes != -1)
2591         p->length = nbytes;
2592     else
2593         nbytes = p->length;
2594     p->header.serviceId = conn->serviceId;
2595     p->header.securityIndex = conn->securityIndex;
2596     p->header.cid = (conn->cid | channel);
2597     p->header.callNumber = callNumber;
2598     p->header.seq = 0;
2599     p->header.epoch = conn->epoch;
2600     p->header.type = type;
2601     p->header.flags = 0;
2602     if (conn->type == RX_CLIENT_CONNECTION)
2603         p->header.flags |= RX_CLIENT_INITIATED;
2604     if (data)
2605         rx_packetwrite(p, 0, nbytes, data);
2606
2607     for (i = 1; i < p->niovecs; i++) {
2608         if (nbytes <= p->wirevec[i].iov_len) {
2609             savelen = p->wirevec[i].iov_len;
2610             saven = p->niovecs;
2611             p->wirevec[i].iov_len = nbytes;
2612             p->niovecs = i + 1; /* so condition fails because i == niovecs */
2613         } else
2614             nbytes -= p->wirevec[i].iov_len;
2615     }
2616
2617     if (call)
2618         rxi_Send(call, p, istack);
2619     else
2620         rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2621     if (saven) {                /* means we truncated the packet above.  We probably don't  */
2622         /* really need to do this, but it seems safer this way, given that  */
2623         /* sneaky optionalPacket... */
2624         p->wirevec[i - 1].iov_len = savelen;
2625         p->niovecs = saven;
2626     }
2627     if (!optionalPacket)
2628         rxi_FreePacket(p);
2629     return optionalPacket;
2630 }
2631
2632
2633 /* Encode the packet's header (from the struct header in the packet to
2634  * the net byte order representation in the wire representation of the
2635  * packet, which is what is actually sent out on the wire) */
2636 void
2637 rxi_EncodePacketHeader(struct rx_packet *p)
2638 {
2639     afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);  /* MTUXXX */
2640
2641     memset(buf, 0, RX_HEADER_SIZE);
2642     *buf++ = htonl(p->header.epoch);
2643     *buf++ = htonl(p->header.cid);
2644     *buf++ = htonl(p->header.callNumber);
2645     *buf++ = htonl(p->header.seq);
2646     *buf++ = htonl(p->header.serial);
2647     *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2648                    | (((afs_uint32) p->header.flags) << 16)
2649                    | (p->header.userStatus << 8) | p->header.securityIndex);
2650     /* Note: top 16 bits of this next word were reserved */
2651     *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
2652 }
2653
2654 /* Decode the packet's header (from net byte order to a struct header) */
2655 void
2656 rxi_DecodePacketHeader(struct rx_packet *p)
2657 {
2658     afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);  /* MTUXXX */
2659     afs_uint32 temp;
2660
2661     p->header.epoch = ntohl(*buf);
2662     buf++;
2663     p->header.cid = ntohl(*buf);
2664     buf++;
2665     p->header.callNumber = ntohl(*buf);
2666     buf++;
2667     p->header.seq = ntohl(*buf);
2668     buf++;
2669     p->header.serial = ntohl(*buf);
2670     buf++;
2671
2672     temp = ntohl(*buf);
2673     buf++;
2674
2675     /* C will truncate byte fields to bytes for me */
2676     p->header.type = temp >> 24;
2677     p->header.flags = temp >> 16;
2678     p->header.userStatus = temp >> 8;
2679     p->header.securityIndex = temp >> 0;
2680
2681     temp = ntohl(*buf);
2682     buf++;
2683
2684     p->header.serviceId = (temp & 0xffff);
2685     p->header.spare = temp >> 16;
2686     /* Note: top 16 bits of this last word are the security checksum */
2687 }
2688
2689 /*
2690  * LOCKS HELD: called with call->lock held.
2691  *
2692  * PrepareSendPacket is the only place in the code that
2693  * can increment call->tnext.  This could become an atomic
2694  * in the future.  Beyond that there is nothing in this
2695  * function that requires the call being locked.  This
2696  * function can only be called by the application thread.
2697  */
2698 void
2699 rxi_PrepareSendPacket(struct rx_call *call,
2700                       struct rx_packet *p, int last)
2701 {
2702     struct rx_connection *conn = call->conn;
2703     afs_uint32 seq = call->tnext++;
2704     unsigned int i;
2705     afs_int32 len;              /* len must be a signed type; it can go negative */
2706
2707     /* No data packets on call 0. Where do these come from? */
2708     if (*call->callNumber == 0)
2709         *call->callNumber = 1;
2710
2711     MUTEX_EXIT(&call->lock);
2712     p->flags &= ~(RX_PKTFLAG_ACKED | RX_PKTFLAG_SENT);
2713
2714     p->header.cid = (conn->cid | call->channel);
2715     p->header.serviceId = conn->serviceId;
2716     p->header.securityIndex = conn->securityIndex;
2717
2718     p->header.callNumber = *call->callNumber;
2719     p->header.seq = seq;
2720     p->header.epoch = conn->epoch;
2721     p->header.type = RX_PACKET_TYPE_DATA;
2722     p->header.flags = 0;
2723     p->header.spare = 0;
2724     if (conn->type == RX_CLIENT_CONNECTION)
2725         p->header.flags |= RX_CLIENT_INITIATED;
2726
2727     if (last)
2728         p->header.flags |= RX_LAST_PACKET;
2729
2730     clock_Zero(&p->firstSent);  /* Never yet transmitted */
2731     p->header.serial = 0;       /* Another way of saying never transmitted... */
2732
2733     /* Now that we're sure this is the last data on the call, make sure
2734      * that the "length" and the sum of the iov_lens matches. */
2735     len = p->length + call->conn->securityHeaderSize;
2736
2737     for (i = 1; i < p->niovecs && len > 0; i++) {
2738         len -= p->wirevec[i].iov_len;
2739     }
2740     if (len > 0) {
2741         osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
2742     } else if (i < p->niovecs) {
2743         /* Free any extra elements in the wirevec */
2744 #if defined(RX_ENABLE_TSFPQ)
2745         rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2746 #else /* !RX_ENABLE_TSFPQ */
2747         MUTEX_ENTER(&rx_freePktQ_lock);
2748         rxi_FreeDataBufsNoLock(p, i);
2749         MUTEX_EXIT(&rx_freePktQ_lock);
2750 #endif /* !RX_ENABLE_TSFPQ */
2751
2752         p->niovecs = i;
2753     }
2754     if (len)
2755         p->wirevec[i - 1].iov_len += len;
2756     MUTEX_ENTER(&call->lock);
2757     RXS_PreparePacket(conn->securityObject, call, p);
2758 }
2759
2760 /* Given an interface MTU size, calculate an adjusted MTU size that
2761  * will make efficient use of the RX buffers when the peer is sending
2762  * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
2763 int
2764 rxi_AdjustIfMTU(int mtu)
2765 {
2766     int adjMTU;
2767     int frags;
2768
2769     if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
2770         return mtu;
2771     adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2772     if (mtu <= adjMTU) {
2773         return mtu;
2774     }
2775     mtu -= adjMTU;
2776     if (mtu <= 0) {
2777         return adjMTU;
2778     }
2779     frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2780     return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2781 }
2782
2783 /* Given an interface MTU size, and the peer's advertised max receive
2784  * size, calculate an adjisted maxMTU size that makes efficient use
2785  * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2786 int
2787 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2788 {
2789     int maxMTU = mtu * rxi_nSendFrags;
2790     maxMTU = MIN(maxMTU, peerMaxMTU);
2791     return rxi_AdjustIfMTU(maxMTU);
2792 }
2793
2794 /* Given a packet size, figure out how many datagram packet will fit.
2795  * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2796  * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2797  * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2798 int
2799 rxi_AdjustDgramPackets(int frags, int mtu)
2800 {
2801     int maxMTU;
2802     if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2803         return 1;
2804     }
2805     maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2806     maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2807     /* subtract the size of the first and last packets */
2808     maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2809     if (maxMTU < 0) {
2810         return 1;
2811     }
2812     return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2813 }
2814
#ifndef KERNEL
/*
 * Write a one-line description of every packet on the rx_mallocedP
 * list, framed by a start line and an end line.
 *
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where the packet leakage is.
 *
 * outputFile - where the dump goes.  It is written with fprintf(),
 *              except under AFS_NT40_ENV, where each line is formatted
 *              into a local buffer and passed to WriteFile().
 * cookie     - string placed at the start of every line written.
 *
 * The list is walked with rx_freePktQ_lock held, between NETPRI and
 * USERPRI.
 *
 * Always returns 0.  Nothing is written unless RXDEBUG_PACKET is
 * defined.
 */
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
    struct rx_packet *p;
#ifdef AFS_NT40_ENV
    DWORD zilch;                /* WriteFile() stores the byte count in a DWORD */
    char output[2048];
    /*
     * RXDPRINTOUT expands to two arguments on purpose, so that every
     * RXDPRINTF(RXDPRINTOUT, fmt, ...) below becomes
     * snprintf(output, sizeof(output), fmt, ...).  This keeps a long
     * cookie from writing past the end of output.
     */
#define RXDPRINTF snprintf
#define RXDPRINTOUT output, sizeof(output)
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);
    RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    for (p = rx_mallocedP; p; p = p->allNextp) {
        RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, length=%u  header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
                cookie, (void *)p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec,
                p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->length,
                p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
                (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
                (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
#ifdef AFS_NT40_ENV
        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }

    RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;

    /* These helpers are private to this function; do not leak them. */
#undef RXDPRINTF
#undef RXDPRINTOUT
#endif /* RXDEBUG_PACKET */
    return 0;
}
#endif