src/rx/rx_packet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #ifdef KERNEL
  12 #include "afs/param.h"
  13 #else
  14 #include <afs/param.h>
  15 #endif
  16
  17 RCSID
  18     ("$Header$");
  19
  20 #ifdef KERNEL
  21 #if defined(UKERNEL)
  22 #include "afs/sysincludes.h"
  23 #include "afsincludes.h"
  24 #include "rx/rx_kcommon.h"
  25 #include "rx/rx_clock.h"
  26 #include "rx/rx_queue.h"
  27 #include "rx/rx_packet.h"
  28 #else /* defined(UKERNEL) */
  29 #ifdef RX_KERNEL_TRACE
  30 #include "../rx/rx_kcommon.h"
  31 #endif
  32 #include "h/types.h"
  33 #ifndef AFS_LINUX20_ENV
  34 #include "h/systm.h"
  35 #endif
  36 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
  37 #include "afs/sysincludes.h"
  38 #endif
  39 #if defined(AFS_OBSD_ENV)
  40 #include "h/proc.h"
  41 #endif
  42 #include "h/socket.h"
  43 #if !defined(AFS_SUN5_ENV) &&  !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
  44 #if     !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
  45 #include "sys/mount.h"          /* it gets pulled in by something later anyway */
  46 #endif
  47 #include "h/mbuf.h"
  48 #endif
  49 #include "netinet/in.h"
  50 #include "afs/afs_osi.h"
  51 #include "rx_kmutex.h"
  52 #include "rx/rx_clock.h"
  53 #include "rx/rx_queue.h"
  54 #ifdef  AFS_SUN5_ENV
  55 #include <sys/sysmacros.h>
  56 #endif
  57 #include "rx/rx_packet.h"
  58 #endif /* defined(UKERNEL) */
  59 #include "rx/rx_globals.h"
  60 #else /* KERNEL */
  61 #include "sys/types.h"
  62 #include <sys/stat.h>
  63 #include <errno.h>
  64 #if defined(AFS_NT40_ENV)
  65 #ifdef AFS_NT40_ENV
  66 #include <winsock2.h>
  67 #ifndef EWOULDBLOCK
  68 #define EWOULDBLOCK WSAEWOULDBLOCK
  69 #endif
  70 #else
  71 #include <sys/socket.h>
  72 #include <netinet/in.h>
  73 #endif /* AFS_NT40_ENV */
  74 #include "rx_user.h"
  75 #include "rx_xmit_nt.h"
  76 #include <stdlib.h>
  77 #else
  78 #include <sys/socket.h>
  79 #include <netinet/in.h>
  80 #endif
  81 #include "rx_clock.h"
  82 #include "rx.h"
  83 #include "rx_queue.h"
  84 #ifdef  AFS_SUN5_ENV
  85 #include <sys/sysmacros.h>
  86 #endif
  87 #include "rx_packet.h"
  88 #include "rx_globals.h"
  89 #include <lwp.h>
  90 #include <assert.h>
  91 #ifdef HAVE_STRING_H
  92 #include <string.h>
  93 #else
  94 #ifdef HAVE_STRINGS_H
  95 #include <strings.h>
  96 #endif
  97 #endif
  98 #ifdef HAVE_UNISTD_H
  99 #include <unistd.h>
 100 #endif
 101 #endif /* KERNEL */
 102
 103 #ifdef RX_LOCKS_DB
 104 /* rxdb_fileID is used to identify the lock location, along with line#. */
 105 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
 106 #endif /* RX_LOCKS_DB */
 107 struct rx_packet *rx_mallocedP = 0;
 108
 109 extern char cml_version_number[];
 110 extern int (*rx_almostSent) ();
 111
 112 static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);
 113
 114 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
 115                                 afs_int32 ahost, short aport,
 116                                 afs_int32 istack);
 117
 118 static int rxi_FreeDataBufsToQueue(struct rx_packet *p, int first,
 119                                    struct rx_queue * q);
 120
 121 /* some rules about packets:
 122  * 1.  When a packet is allocated, the final iov_buf contains room for
 123  * a security trailer, but iov_len masks that fact.  If the security
 124  * package wants to add the trailer, it may do so, and then extend
 125  * iov_len appropriately.  For this reason, packet's niovecs and
 126  * iov_len fields should be accurate before calling PreparePacket.
 127 */
 128
 129 /* Preconditions:
 130  *        all packet buffers (iov_base) are integral multiples of
 131  *        the word size.
 132  *        offset is an integral multiple of the word size.
 133  */
 134 afs_int32
 135 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
 136 {
 137     unsigned int i;
 138     size_t l;
 139     for (l = 0, i = 1; i < packet->niovecs; i++) {
 140         if (l + packet->wirevec[i].iov_len > offset) {
 141             return
 142                 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
 143                                  (offset - l)));
 144         }
 145         l += packet->wirevec[i].iov_len;
 146     }
 147
 148     return 0;
 149 }
 150
 151 /* Preconditions:
 152  *        all packet buffers (iov_base) are integral multiples of the word size.
 153  *        offset is an integral multiple of the word size.
 154  */
 155 afs_int32
 156 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
 157 {
 158     unsigned int i;
 159     size_t l;
 160     for (l = 0, i = 1; i < packet->niovecs; i++) {
 161         if (l + packet->wirevec[i].iov_len > offset) {
 162             *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
 163                              (offset - l))) = data;
 164             return 0;
 165         }
 166         l += packet->wirevec[i].iov_len;
 167     }
 168
 169     return 0;
 170 }
 171
 172 /* Preconditions:
 173  *        all packet buffers (iov_base) are integral multiples of the
 174  *        word size.
 175  *        offset is an integral multiple of the word size.
 176  * Packet Invariants:
 177  *         all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 178  */
 179 afs_int32
 180 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
 181                   char *out)
 182 {
 183     unsigned int i, j, l, r;
 184     for (l = 0, i = 1; i < packet->niovecs; i++) {
 185         if (l + packet->wirevec[i].iov_len > offset) {
 186             break;
 187         }
 188         l += packet->wirevec[i].iov_len;
 189     }
 190
 191     /* i is the iovec which contains the first little bit of data in which we
 192      * are interested.  l is the total length of everything prior to this iovec.
 193      * j is the number of bytes we can safely copy out of this iovec.
 194      * offset only applies to the first iovec.
 195      */
 196     r = resid;
 197     while ((resid > 0) && (i < packet->niovecs)) {
 198         j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
 199         memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
 200         resid -= j;
 201         out += j;
 202         l += packet->wirevec[i].iov_len;
 203         offset = l;
 204         i++;
 205     }
 206
 207     return (resid ? (r - resid) : r);
 208 }
 209
 210
 211 /* Preconditions:
 212  *        all packet buffers (iov_base) are integral multiples of the
 213  *        word size.
 214  *        offset is an integral multiple of the word size.
 215  */
 216 afs_int32
 217 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
 218 {
 219     int i, j, l, r;
 220     char *b;
 221
 222     for (l = 0, i = 1; i < packet->niovecs; i++) {
 223         if (l + packet->wirevec[i].iov_len > offset) {
 224             break;
 225         }
 226         l += packet->wirevec[i].iov_len;
 227     }
 228
 229     /* i is the iovec which contains the first little bit of data in which we
 230      * are interested.  l is the total length of everything prior to this iovec.
 231      * j is the number of bytes we can safely copy out of this iovec.
 232      * offset only applies to the first iovec.
 233      */
 234     r = resid;
 235     while ((resid > 0) && (i < RX_MAXWVECS)) {
 236         if (i >= packet->niovecs)
 237             if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
 238                 break;
 239
 240         b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
 241         j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
 242         memcpy(b, in, j);
 243         resid -= j;
 244         in += j;
 245         l += packet->wirevec[i].iov_len;
 246         offset = l;
 247         i++;
 248     }
 249
 250     return (resid ? (r - resid) : r);
 251 }
 252
 253 int
 254 rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
 255 {
 256     register struct rx_packet *p, *np;
 257
 258     num_pkts = AllocPacketBufs(class, num_pkts, q);
 259
 260     for (queue_Scan(q, p, np, rx_packet)) {
 261         RX_PACKET_IOV_FULLINIT(p);
 262     }
 263
 264     return num_pkts;
 265 }
 266
 267 #ifdef RX_ENABLE_TSFPQ
 268 static int
 269 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
 270 {
 271     register struct rx_packet *c;
 272     register struct rx_ts_info_t * rx_ts_info;
 273     int transfer, alloc;
 274     SPLVAR;
 275
 276     RX_TS_INFO_GET(rx_ts_info);
 277
 278     transfer = num_pkts - rx_ts_info->_FPQ.len;
 279     if (transfer > 0) {
 280         NETPRI;
 281         MUTEX_ENTER(&rx_freePktQ_lock);
 282
 283         if ((transfer + rx_TSFPQGlobSize) <= rx_nFreePackets) {
 284             transfer += rx_TSFPQGlobSize;
 285         } else if (transfer <= rx_nFreePackets) {
 286             transfer = rx_nFreePackets;
 287         } else {
 288             /* alloc enough for us, plus a few globs for other threads */
 289             alloc = transfer + (3 * rx_TSFPQGlobSize) - rx_nFreePackets;
 290             rxi_MorePacketsNoLock(MAX(alloc, rx_initSendWindow));
 291             transfer += rx_TSFPQGlobSize;
 292         }
 293
 294         RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
 295
 296         MUTEX_EXIT(&rx_freePktQ_lock);
 297         USERPRI;
 298     }
 299
 300     RX_TS_FPQ_CHECKOUT2(rx_ts_info, num_pkts, q);
 301
 302     return num_pkts;
 303 }
 304 #else /* RX_ENABLE_TSFPQ */
 305 static int
 306 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
 307 {
 308     struct rx_packet *c;
 309     int i, overq = 0;
 310     SPLVAR;
 311
 312     NETPRI;
 313
 314     MUTEX_ENTER(&rx_freePktQ_lock);
 315
 316 #ifdef KERNEL
 317     for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
 318          num_pkts--, overq++);
 319
 320     if (overq) {
 321         rxi_NeedMorePackets = TRUE;
 322         MUTEX_ENTER(&rx_stats_mutex);
 323         switch (class) {
 324         case RX_PACKET_CLASS_RECEIVE:
 325             rx_stats.receivePktAllocFailures++;
 326             break;
 327         case RX_PACKET_CLASS_SEND:
 328             rx_stats.sendPktAllocFailures++;
 329             break;
 330         case RX_PACKET_CLASS_SPECIAL:
 331             rx_stats.specialPktAllocFailures++;
 332             break;
 333         case RX_PACKET_CLASS_RECV_CBUF:
 334             rx_stats.receiveCbufPktAllocFailures++;
 335             break;
 336         case RX_PACKET_CLASS_SEND_CBUF:
 337             rx_stats.sendCbufPktAllocFailures++;
 338             break;
 339         }
 340         MUTEX_EXIT(&rx_stats_mutex);
 341     }
 342
 343     if (rx_nFreePackets < num_pkts)
 344         num_pkts = rx_nFreePackets;
 345
 346     if (!num_pkts) {
 347         rxi_NeedMorePackets = TRUE;
 348         goto done;
 349     }
 350 #else /* KERNEL */
 351     if (rx_nFreePackets < num_pkts) {
 352         rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), rx_initSendWindow));
 353     }
 354 #endif /* KERNEL */
 355
 356     for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
 357          i < num_pkts;
 358          i++, c=queue_Next(c, rx_packet)) {
 359         RX_FPQ_MARK_USED(c);
 360     }
 361
 362     queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);
 363
 364     rx_nFreePackets -= num_pkts;
 365
 366 #ifdef KERNEL
 367   done:
 368 #endif
 369     MUTEX_EXIT(&rx_freePktQ_lock);
 370
 371     USERPRI;
 372     return num_pkts;
 373 }
 374 #endif /* RX_ENABLE_TSFPQ */
 375
/*
 * Return a queue of packets, along with their continuation buffers, to
 * the free packet pool.
 */
 379 #ifdef RX_ENABLE_TSFPQ
 380 /* num_pkts=0 means queue length is unknown */
 381 int
 382 rxi_FreePackets(int num_pkts, struct rx_queue * q)
 383 {
 384     register struct rx_ts_info_t * rx_ts_info;
 385     register struct rx_packet *c, *nc;
 386     SPLVAR;
 387
 388     osi_Assert(num_pkts >= 0);
 389     RX_TS_INFO_GET(rx_ts_info);
 390
 391     if (!num_pkts) {
 392         for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
 393             rxi_FreeDataBufsTSFPQ(c, 1, 0);
 394         }
 395     } else {
 396         RX_TS_FPQ_CHECKIN2(rx_ts_info, num_pkts, q);
 397     }
 398
 399     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 400         NETPRI;
 401         MUTEX_ENTER(&rx_freePktQ_lock);
 402
 403         RX_TS_FPQ_LTOG(rx_ts_info);
 404
 405         /* Wakeup anyone waiting for packets */
 406         rxi_PacketsUnWait();
 407
 408         MUTEX_EXIT(&rx_freePktQ_lock);
 409         USERPRI;
 410     }
 411
 412     return num_pkts;
 413 }
 414 #else /* RX_ENABLE_TSFPQ */
 415 /* num_pkts=0 means queue length is unknown */
 416 int
 417 rxi_FreePackets(int num_pkts, struct rx_queue *q)
 418 {
 419     struct rx_queue cbs;
 420     register struct rx_packet *p, *np;
 421     int qlen = 0;
 422     SPLVAR;
 423
 424     osi_Assert(num_pkts >= 0);
 425     queue_Init(&cbs);
 426
 427     if (!num_pkts) {
 428         for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
 429             if (p->niovecs > 2) {
 430                 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
 431             }
 432             RX_FPQ_MARK_FREE(p);
 433         }
 434         if (!num_pkts)
 435             return 0;
 436     } else {
 437         for (queue_Scan(q, p, np, rx_packet)) {
 438             if (p->niovecs > 2) {
 439                 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
 440             }
 441             RX_FPQ_MARK_FREE(p);
 442         }
 443     }
 444
 445     if (qlen) {
 446         queue_SpliceAppend(q, &cbs);
 447         qlen += num_pkts;
 448     } else
 449         qlen = num_pkts;
 450
 451     NETPRI;
 452     MUTEX_ENTER(&rx_freePktQ_lock);
 453
 454     queue_SpliceAppend(&rx_freePacketQueue, q);
 455     rx_nFreePackets += qlen;
 456
 457     /* Wakeup anyone waiting for packets */
 458     rxi_PacketsUnWait();
 459
 460     MUTEX_EXIT(&rx_freePktQ_lock);
 461     USERPRI;
 462
 463     return num_pkts;
 464 }
 465 #endif /* RX_ENABLE_TSFPQ */
 466
 467 /* this one is kind of awful.
 468  * In rxkad, the packet has been all shortened, and everything, ready for
 469  * sending.  All of a sudden, we discover we need some of that space back.
 470  * This isn't terribly general, because it knows that the packets are only
 471  * rounded up to the EBS (userdata + security header).
 472  */
 473 int
 474 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
 475 {
 476     int i;
 477     i = p->niovecs - 1;
 478     if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
 479         if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
 480             p->wirevec[i].iov_len += nb;
 481             return 0;
 482         }
 483     } else {
 484         if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
 485             p->wirevec[i].iov_len += nb;
 486             return 0;
 487         }
 488     }
 489
 490     return 0;
 491 }
 492
 493 /* get sufficient space to store nb bytes of data (or more), and hook
 494  * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 495  * returns the number of bytes >0 which it failed to come up with.
 496  * Don't need to worry about locking on packet, since only
 497  * one thread can manipulate one at a time. Locking on continution
 498  * packets is handled by AllocPacketBufs */
 499 /* MTUXXX don't need to go throught the for loop if we can trust niovecs */
 500 int
 501 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
 502 {
 503     int i, nv;
 504     struct rx_queue q;
 505     register struct rx_packet *cb, *ncb;
 506
 507     /* compute the number of cbuf's we need */
 508     nv = nb / RX_CBUFFERSIZE;
 509     if ((nv * RX_CBUFFERSIZE) < nb)
 510         nv++;
 511     if ((nv + p->niovecs) > RX_MAXWVECS)
 512         nv = RX_MAXWVECS - p->niovecs;
 513     if (nv < 1)
 514         return nb;
 515
 516     /* allocate buffers */
 517     queue_Init(&q);
 518     nv = AllocPacketBufs(class, nv, &q);
 519
 520     /* setup packet iovs */
 521     for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
 522         queue_Remove(cb);
 523         p->wirevec[i].iov_base = (caddr_t) cb->localdata;
 524         p->wirevec[i].iov_len = RX_CBUFFERSIZE;
 525     }
 526
 527     nb -= (nv * RX_CBUFFERSIZE);
 528     p->length += (nv * RX_CBUFFERSIZE);
 529     p->niovecs += nv;
 530
 531     return nb;
 532 }
 533
 534 /* Add more packet buffers */
 535 #ifdef RX_ENABLE_TSFPQ
 536 void
 537 rxi_MorePackets(int apackets)
 538 {
 539     struct rx_packet *p, *e;
 540     register struct rx_ts_info_t * rx_ts_info;
 541     int getme;
 542     SPLVAR;
 543
 544     getme = apackets * sizeof(struct rx_packet);
 545     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 546
 547     PIN(p, getme);              /* XXXXX */
 548     memset((char *)p, 0, getme);
 549     RX_TS_INFO_GET(rx_ts_info);
 550
 551     for (e = p + apackets; p < e; p++) {
 552         RX_PACKET_IOV_INIT(p);
 553         p->niovecs = 2;
 554
 555         RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 556     }
 557     rx_ts_info->_FPQ.delta += apackets;
 558
 559     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 560         NETPRI;
 561         MUTEX_ENTER(&rx_freePktQ_lock);
 562
 563         RX_TS_FPQ_LTOG(rx_ts_info);
 564         rxi_NeedMorePackets = FALSE;
 565         rxi_PacketsUnWait();
 566
 567         MUTEX_EXIT(&rx_freePktQ_lock);
 568         USERPRI;
 569     }
 570 }
 571 #else /* RX_ENABLE_TSFPQ */
 572 void
 573 rxi_MorePackets(int apackets)
 574 {
 575     struct rx_packet *p, *e;
 576     int getme;
 577     SPLVAR;
 578
 579     getme = apackets * sizeof(struct rx_packet);
 580     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 581
 582     PIN(p, getme);              /* XXXXX */
 583     memset((char *)p, 0, getme);
 584     NETPRI;
 585     MUTEX_ENTER(&rx_freePktQ_lock);
 586
 587     for (e = p + apackets; p < e; p++) {
 588         RX_PACKET_IOV_INIT(p);
 589         p->flags |= RX_PKTFLAG_FREE;
 590         p->niovecs = 2;
 591
 592         queue_Append(&rx_freePacketQueue, p);
 593     }
 594     rx_nFreePackets += apackets;
 595     rxi_NeedMorePackets = FALSE;
 596     rxi_PacketsUnWait();
 597
 598     MUTEX_EXIT(&rx_freePktQ_lock);
 599     USERPRI;
 600 }
 601 #endif /* RX_ENABLE_TSFPQ */
 602
 603 #ifdef RX_ENABLE_TSFPQ
 604 void
 605 rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
 606 {
 607     struct rx_packet *p, *e;
 608     register struct rx_ts_info_t * rx_ts_info;
 609     int getme;
 610     SPLVAR;
 611
 612     getme = apackets * sizeof(struct rx_packet);
 613     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 614
 615     PIN(p, getme);              /* XXXXX */
 616     memset((char *)p, 0, getme);
 617     RX_TS_INFO_GET(rx_ts_info);
 618
 619     for (e = p + apackets; p < e; p++) {
 620         RX_PACKET_IOV_INIT(p);
 621         p->niovecs = 2;
 622
 623         RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 624     }
 625     rx_ts_info->_FPQ.delta += apackets;
 626
 627     if (flush_global &&
 628         (num_keep_local < apackets)) {
 629         NETPRI;
 630         MUTEX_ENTER(&rx_freePktQ_lock);
 631
 632         RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
 633         rxi_NeedMorePackets = FALSE;
 634         rxi_PacketsUnWait();
 635
 636         MUTEX_EXIT(&rx_freePktQ_lock);
 637         USERPRI;
 638     }
 639 }
 640 #endif /* RX_ENABLE_TSFPQ */
 641
 642 #ifndef KERNEL
 643 /* Add more packet buffers */
 644 void
 645 rxi_MorePacketsNoLock(int apackets)
 646 {
 647     struct rx_packet *p, *e;
 648     int getme;
 649
 650     /* allocate enough packets that 1/4 of the packets will be able
 651      * to hold maximal amounts of data */
 652     apackets += (apackets / 4)
 653         * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
 654     getme = apackets * sizeof(struct rx_packet);
 655     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 656
 657     memset((char *)p, 0, getme);
 658
 659     for (e = p + apackets; p < e; p++) {
 660         RX_PACKET_IOV_INIT(p);
 661         p->flags |= RX_PKTFLAG_FREE;
 662         p->niovecs = 2;
 663
 664         queue_Append(&rx_freePacketQueue, p);
 665     }
 666
 667     rx_nFreePackets += apackets;
 668 #ifdef RX_ENABLE_TSFPQ
 669     /* TSFPQ patch also needs to keep track of total packets */
 670     MUTEX_ENTER(&rx_stats_mutex);
 671     rx_nPackets += apackets;
 672     RX_TS_FPQ_COMPUTE_LIMITS;
 673     MUTEX_EXIT(&rx_stats_mutex);
 674 #endif /* RX_ENABLE_TSFPQ */
 675     rxi_NeedMorePackets = FALSE;
 676     rxi_PacketsUnWait();
 677 }
 678 #endif /* !KERNEL */
 679
 680 void
 681 rxi_FreeAllPackets(void)
 682 {
 683     /* must be called at proper interrupt level, etcetera */
 684     /* MTUXXX need to free all Packets */
 685     osi_Free(rx_mallocedP,
 686              (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
 687     UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
 688 }
 689
 690 #ifdef RX_ENABLE_TSFPQ
 691 void
 692 rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
 693 {
 694     register struct rx_ts_info_t * rx_ts_info;
 695     register int xfer;
 696     SPLVAR;
 697
 698     RX_TS_INFO_GET(rx_ts_info);
 699
 700     if (num_keep_local != rx_ts_info->_FPQ.len) {
 701         NETPRI;
 702         MUTEX_ENTER(&rx_freePktQ_lock);
 703         if (num_keep_local < rx_ts_info->_FPQ.len) {
 704             xfer = rx_ts_info->_FPQ.len - num_keep_local;
 705             RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
 706             rxi_PacketsUnWait();
 707         } else {
 708             xfer = num_keep_local - rx_ts_info->_FPQ.len;
 709             if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
 710                 xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
 711             if (rx_nFreePackets < xfer) {
 712                 rxi_MorePacketsNoLock(xfer - rx_nFreePackets);
 713             }
 714             RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
 715         }
 716         MUTEX_EXIT(&rx_freePktQ_lock);
 717         USERPRI;
 718     }
 719 }
 720
 721 void
 722 rxi_FlushLocalPacketsTSFPQ(void)
 723 {
 724     rxi_AdjustLocalPacketsTSFPQ(0, 0);
 725 }
 726 #endif /* RX_ENABLE_TSFPQ */
 727
 728 /* Allocate more packets iff we need more continuation buffers */
 729 /* In kernel, can't page in memory with interrupts disabled, so we
 730  * don't use the event mechanism. */
 731 void
 732 rx_CheckPackets(void)
 733 {
 734     if (rxi_NeedMorePackets) {
 735         rxi_MorePackets(rx_initSendWindow);
 736     }
 737 }
 738
 739 /* In the packet freeing routine below, the assumption is that
 740    we want all of the packets to be used equally frequently, so that we
 741    don't get packet buffers paging out.  It would be just as valid to
 742    assume that we DO want them to page out if not many are being used.
 743    In any event, we assume the former, and append the packets to the end
 744    of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
 751
 752 /* Actually free the packet p. */
 753 #ifdef RX_ENABLE_TSFPQ
 754 void
 755 rxi_FreePacketNoLock(struct rx_packet *p)
 756 {
 757     register struct rx_ts_info_t * rx_ts_info;
 758     dpf(("Free %lx\n", (unsigned long)p));
 759
 760     RX_TS_INFO_GET(rx_ts_info);
 761     RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 762     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 763         RX_TS_FPQ_LTOG(rx_ts_info);
 764     }
 765 }
 766 #else /* RX_ENABLE_TSFPQ */
 767 void
 768 rxi_FreePacketNoLock(struct rx_packet *p)
 769 {
 770     dpf(("Free %lx\n", (unsigned long)p));
 771
 772     RX_FPQ_MARK_FREE(p);
 773     rx_nFreePackets++;
 774     queue_Append(&rx_freePacketQueue, p);
 775 }
 776 #endif /* RX_ENABLE_TSFPQ */
 777
 778 #ifdef RX_ENABLE_TSFPQ
 779 void
 780 rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
 781 {
 782     register struct rx_ts_info_t * rx_ts_info;
 783     dpf(("Free %lx\n", (unsigned long)p));
 784
 785     RX_TS_INFO_GET(rx_ts_info);
 786     RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 787
 788     if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
 789         NETPRI;
 790         MUTEX_ENTER(&rx_freePktQ_lock);
 791
 792         RX_TS_FPQ_LTOG(rx_ts_info);
 793
 794         /* Wakeup anyone waiting for packets */
 795         rxi_PacketsUnWait();
 796
 797         MUTEX_EXIT(&rx_freePktQ_lock);
 798         USERPRI;
 799     }
 800 }
 801 #endif /* RX_ENABLE_TSFPQ */
 802
 803 /* free continuation buffers off a packet into a queue of buffers */
 804 static int
 805 rxi_FreeDataBufsToQueue(struct rx_packet *p, int first, struct rx_queue * q)
 806 {
 807     struct iovec *iov;
 808     struct rx_packet * cb;
 809     int count = 0;
 810
 811     if (first < 2)
 812         first = 2;
 813     for (; first < p->niovecs; first++, count++) {
 814         iov = &p->wirevec[first];
 815         if (!iov->iov_base)
 816             osi_Panic("rxi_PacketIOVToQueue: unexpected NULL iov");
 817         cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
 818         RX_FPQ_MARK_FREE(cb);
 819         queue_Append(q, cb);
 820     }
 821     p->length = 0;
 822     p->niovecs = 0;
 823
 824     return count;
 825 }
 826
 827 int
 828 rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
 829 {
 830     struct iovec *iov, *end;
 831
 832     if (first != 1)             /* MTUXXX */
 833         osi_Panic("FreeDataBufs 1: first must be 1");
 834     iov = &p->wirevec[1];
 835     end = iov + (p->niovecs - 1);
 836     if (iov->iov_base != (caddr_t) p->localdata)        /* MTUXXX */
 837         osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
 838     for (iov++; iov < end; iov++) {
 839         if (!iov->iov_base)
 840             osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
 841         rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 842     }
 843     p->length = 0;
 844     p->niovecs = 0;
 845
 846     return 0;
 847 }
 848
 849 #ifdef RX_ENABLE_TSFPQ
 850 int
 851 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, int first, int flush_global)
 852 {
 853     struct iovec *iov, *end;
 854     register struct rx_ts_info_t * rx_ts_info;
 855
 856     RX_TS_INFO_GET(rx_ts_info);
 857
 858     if (first != 1)             /* MTUXXX */
 859         osi_Panic("FreeDataBufs 1: first must be 1");
 860     iov = &p->wirevec[1];
 861     end = iov + (p->niovecs - 1);
 862     if (iov->iov_base != (caddr_t) p->localdata)        /* MTUXXX */
 863         osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
 864     for (iov++; iov < end; iov++) {
 865         if (!iov->iov_base)
 866             osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
 867         RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
 868     }
 869     p->length = 0;
 870     p->niovecs = 0;
 871
 872     if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
 873         NETPRI;
 874         MUTEX_ENTER(&rx_freePktQ_lock);
 875
 876         RX_TS_FPQ_LTOG(rx_ts_info);
 877
 878         /* Wakeup anyone waiting for packets */
 879         rxi_PacketsUnWait();
 880
 881         MUTEX_EXIT(&rx_freePktQ_lock);
 882         USERPRI;
 883     }
 884     return 0;
 885 }
 886 #endif /* RX_ENABLE_TSFPQ */
 887
 888 int rxi_nBadIovecs = 0;
 889
 890 /* rxi_RestoreDataBufs
 891  *
 892  * Restore the correct sizes to the iovecs. Called when reusing a packet
 893  * for reading off the wire.
 894  */
 895 void
 896 rxi_RestoreDataBufs(struct rx_packet *p)
 897 {
 898     int i;
 899     struct iovec *iov = &p->wirevec[2];
 900
 901     RX_PACKET_IOV_INIT(p);
 902
 903     for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
 904         if (!iov->iov_base) {
 905             rxi_nBadIovecs++;
 906             p->niovecs = i;
 907             break;
 908         }
 909         iov->iov_len = RX_CBUFFERSIZE;
 910     }
 911 }
 912
 913 #ifdef RX_ENABLE_TSFPQ
 914 int
 915 rxi_TrimDataBufs(struct rx_packet *p, int first)
 916 {
 917     int length;
 918     struct iovec *iov, *end;
 919     register struct rx_ts_info_t * rx_ts_info;
 920     SPLVAR;
 921
 922     if (first != 1)
 923         osi_Panic("TrimDataBufs 1: first must be 1");
 924
 925     /* Skip over continuation buffers containing message data */
 926     iov = &p->wirevec[2];
 927     end = iov + (p->niovecs - 2);
 928     length = p->length - p->wirevec[1].iov_len;
 929     for (; iov < end && length > 0; iov++) {
 930         if (!iov->iov_base)
 931             osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
 932         length -= iov->iov_len;
 933     }
 934
 935     /* iov now points to the first empty data buffer. */
 936     if (iov >= end)
 937         return 0;
 938
 939     RX_TS_INFO_GET(rx_ts_info);
 940     for (; iov < end; iov++) {
 941         if (!iov->iov_base)
 942             osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
 943         RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
 944         p->niovecs--;
 945     }
 946     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 947         NETPRI;
 948         MUTEX_ENTER(&rx_freePktQ_lock);
 949
 950         RX_TS_FPQ_LTOG(rx_ts_info);
 951         rxi_PacketsUnWait();
 952
 953         MUTEX_EXIT(&rx_freePktQ_lock);
 954         USERPRI;
 955     }
 956
 957     return 0;
 958 }
 959 #else /* RX_ENABLE_TSFPQ */
 960 int
 961 rxi_TrimDataBufs(struct rx_packet *p, int first)
 962 {
 963     int length;
 964     struct iovec *iov, *end;
 965     SPLVAR;
 966
 967     if (first != 1)
 968         osi_Panic("TrimDataBufs 1: first must be 1");
 969
 970     /* Skip over continuation buffers containing message data */
 971     iov = &p->wirevec[2];
 972     end = iov + (p->niovecs - 2);
 973     length = p->length - p->wirevec[1].iov_len;
 974     for (; iov < end && length > 0; iov++) {
 975         if (!iov->iov_base)
 976             osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
 977         length -= iov->iov_len;
 978     }
 979
 980     /* iov now points to the first empty data buffer. */
 981     if (iov >= end)
 982         return 0;
 983
 984     NETPRI;
 985     MUTEX_ENTER(&rx_freePktQ_lock);
 986
 987     for (; iov < end; iov++) {
 988         if (!iov->iov_base)
 989             osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
 990         rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 991         p->niovecs--;
 992     }
 993     rxi_PacketsUnWait();
 994
 995     MUTEX_EXIT(&rx_freePktQ_lock);
 996     USERPRI;
 997
 998     return 0;
 999 }
1000 #endif /* RX_ENABLE_TSFPQ */
1001
1002 /* Free the packet p.  P is assumed not to be on any queue, i.e.
1003  * remove it yourself first if you call this routine. */
1004 #ifdef RX_ENABLE_TSFPQ
1005 void
1006 rxi_FreePacket(struct rx_packet *p)
1007 {
1008     rxi_FreeDataBufsTSFPQ(p, 1, 0);
1009     rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
1010 }
1011 #else /* RX_ENABLE_TSFPQ */
1012 void
1013 rxi_FreePacket(struct rx_packet *p)
1014 {
1015     SPLVAR;
1016
1017     NETPRI;
1018     MUTEX_ENTER(&rx_freePktQ_lock);
1019
1020     rxi_FreeDataBufsNoLock(p, 1);
1021     rxi_FreePacketNoLock(p);
1022     /* Wakeup anyone waiting for packets */
1023     rxi_PacketsUnWait();
1024
1025     MUTEX_EXIT(&rx_freePktQ_lock);
1026     USERPRI;
1027 }
1028 #endif /* RX_ENABLE_TSFPQ */
1029
1030 /* rxi_AllocPacket sets up p->length so it reflects the number of
1031  * bytes in the packet at this point, **not including** the header.
1032  * The header is absolutely necessary, besides, this is the way the
1033  * length field is usually used */
1034 #ifdef RX_ENABLE_TSFPQ
1035 struct rx_packet *
1036 rxi_AllocPacketNoLock(int class)
1037 {
1038     register struct rx_packet *p;
1039     register struct rx_ts_info_t * rx_ts_info;
1040
1041     RX_TS_INFO_GET(rx_ts_info);
1042
1043 #ifdef KERNEL
1044     if (rxi_OverQuota(class)) {
1045         rxi_NeedMorePackets = TRUE;
1046         MUTEX_ENTER(&rx_stats_mutex);
1047         switch (class) {
1048         case RX_PACKET_CLASS_RECEIVE:
1049             rx_stats.receivePktAllocFailures++;
1050             break;
1051         case RX_PACKET_CLASS_SEND:
1052             rx_stats.sendPktAllocFailures++;
1053             break;
1054         case RX_PACKET_CLASS_SPECIAL:
1055             rx_stats.specialPktAllocFailures++;
1056             break;
1057         case RX_PACKET_CLASS_RECV_CBUF:
1058             rx_stats.receiveCbufPktAllocFailures++;
1059             break;
1060         case RX_PACKET_CLASS_SEND_CBUF:
1061             rx_stats.sendCbufPktAllocFailures++;
1062             break;
1063         }
1064         MUTEX_EXIT(&rx_stats_mutex);
1065         return (struct rx_packet *)0;
1066     }
1067 #endif /* KERNEL */
1068
1069     MUTEX_ENTER(&rx_stats_mutex);
1070     rx_stats.packetRequests++;
1071     MUTEX_EXIT(&rx_stats_mutex);
1072
1073     if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1074
1075 #ifdef KERNEL
1076         if (queue_IsEmpty(&rx_freePacketQueue))
1077             osi_Panic("rxi_AllocPacket error");
1078 #else /* KERNEL */
1079         if (queue_IsEmpty(&rx_freePacketQueue))
1080             rxi_MorePacketsNoLock(rx_initSendWindow);
1081 #endif /* KERNEL */
1082
1083
1084         RX_TS_FPQ_GTOL(rx_ts_info);
1085     }
1086
1087     RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1088
1089     dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1090
1091
1092     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1093      * order to truncate outbound packets.  In the near future, may need
1094      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1095      */
1096     RX_PACKET_IOV_FULLINIT(p);
1097     return p;
1098 }
1099 #else /* RX_ENABLE_TSFPQ */
1100 struct rx_packet *
1101 rxi_AllocPacketNoLock(int class)
1102 {
1103     register struct rx_packet *p;
1104
1105 #ifdef KERNEL
1106     if (rxi_OverQuota(class)) {
1107         rxi_NeedMorePackets = TRUE;
1108         MUTEX_ENTER(&rx_stats_mutex);
1109         switch (class) {
1110         case RX_PACKET_CLASS_RECEIVE:
1111             rx_stats.receivePktAllocFailures++;
1112             break;
1113         case RX_PACKET_CLASS_SEND:
1114             rx_stats.sendPktAllocFailures++;
1115             break;
1116         case RX_PACKET_CLASS_SPECIAL:
1117             rx_stats.specialPktAllocFailures++;
1118             break;
1119         case RX_PACKET_CLASS_RECV_CBUF:
1120             rx_stats.receiveCbufPktAllocFailures++;
1121             break;
1122         case RX_PACKET_CLASS_SEND_CBUF:
1123             rx_stats.sendCbufPktAllocFailures++;
1124             break;
1125         }
1126         MUTEX_EXIT(&rx_stats_mutex);
1127         return (struct rx_packet *)0;
1128     }
1129 #endif /* KERNEL */
1130
1131     MUTEX_ENTER(&rx_stats_mutex);
1132     rx_stats.packetRequests++;
1133     MUTEX_EXIT(&rx_stats_mutex);
1134
1135 #ifdef KERNEL
1136     if (queue_IsEmpty(&rx_freePacketQueue))
1137         osi_Panic("rxi_AllocPacket error");
1138 #else /* KERNEL */
1139     if (queue_IsEmpty(&rx_freePacketQueue))
1140         rxi_MorePacketsNoLock(rx_initSendWindow);
1141 #endif /* KERNEL */
1142
1143     rx_nFreePackets--;
1144     p = queue_First(&rx_freePacketQueue, rx_packet);
1145     queue_Remove(p);
1146     RX_FPQ_MARK_USED(p);
1147
1148     dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1149
1150
1151     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1152      * order to truncate outbound packets.  In the near future, may need
1153      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1154      */
1155     RX_PACKET_IOV_FULLINIT(p);
1156     return p;
1157 }
1158 #endif /* RX_ENABLE_TSFPQ */
1159
1160 #ifdef RX_ENABLE_TSFPQ
1161 struct rx_packet *
1162 rxi_AllocPacketTSFPQ(int class, int pull_global)
1163 {
1164     register struct rx_packet *p;
1165     register struct rx_ts_info_t * rx_ts_info;
1166
1167     RX_TS_INFO_GET(rx_ts_info);
1168
1169     MUTEX_ENTER(&rx_stats_mutex);
1170     rx_stats.packetRequests++;
1171     MUTEX_EXIT(&rx_stats_mutex);
1172
1173     if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
1174         MUTEX_ENTER(&rx_freePktQ_lock);
1175
1176         if (queue_IsEmpty(&rx_freePacketQueue))
1177             rxi_MorePacketsNoLock(rx_initSendWindow);
1178
1179         RX_TS_FPQ_GTOL(rx_ts_info);
1180
1181         MUTEX_EXIT(&rx_freePktQ_lock);
1182     } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1183         return NULL;
1184     }
1185
1186     RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1187
1188     dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1189
1190     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1191      * order to truncate outbound packets.  In the near future, may need
1192      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1193      */
1194     RX_PACKET_IOV_FULLINIT(p);
1195     return p;
1196 }
1197 #endif /* RX_ENABLE_TSFPQ */
1198
1199 #ifdef RX_ENABLE_TSFPQ
1200 struct rx_packet *
1201 rxi_AllocPacket(int class)
1202 {
1203     register struct rx_packet *p;
1204
1205     p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
1206     return p;
1207 }
1208 #else /* RX_ENABLE_TSFPQ */
1209 struct rx_packet *
1210 rxi_AllocPacket(int class)
1211 {
1212     register struct rx_packet *p;
1213
1214     MUTEX_ENTER(&rx_freePktQ_lock);
1215     p = rxi_AllocPacketNoLock(class);
1216     MUTEX_EXIT(&rx_freePktQ_lock);
1217     return p;
1218 }
1219 #endif /* RX_ENABLE_TSFPQ */
1220
1221 /* This guy comes up with as many buffers as it {takes,can get} given
1222  * the MTU for this call. It also sets the packet length before
1223  * returning.  caution: this is often called at NETPRI
1224  * Called with call locked.
1225  */
1226 struct rx_packet *
1227 rxi_AllocSendPacket(register struct rx_call *call, int want)
1228 {
1229     register struct rx_packet *p = (struct rx_packet *)0;
1230     register int mud;
1231     register unsigned delta;
1232
1233     SPLVAR;
1234     mud = call->MTU - RX_HEADER_SIZE;
1235     delta =
1236         rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
1237         rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
1238
1239 #ifdef RX_ENABLE_TSFPQ
1240     if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
1241         want += delta;
1242         want = MIN(want, mud);
1243
1244         if ((unsigned)want > p->length)
1245             (void)rxi_AllocDataBuf(p, (want - p->length),
1246                                    RX_PACKET_CLASS_SEND_CBUF);
1247
1248         if ((unsigned)p->length > mud)
1249             p->length = mud;
1250
1251         if (delta >= p->length) {
1252             rxi_FreePacket(p);
1253             p = NULL;
1254         } else {
1255             p->length -= delta;
1256         }
1257         return p;
1258     }
1259 #endif /* RX_ENABLE_TSFPQ */
1260
1261     while (!(call->error)) {
1262         MUTEX_ENTER(&rx_freePktQ_lock);
1263         /* if an error occurred, or we get the packet we want, we're done */
1264         if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
1265             MUTEX_EXIT(&rx_freePktQ_lock);
1266
1267             want += delta;
1268             want = MIN(want, mud);
1269
1270             if ((unsigned)want > p->length)
1271                 (void)rxi_AllocDataBuf(p, (want - p->length),
1272                                        RX_PACKET_CLASS_SEND_CBUF);
1273
1274             if ((unsigned)p->length > mud)
1275                 p->length = mud;
1276
1277             if (delta >= p->length) {
1278                 rxi_FreePacket(p);
1279                 p = NULL;
1280             } else {
1281                 p->length -= delta;
1282             }
1283             break;
1284         }
1285
1286         /* no error occurred, and we didn't get a packet, so we sleep.
1287          * At this point, we assume that packets will be returned
1288          * sooner or later, as packets are acknowledged, and so we
1289          * just wait.  */
1290         NETPRI;
1291         call->flags |= RX_CALL_WAIT_PACKETS;
1292         CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
1293         MUTEX_EXIT(&call->lock);
1294         rx_waitingForPackets = 1;
1295
1296 #ifdef  RX_ENABLE_LOCKS
1297         CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
1298 #else
1299         osi_rxSleep(&rx_waitingForPackets);
1300 #endif
1301         MUTEX_EXIT(&rx_freePktQ_lock);
1302         MUTEX_ENTER(&call->lock);
1303         CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
1304         call->flags &= ~RX_CALL_WAIT_PACKETS;
1305         USERPRI;
1306     }
1307
1308     return p;
1309 }
1310
1311 #ifndef KERNEL
1312 #ifdef AFS_NT40_ENV
1313 /* Windows does not use file descriptors. */
1314 #define CountFDs(amax) 0
1315 #else
1316 /* count the number of used FDs */
/* Return how many descriptors in the range [0, amax) fstat() accepts,
 * i.e. how many of them are currently open. */
static int
CountFDs(register int amax)
{
    struct stat probe;
    register int fd;
    register int used = 0;

    for (fd = 0; fd < amax; fd++) {
        if (fstat(fd, &probe) == 0)
            used++;
    }

    return used;
}
1332 #endif /* AFS_NT40_ENV */
1333 #else /* KERNEL */
1334
1335 #define CountFDs(amax) amax
1336
1337 #endif /* KERNEL */
1338
1339 #if !defined(KERNEL) || defined(UKERNEL)
1340
1341 /* This function reads a single packet from the interface into the
1342  * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
1343  * (host,port) of the sender are stored in the supplied variables, and
1344  * the data length of the packet is stored in the packet structure.
1345  * The header is decoded. */
1346 int
1347 rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host,
1348                u_short * port)
1349 {
1350     struct sockaddr_in from;
1351     int nbytes;
1352     afs_int32 rlen;
1353     register afs_int32 tlen, savelen;
1354     struct msghdr msg;
1355     rx_computelen(p, tlen);
1356     rx_SetDataSize(p, tlen);    /* this is the size of the user data area */
1357
1358     tlen += RX_HEADER_SIZE;     /* now this is the size of the entire packet */
1359     rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
1360                                  * it once in order to avoid races.  */
1361     tlen = rlen - tlen;
1362     if (tlen > 0) {
1363         tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
1364         if (tlen > 0) {
1365             tlen = rlen - tlen;
1366         } else
1367             tlen = rlen;
1368     } else
1369         tlen = rlen;
1370
1371     /* Extend the last iovec for padding, it's just to make sure that the
1372      * read doesn't return more data than we expect, and is done to get around
1373      * our problems caused by the lack of a length field in the rx header.
1374      * Use the extra buffer that follows the localdata in each packet
1375      * structure. */
1376     savelen = p->wirevec[p->niovecs - 1].iov_len;
1377     p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
1378
1379     memset((char *)&msg, 0, sizeof(msg));
1380     msg.msg_name = (char *)&from;
1381     msg.msg_namelen = sizeof(struct sockaddr_in);
1382     msg.msg_iov = p->wirevec;
1383     msg.msg_iovlen = p->niovecs;
1384     nbytes = rxi_Recvmsg(socket, &msg, 0);
1385
1386     /* restore the vec to its correct state */
1387     p->wirevec[p->niovecs - 1].iov_len = savelen;
1388
1389     p->length = (nbytes - RX_HEADER_SIZE);
1390     if ((nbytes > tlen) || (p->length & 0x8000)) {      /* Bogus packet */
1391         if (nbytes < 0 && errno == EWOULDBLOCK) {
1392             MUTEX_ENTER(&rx_stats_mutex);
1393             rx_stats.noPacketOnRead++;
1394             MUTEX_EXIT(&rx_stats_mutex);
1395         } else if (nbytes <= 0) {
1396             MUTEX_ENTER(&rx_stats_mutex);
1397             rx_stats.bogusPacketOnRead++;
1398             rx_stats.bogusHost = from.sin_addr.s_addr;
1399             MUTEX_EXIT(&rx_stats_mutex);
1400             dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
1401                  ntohs(from.sin_port), nbytes));
1402         }
1403         return 0;
1404     }
1405 #ifdef RXDEBUG
1406     else if ((rx_intentionallyDroppedOnReadPer100 > 0)
1407                 && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
1408         rxi_DecodePacketHeader(p);
1409
1410         *host = from.sin_addr.s_addr;
1411         *port = from.sin_port;
1412
1413         dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
1414               p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
1415               p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1416               p->length));
1417         rxi_TrimDataBufs(p, 1);
1418         return 0;
1419     }
1420 #endif
1421     else {
1422         /* Extract packet header. */
1423         rxi_DecodePacketHeader(p);
1424
1425         *host = from.sin_addr.s_addr;
1426         *port = from.sin_port;
1427         if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
1428             struct rx_peer *peer;
1429             MUTEX_ENTER(&rx_stats_mutex);
1430             rx_stats.packetsRead[p->header.type - 1]++;
1431             MUTEX_EXIT(&rx_stats_mutex);
1432             /*
1433              * Try to look up this peer structure.  If it doesn't exist,
1434              * don't create a new one -
1435              * we don't keep count of the bytes sent/received if a peer
1436              * structure doesn't already exist.
1437              *
1438              * The peer/connection cleanup code assumes that there is 1 peer
1439              * per connection.  If we actually created a peer structure here
1440              * and this packet was an rxdebug packet, the peer structure would
1441              * never be cleaned up.
1442              */
1443             peer = rxi_FindPeer(*host, *port, 0, 0);
1444             /* Since this may not be associated with a connection,
1445              * it may have no refCount, meaning we could race with
1446              * ReapConnections
1447              */
1448             if (peer && (peer->refCount > 0)) {
1449                 MUTEX_ENTER(&peer->peer_lock);
1450                 hadd32(peer->bytesReceived, p->length);
1451                 MUTEX_EXIT(&peer->peer_lock);
1452             }
1453         }
1454
1455         /* Free any empty packet buffers at the end of this packet */
1456         rxi_TrimDataBufs(p, 1);
1457
1458         return 1;
1459     }
1460 }
1461
1462 #endif /* !KERNEL || UKERNEL */
1463
1464 /* This function splits off the first packet in a jumbo packet.
1465  * As of AFS 3.5, jumbograms contain more than one fixed size
1466  * packet, and the RX_JUMBO_PACKET flag is set in all but the
1467  * last packet header. All packets (except the last) are padded to
1468  * fall on RX_CBUFFERSIZE boundaries.
1469  * HACK: We store the length of the first n-1 packets in the
1470  * last two pad bytes. */
1471
1472 struct rx_packet *
1473 rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
1474                      int first)
1475 {
1476     struct rx_packet *np;
1477     struct rx_jumboHeader *jp;
1478     int niov, i;
1479     struct iovec *iov;
1480     int length;
1481     afs_uint32 temp;
1482
1483     /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
1484      * bytes in length. All but the first packet are preceded by
1485      * an abbreviated four byte header. The length of the last packet
1486      * is calculated from the size of the jumbogram. */
1487     length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1488
1489     if ((int)p->length < length) {
1490         dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
1491         return NULL;
1492     }
1493     niov = p->niovecs - 2;
1494     if (niov < 1) {
1495         dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
1496         return NULL;
1497     }
1498     iov = &p->wirevec[2];
1499     np = RX_CBUF_TO_PACKET(iov->iov_base, p);
1500
1501     /* Get a pointer to the abbreviated packet header */
1502     jp = (struct rx_jumboHeader *)
1503         ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
1504
1505     /* Set up the iovecs for the next packet */
1506     np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
1507     np->wirevec[0].iov_len = sizeof(struct rx_header);
1508     np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
1509     np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
1510     np->niovecs = niov + 1;
1511     for (i = 2, iov++; i <= niov; i++, iov++) {
1512         np->wirevec[i] = *iov;
1513     }
1514     np->length = p->length - length;
1515     p->length = RX_JUMBOBUFFERSIZE;
1516     p->niovecs = 2;
1517
1518     /* Convert the jumbo packet header to host byte order */
1519     temp = ntohl(*(afs_uint32 *) jp);
1520     jp->flags = (u_char) (temp >> 24);
1521     jp->cksum = (u_short) (temp);
1522
1523     /* Fill in the packet header */
1524     np->header = p->header;
1525     np->header.serial = p->header.serial + 1;
1526     np->header.seq = p->header.seq + 1;
1527     np->header.flags = jp->flags;
1528     np->header.spare = jp->cksum;
1529
1530     return np;
1531 }
1532
1533 #ifndef KERNEL
1534 /* Send a udp datagram */
1535 int
1536 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
1537             int length, int istack)
1538 {
1539     struct msghdr msg;
1540         int ret;
1541
1542     memset(&msg, 0, sizeof(msg));
1543     msg.msg_iov = dvec;
1544     msg.msg_iovlen = nvecs;
1545     msg.msg_name = addr;
1546     msg.msg_namelen = sizeof(struct sockaddr_in);
1547
1548     ret = rxi_Sendmsg(socket, &msg, 0);
1549
1550     return ret;
1551 }
1552 #elif !defined(UKERNEL)
1553 /*
1554  * message receipt is done in rxk_input or rx_put.
1555  */
1556
1557 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really: <off> is ignored.
 * Returns the number of bytes not transferred, or -1 if a block
 * that is not M_DATA is encountered.
 * The message data is not modified, but the b_rptr of each block is
 * advanced past the bytes that were copied.
 */
1565 static int
1566 cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
1567 {
1568     register int n;
1569
1570     for (; mp && len > 0; mp = mp->b_cont) {
1571         if (mp->b_datap->db_type != M_DATA) {
1572             return -1;
1573         }
1574         n = MIN(len, (mp->b_wptr - mp->b_rptr));
1575         memcpy(cp, (char *)mp->b_rptr, n);
1576         cp += n;
1577         len -= n;
1578         mp->b_rptr += n;
1579     }
1580     return (len);
1581 }
1582
1583 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1584  * but it doesn't really.
1585  * This sucks, anyway, do it like m_cpy.... below
1586  */
1587 static int
1588 cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
1589            int niovs)
1590 {
1591     register int m, n, o, t, i;
1592
1593     for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1594         if (mp->b_datap->db_type != M_DATA) {
1595             return -1;
1596         }
1597         n = MIN(len, (mp->b_wptr - mp->b_rptr));
1598         len -= n;
1599         while (n) {
1600             if (!t) {
1601                 o = 0;
1602                 i++;
1603                 t = iovs[i].iov_len;
1604             }
1605             m = MIN(n, t);
1606             memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1607             mp->b_rptr += m;
1608             o += m;
1609             t -= m;
1610             n -= m;
1611         }
1612     }
1613     return (len);
1614 }
1615
1616 #define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
1617 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1618 #else
1619 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1620 static int
1621 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1622 {
1623     caddr_t p1, p2;
1624     unsigned int l1, l2, i, t;
1625
1626     if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1627         osi_Panic("m_cpytoiovec");      /* MTUXXX probably don't need this check */
1628
1629     while (off && m)
1630         if (m->m_len <= off) {
1631             off -= m->m_len;
1632             m = m->m_next;
1633             continue;
1634         } else
1635             break;
1636
1637     if (m == NULL)
1638         return len;
1639
1640     p1 = mtod(m, caddr_t) + off;
1641     l1 = m->m_len - off;
1642     i = 0;
1643     p2 = iovs[0].iov_base;
1644     l2 = iovs[0].iov_len;
1645
1646     while (len) {
1647         t = MIN(l1, MIN(l2, (unsigned int)len));
1648         memcpy(p2, p1, t);
1649         p1 += t;
1650         p2 += t;
1651         l1 -= t;
1652         l2 -= t;
1653         len -= t;
1654         if (!l1) {
1655             m = m->m_next;
1656             if (!m)
1657                 break;
1658             p1 = mtod(m, caddr_t);
1659             l1 = m->m_len;
1660         }
1661         if (!l2) {
1662             if (++i >= niovs)
1663                 break;
1664             p2 = iovs[i].iov_base;
1665             l2 = iovs[i].iov_len;
1666         }
1667
1668     }
1669
1670     return len;
1671 }
1672 #endif /* LINUX */
1673 #endif /* AFS_SUN5_ENV */
1674
1675 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1676 int
1677 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1678 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1679      mblk_t *amb;
1680 #else
1681      struct mbuf *amb;
1682 #endif
1683      void (*free) ();
1684      struct rx_packet *phandle;
1685      int hdr_len, data_len;
1686 {
1687     register int code;
1688
1689     code =
1690         m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1691                      phandle->niovecs);
1692     (*free) (amb);
1693
1694     return code;
1695 }
1696 #endif /* LINUX */
1697 #endif /*KERNEL && !UKERNEL */
1698
1699
1700 /* send a response to a debug packet */
1701
1702 struct rx_packet *
1703 rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
1704                        afs_int32 ahost, short aport, int istack)
1705 {
1706     struct rx_debugIn tin;
1707     afs_int32 tl;
1708     struct rx_serverQueueEntry *np, *nqe;
1709
1710     /*
1711      * Only respond to client-initiated Rx debug packets,
1712      * and clear the client flag in the response.
1713      */
1714     if (ap->header.flags & RX_CLIENT_INITIATED) {
1715         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1716         rxi_EncodePacketHeader(ap);
1717     } else {
1718         return ap;
1719     }
1720
1721     rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1722     /* all done with packet, now set length to the truth, so we can
1723      * reuse this packet */
1724     rx_computelen(ap, ap->length);
1725
1726     tin.type = ntohl(tin.type);
1727     tin.index = ntohl(tin.index);
1728     switch (tin.type) {
1729     case RX_DEBUGI_GETSTATS:{
1730             struct rx_debugStats tstat;
1731
1732             /* get basic stats */
1733             memset((char *)&tstat, 0, sizeof(tstat));   /* make sure spares are zero */
1734             tstat.version = RX_DEBUGI_VERSION;
1735 #ifndef RX_ENABLE_LOCKS
1736             tstat.waitingForPackets = rx_waitingForPackets;
1737 #endif
1738             MUTEX_ENTER(&rx_serverPool_lock);
1739             tstat.nFreePackets = htonl(rx_nFreePackets);
1740             tstat.callsExecuted = htonl(rxi_nCalls);
1741             tstat.packetReclaims = htonl(rx_packetReclaims);
1742             tstat.usedFDs = CountFDs(64);
1743             tstat.nWaiting = htonl(rx_nWaiting);
1744             tstat.nWaited = htonl(rx_nWaited);
1745             queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
1746                         tstat.idleThreads);
1747             MUTEX_EXIT(&rx_serverPool_lock);
1748             tstat.idleThreads = htonl(tstat.idleThreads);
1749             tl = sizeof(struct rx_debugStats) - ap->length;
1750             if (tl > 0)
1751                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1752
1753             if (tl <= 0) {
1754                 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1755                                (char *)&tstat);
1756                 ap->length = sizeof(struct rx_debugStats);
1757                 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1758                 rx_computelen(ap, ap->length);
1759             }
1760             break;
1761         }
1762
1763     case RX_DEBUGI_GETALLCONN:
1764     case RX_DEBUGI_GETCONN:{
1765             int i, j;
1766             register struct rx_connection *tc;
1767             struct rx_call *tcall;
1768             struct rx_debugConn tconn;
1769             int all = (tin.type == RX_DEBUGI_GETALLCONN);
1770
1771
1772             tl = sizeof(struct rx_debugConn) - ap->length;
1773             if (tl > 0)
1774                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1775             if (tl > 0)
1776                 return ap;
1777
1778             memset((char *)&tconn, 0, sizeof(tconn));   /* make sure spares are zero */
1779             /* get N'th (maybe) "interesting" connection info */
1780             for (i = 0; i < rx_hashTableSize; i++) {
1781 #if !defined(KERNEL)
1782                 /* the time complexity of the algorithm used here
1783                  * exponentially increses with the number of connections.
1784                  */
1785 #ifdef AFS_PTHREAD_ENV
1786                 pthread_yield();
1787 #else
1788                 (void)IOMGR_Poll();
1789 #endif
1790 #endif
1791                 MUTEX_ENTER(&rx_connHashTable_lock);
1792                 /* We might be slightly out of step since we are not
1793                  * locking each call, but this is only debugging output.
1794                  */
1795                 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
1796                     if ((all || rxi_IsConnInteresting(tc))
1797                         && tin.index-- <= 0) {
1798                         tconn.host = tc->peer->host;
1799                         tconn.port = tc->peer->port;
1800                         tconn.cid = htonl(tc->cid);
1801                         tconn.epoch = htonl(tc->epoch);
1802                         tconn.serial = htonl(tc->serial);
1803                         for (j = 0; j < RX_MAXCALLS; j++) {
1804                             tconn.callNumber[j] = htonl(tc->callNumber[j]);
1805                             if ((tcall = tc->call[j])) {
1806                                 tconn.callState[j] = tcall->state;
1807                                 tconn.callMode[j] = tcall->mode;
1808                                 tconn.callFlags[j] = tcall->flags;
1809                                 if (queue_IsNotEmpty(&tcall->rq))
1810                                     tconn.callOther[j] |= RX_OTHER_IN;
1811                                 if (queue_IsNotEmpty(&tcall->tq))
1812                                     tconn.callOther[j] |= RX_OTHER_OUT;
1813                             } else
1814                                 tconn.callState[j] = RX_STATE_NOTINIT;
1815                         }
1816
1817                         tconn.natMTU = htonl(tc->peer->natMTU);
1818                         tconn.error = htonl(tc->error);
1819                         tconn.flags = tc->flags;
1820                         tconn.type = tc->type;
1821                         tconn.securityIndex = tc->securityIndex;
1822                         if (tc->securityObject) {
1823                             RXS_GetStats(tc->securityObject, tc,
1824                                          &tconn.secStats);
1825 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1826 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1827                             DOHTONL(flags);
1828                             DOHTONL(expires);
1829                             DOHTONL(packetsReceived);
1830                             DOHTONL(packetsSent);
1831                             DOHTONL(bytesReceived);
1832                             DOHTONL(bytesSent);
1833                             for (i = 0;
1834                                  i <
1835                                  sizeof(tconn.secStats.spares) /
1836                                  sizeof(short); i++)
1837                                 DOHTONS(spares[i]);
1838                             for (i = 0;
1839                                  i <
1840                                  sizeof(tconn.secStats.sparel) /
1841                                  sizeof(afs_int32); i++)
1842                                 DOHTONL(sparel[i]);
1843                         }
1844
1845                         MUTEX_EXIT(&rx_connHashTable_lock);
1846                         rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1847                                        (char *)&tconn);
1848                         tl = ap->length;
1849                         ap->length = sizeof(struct rx_debugConn);
1850                         rxi_SendDebugPacket(ap, asocket, ahost, aport,
1851                                             istack);
1852                         ap->length = tl;
1853                         return ap;
1854                     }
1855                 }
1856                 MUTEX_EXIT(&rx_connHashTable_lock);
1857             }
1858             /* if we make it here, there are no interesting packets */
1859             tconn.cid = htonl(0xffffffff);      /* means end */
1860             rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1861                            (char *)&tconn);
1862             tl = ap->length;
1863             ap->length = sizeof(struct rx_debugConn);
1864             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1865             ap->length = tl;
1866             break;
1867         }
1868
1869         /*
1870          * Pass back all the peer structures we have available
1871          */
1872
1873     case RX_DEBUGI_GETPEER:{
1874             int i;
1875             register struct rx_peer *tp;
1876             struct rx_debugPeer tpeer;
1877
1878
1879             tl = sizeof(struct rx_debugPeer) - ap->length;
1880             if (tl > 0)
1881                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1882             if (tl > 0)
1883                 return ap;
1884
1885             memset((char *)&tpeer, 0, sizeof(tpeer));
1886             for (i = 0; i < rx_hashTableSize; i++) {
1887 #if !defined(KERNEL)
1888                 /* the time complexity of the algorithm used here
1889                  * exponentially increses with the number of peers.
1890                  *
1891                  * Yielding after processing each hash table entry
1892                  * and dropping rx_peerHashTable_lock.
1893                  * also increases the risk that we will miss a new
1894                  * entry - but we are willing to live with this
1895                  * limitation since this is meant for debugging only
1896                  */
1897 #ifdef AFS_PTHREAD_ENV
1898                 pthread_yield();
1899 #else
1900                 (void)IOMGR_Poll();
1901 #endif
1902 #endif
1903                 MUTEX_ENTER(&rx_peerHashTable_lock);
1904                 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1905                     if (tin.index-- <= 0) {
1906                         tpeer.host = tp->host;
1907                         tpeer.port = tp->port;
1908                         tpeer.ifMTU = htons(tp->ifMTU);
1909                         tpeer.idleWhen = htonl(tp->idleWhen);
1910                         tpeer.refCount = htons(tp->refCount);
1911                         tpeer.burstSize = tp->burstSize;
1912                         tpeer.burst = tp->burst;
1913                         tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1914                         tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1915                         tpeer.rtt = htonl(tp->rtt);
1916                         tpeer.rtt_dev = htonl(tp->rtt_dev);
1917                         tpeer.timeout.sec = htonl(tp->timeout.sec);
1918                         tpeer.timeout.usec = htonl(tp->timeout.usec);
1919                         tpeer.nSent = htonl(tp->nSent);
1920                         tpeer.reSends = htonl(tp->reSends);
1921                         tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1922                         tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1923                         tpeer.rateFlag = htonl(tp->rateFlag);
1924                         tpeer.natMTU = htons(tp->natMTU);
1925                         tpeer.maxMTU = htons(tp->maxMTU);
1926                         tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1927                         tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1928                         tpeer.MTU = htons(tp->MTU);
1929                         tpeer.cwind = htons(tp->cwind);
1930                         tpeer.nDgramPackets = htons(tp->nDgramPackets);
1931                         tpeer.congestSeq = htons(tp->congestSeq);
1932                         tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1933                         tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1934                         tpeer.bytesReceived.high =
1935                             htonl(tp->bytesReceived.high);
1936                         tpeer.bytesReceived.low =
1937                             htonl(tp->bytesReceived.low);
1938
1939                         MUTEX_EXIT(&rx_peerHashTable_lock);
1940                         rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1941                                        (char *)&tpeer);
1942                         tl = ap->length;
1943                         ap->length = sizeof(struct rx_debugPeer);
1944                         rxi_SendDebugPacket(ap, asocket, ahost, aport,
1945                                             istack);
1946                         ap->length = tl;
1947                         return ap;
1948                     }
1949                 }
1950                 MUTEX_EXIT(&rx_peerHashTable_lock);
1951             }
1952             /* if we make it here, there are no interesting packets */
1953             tpeer.host = htonl(0xffffffff);     /* means end */
1954             rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1955                            (char *)&tpeer);
1956             tl = ap->length;
1957             ap->length = sizeof(struct rx_debugPeer);
1958             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1959             ap->length = tl;
1960             break;
1961         }
1962
1963     case RX_DEBUGI_RXSTATS:{
1964             int i;
1965             afs_int32 *s;
1966
1967             tl = sizeof(rx_stats) - ap->length;
1968             if (tl > 0)
1969                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1970             if (tl > 0)
1971                 return ap;
1972
1973             /* Since its all int32s convert to network order with a loop. */
1974             MUTEX_ENTER(&rx_stats_mutex);
1975             s = (afs_int32 *) & rx_stats;
1976             for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
1977                 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
1978
1979             tl = ap->length;
1980             ap->length = sizeof(rx_stats);
1981             MUTEX_EXIT(&rx_stats_mutex);
1982             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1983             ap->length = tl;
1984             break;
1985         }
1986
1987     default:
1988         /* error response packet */
1989         tin.type = htonl(RX_DEBUGI_BADTYPE);
1990         tin.index = tin.type;
1991         rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1992         tl = ap->length;
1993         ap->length = sizeof(struct rx_debugIn);
1994         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1995         ap->length = tl;
1996         break;
1997     }
1998     return ap;
1999 }
2000
2001 struct rx_packet *
2002 rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
2003                          afs_int32 ahost, short aport, int istack)
2004 {
2005     afs_int32 tl;
2006
2007     /*
2008      * Only respond to client-initiated version requests, and
2009      * clear that flag in the response.
2010      */
2011     if (ap->header.flags & RX_CLIENT_INITIATED) {
2012         char buf[66];
2013
2014         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2015         rxi_EncodePacketHeader(ap);
2016         memset(buf, 0, sizeof(buf));
2017         strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2018         rx_packetwrite(ap, 0, 65, buf);
2019         tl = ap->length;
2020         ap->length = 65;
2021         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2022         ap->length = tl;
2023     }
2024
2025     return ap;
2026 }
2027
2028
2029 /* send a debug packet back to the sender */
2030 static void
2031 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2032                     afs_int32 ahost, short aport, afs_int32 istack)
2033 {
2034     struct sockaddr_in taddr;
2035     int i;
2036     int nbytes;
2037     int saven = 0;
2038     size_t savelen = 0;
2039 #ifdef KERNEL
2040     int waslocked = ISAFS_GLOCK();
2041 #endif
2042
2043     taddr.sin_family = AF_INET;
2044     taddr.sin_port = aport;
2045     taddr.sin_addr.s_addr = ahost;
2046 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2047     taddr.sin_len = sizeof(struct sockaddr_in);
2048 #endif
2049
2050     /* We need to trim the niovecs. */
2051     nbytes = apacket->length;
2052     for (i = 1; i < apacket->niovecs; i++) {
2053         if (nbytes <= apacket->wirevec[i].iov_len) {
2054             savelen = apacket->wirevec[i].iov_len;
2055             saven = apacket->niovecs;
2056             apacket->wirevec[i].iov_len = nbytes;
2057             apacket->niovecs = i + 1;   /* so condition fails because i == niovecs */
2058         } else
2059             nbytes -= apacket->wirevec[i].iov_len;
2060     }
2061 #ifdef KERNEL
2062 #ifdef RX_KERNEL_TRACE
2063     if (ICL_SETACTIVE(afs_iclSetp)) {
2064         if (!waslocked)
2065             AFS_GLOCK();
2066         afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2067                    "before osi_NetSend()");
2068         AFS_GUNLOCK();
2069     } else
2070 #else
2071     if (waslocked)
2072         AFS_GUNLOCK();
2073 #endif
2074 #endif
2075     /* debug packets are not reliably delivered, hence the cast below. */
2076     (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2077                       apacket->length + RX_HEADER_SIZE, istack);
2078 #ifdef KERNEL
2079 #ifdef RX_KERNEL_TRACE
2080     if (ICL_SETACTIVE(afs_iclSetp)) {
2081         AFS_GLOCK();
2082         afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2083                    "after osi_NetSend()");
2084         if (!waslocked)
2085             AFS_GUNLOCK();
2086     } else
2087 #else
2088     if (waslocked)
2089         AFS_GLOCK();
2090 #endif
2091 #endif
2092     if (saven) {                /* means we truncated the packet above. */
2093         apacket->wirevec[i - 1].iov_len = savelen;
2094         apacket->niovecs = saven;
2095     }
2096
2097 }
2098
2099 /* Send the packet to appropriate destination for the specified
2100  * call.  The header is first encoded and placed in the packet.
2101  */
2102 void
2103 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2104                struct rx_packet *p, int istack)
2105 {
2106 #if defined(KERNEL)
2107     int waslocked;
2108 #endif
2109     int code;
2110     struct sockaddr_in addr;
2111     register struct rx_peer *peer = conn->peer;
2112     osi_socket socket;
2113 #ifdef RXDEBUG
2114     char deliveryType = 'S';
2115 #endif
2116     /* The address we're sending the packet to */
2117     memset(&addr, 0, sizeof(addr));
2118     addr.sin_family = AF_INET;
2119     addr.sin_port = peer->port;
2120     addr.sin_addr.s_addr = peer->host;
2121
2122     /* This stuff should be revamped, I think, so that most, if not
2123      * all, of the header stuff is always added here.  We could
2124      * probably do away with the encode/decode routines. XXXXX */
2125
2126     /* Stamp each packet with a unique serial number.  The serial
2127      * number is maintained on a connection basis because some types
2128      * of security may be based on the serial number of the packet,
2129      * and security is handled on a per authenticated-connection
2130      * basis. */
2131     /* Pre-increment, to guarantee no zero serial number; a zero
2132      * serial number means the packet was never sent. */
2133     MUTEX_ENTER(&conn->conn_data_lock);
2134     p->header.serial = ++conn->serial;
2135     MUTEX_EXIT(&conn->conn_data_lock);
2136     /* This is so we can adjust retransmit time-outs better in the face of
2137      * rapidly changing round-trip times.  RTO estimation is not a la Karn.
2138      */
2139     if (p->firstSerial == 0) {
2140         p->firstSerial = p->header.serial;
2141     }
2142 #ifdef RXDEBUG
2143     /* If an output tracer function is defined, call it with the packet and
2144      * network address.  Note this function may modify its arguments. */
2145     if (rx_almostSent) {
2146         int drop = (*rx_almostSent) (p, &addr);
2147         /* drop packet if return value is non-zero? */
2148         if (drop)
2149             deliveryType = 'D'; /* Drop the packet */
2150     }
2151 #endif
2152
2153     /* Get network byte order header */
2154     rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
2155                                  * touch ALL the fields */
2156
2157     /* Send the packet out on the same socket that related packets are being
2158      * received on */
2159     socket =
2160         (conn->type ==
2161          RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2162
2163 #ifdef RXDEBUG
2164     /* Possibly drop this packet,  for testing purposes */
2165     if ((deliveryType == 'D')
2166         || ((rx_intentionallyDroppedPacketsPer100 > 0)
2167             && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2168         deliveryType = 'D';     /* Drop the packet */
2169     } else {
2170         deliveryType = 'S';     /* Send the packet */
2171 #endif /* RXDEBUG */
2172
2173         /* Loop until the packet is sent.  We'd prefer just to use a
2174          * blocking socket, but unfortunately the interface doesn't
2175          * allow us to have the socket block in send mode, and not
2176          * block in receive mode */
2177 #ifdef KERNEL
2178         waslocked = ISAFS_GLOCK();
2179 #ifdef RX_KERNEL_TRACE
2180         if (ICL_SETACTIVE(afs_iclSetp)) {
2181             if (!waslocked)
2182                 AFS_GLOCK();
2183             afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2184                        "before osi_NetSend()");
2185             AFS_GUNLOCK();
2186         } else
2187 #else
2188         if (waslocked)
2189             AFS_GUNLOCK();
2190 #endif
2191 #endif
2192         if ((code =
2193              osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2194                          p->length + RX_HEADER_SIZE, istack)) != 0) {
2195             /* send failed, so let's hurry up the resend, eh? */
2196             MUTEX_ENTER(&rx_stats_mutex);
2197             rx_stats.netSendFailures++;
2198             MUTEX_EXIT(&rx_stats_mutex);
2199             p->retryTime = p->timeSent; /* resend it very soon */
2200             clock_Addmsec(&(p->retryTime),
2201                           10 + (((afs_uint32) p->backoff) << 8));
2202
2203 #ifdef AFS_NT40_ENV
2204             /* Windows is nice -- it can tell us right away that we cannot
2205              * reach this recipient by returning an WSAEHOSTUNREACH error
2206              * code.  So, when this happens let's "down" the host NOW so
2207              * we don't sit around waiting for this host to timeout later.
2208              */
2209                 if (call && code == -1 && errno == WSAEHOSTUNREACH)
2210                         call->lastReceiveTime = 0;
2211 #endif
2212 #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
2213             /* Linux is nice -- it can tell us right away that we cannot
2214              * reach this recipient by returning an ENETUNREACH error
2215              * code.  So, when this happens let's "down" the host NOW so
2216              * we don't sit around waiting for this host to timeout later.
2217              */
2218             if (call && code == -ENETUNREACH)
2219                 call->lastReceiveTime = 0;
2220 #endif
2221         }
2222 #ifdef KERNEL
2223 #ifdef RX_KERNEL_TRACE
2224         if (ICL_SETACTIVE(afs_iclSetp)) {
2225             AFS_GLOCK();
2226             afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2227                        "after osi_NetSend()");
2228             if (!waslocked)
2229                 AFS_GUNLOCK();
2230         } else
2231 #else
2232         if (waslocked)
2233             AFS_GLOCK();
2234 #endif
2235 #endif
2236 #ifdef RXDEBUG
2237     }
2238     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2239 #endif
2240     MUTEX_ENTER(&rx_stats_mutex);
2241     rx_stats.packetsSent[p->header.type - 1]++;
2242     MUTEX_EXIT(&rx_stats_mutex);
2243     MUTEX_ENTER(&peer->peer_lock);
2244     hadd32(peer->bytesSent, p->length);
2245     MUTEX_EXIT(&peer->peer_lock);
2246 }
2247
2248 /* Send a list of packets to appropriate destination for the specified
2249  * connection.  The headers are first encoded and placed in the packets.
2250  */
2251 void
2252 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2253                    struct rx_packet **list, int len, int istack)
2254 {
2255 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2256     int waslocked;
2257 #endif
2258     struct sockaddr_in addr;
2259     register struct rx_peer *peer = conn->peer;
2260     osi_socket socket;
2261     struct rx_packet *p = NULL;
2262     struct iovec wirevec[RX_MAXIOVECS];
2263     int i, length, code;
2264     afs_uint32 serial;
2265     afs_uint32 temp;
2266     struct rx_jumboHeader *jp;
2267 #ifdef RXDEBUG
2268     char deliveryType = 'S';
2269 #endif
2270     /* The address we're sending the packet to */
2271     addr.sin_family = AF_INET;
2272     addr.sin_port = peer->port;
2273     addr.sin_addr.s_addr = peer->host;
2274
2275     if (len + 1 > RX_MAXIOVECS) {
2276         osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2277     }
2278
2279     /*
2280      * Stamp the packets in this jumbogram with consecutive serial numbers
2281      */
2282     MUTEX_ENTER(&conn->conn_data_lock);
2283     serial = conn->serial;
2284     conn->serial += len;
2285     MUTEX_EXIT(&conn->conn_data_lock);
2286
2287
2288     /* This stuff should be revamped, I think, so that most, if not
2289      * all, of the header stuff is always added here.  We could
2290      * probably do away with the encode/decode routines. XXXXX */
2291
2292     jp = NULL;
2293     length = RX_HEADER_SIZE;
2294     wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2295     wirevec[0].iov_len = RX_HEADER_SIZE;
2296     for (i = 0; i < len; i++) {
2297         p = list[i];
2298
2299         /* The whole 3.5 jumbogram scheme relies on packets fitting
2300          * in a single packet buffer. */
2301         if (p->niovecs > 2) {
2302             osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2303         }
2304
2305         /* Set the RX_JUMBO_PACKET flags in all but the last packets
2306          * in this chunk.  */
2307         if (i < len - 1) {
2308             if (p->length != RX_JUMBOBUFFERSIZE) {
2309                 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2310             }
2311             p->header.flags |= RX_JUMBO_PACKET;
2312             length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2313             wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2314         } else {
2315             wirevec[i + 1].iov_len = p->length;
2316             length += p->length;
2317         }
2318         wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2319         if (jp != NULL) {
2320             /* Convert jumbo packet header to network byte order */
2321             temp = (afs_uint32) (p->header.flags) << 24;
2322             temp |= (afs_uint32) (p->header.spare);
2323             *(afs_uint32 *) jp = htonl(temp);
2324         }
2325         jp = (struct rx_jumboHeader *)
2326             ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2327
2328         /* Stamp each packet with a unique serial number.  The serial
2329          * number is maintained on a connection basis because some types
2330          * of security may be based on the serial number of the packet,
2331          * and security is handled on a per authenticated-connection
2332          * basis. */
2333         /* Pre-increment, to guarantee no zero serial number; a zero
2334          * serial number means the packet was never sent. */
2335         p->header.serial = ++serial;
2336         /* This is so we can adjust retransmit time-outs better in the face of
2337          * rapidly changing round-trip times.  RTO estimation is not a la Karn.
2338          */
2339         if (p->firstSerial == 0) {
2340             p->firstSerial = p->header.serial;
2341         }
2342 #ifdef RXDEBUG
2343         /* If an output tracer function is defined, call it with the packet and
2344          * network address.  Note this function may modify its arguments. */
2345         if (rx_almostSent) {
2346             int drop = (*rx_almostSent) (p, &addr);
2347             /* drop packet if return value is non-zero? */
2348             if (drop)
2349                 deliveryType = 'D';     /* Drop the packet */
2350         }
2351 #endif
2352
2353         /* Get network byte order header */
2354         rxi_EncodePacketHeader(p);      /* XXX in the event of rexmit, etc, don't need to
2355                                          * touch ALL the fields */
2356     }
2357
2358     /* Send the packet out on the same socket that related packets are being
2359      * received on */
2360     socket =
2361         (conn->type ==
2362          RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2363
2364 #ifdef RXDEBUG
2365     /* Possibly drop this packet,  for testing purposes */
2366     if ((deliveryType == 'D')
2367         || ((rx_intentionallyDroppedPacketsPer100 > 0)
2368             && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2369         deliveryType = 'D';     /* Drop the packet */
2370     } else {
2371         deliveryType = 'S';     /* Send the packet */
2372 #endif /* RXDEBUG */
2373
2374         /* Loop until the packet is sent.  We'd prefer just to use a
2375          * blocking socket, but unfortunately the interface doesn't
2376          * allow us to have the socket block in send mode, and not
2377          * block in receive mode */
2378 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2379         waslocked = ISAFS_GLOCK();
2380         if (!istack && waslocked)
2381             AFS_GUNLOCK();
2382 #endif
2383         if ((code =
2384              osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2385                          istack)) != 0) {
2386             /* send failed, so let's hurry up the resend, eh? */
2387             MUTEX_ENTER(&rx_stats_mutex);
2388             rx_stats.netSendFailures++;
2389             MUTEX_EXIT(&rx_stats_mutex);
2390             for (i = 0; i < len; i++) {
2391                 p = list[i];
2392                 p->retryTime = p->timeSent;     /* resend it very soon */
2393                 clock_Addmsec(&(p->retryTime),
2394                               10 + (((afs_uint32) p->backoff) << 8));
2395             }
2396 #ifdef AFS_NT40_ENV
2397             /* Windows is nice -- it can tell us right away that we cannot
2398              * reach this recipient by returning an WSAEHOSTUNREACH error
2399              * code.  So, when this happens let's "down" the host NOW so
2400              * we don't sit around waiting for this host to timeout later.
2401              */
2402             if (call && code == -1 && errno == WSAEHOSTUNREACH)
2403                 call->lastReceiveTime = 0;
2404 #endif
2405 #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
2406             /* Linux is nice -- it can tell us right away that we cannot
2407              * reach this recipient by returning an ENETUNREACH error
2408              * code.  So, when this happens let's "down" the host NOW so
2409              * we don't sit around waiting for this host to timeout later.
2410              */
2411             if (call && code == -ENETUNREACH)
2412                 call->lastReceiveTime = 0;
2413 #endif
2414         }
2415 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2416         if (!istack && waslocked)
2417             AFS_GLOCK();
2418 #endif
2419 #ifdef RXDEBUG
2420     }
2421
2422     assert(p != NULL);
2423
2424     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2425
2426 #endif
2427     MUTEX_ENTER(&rx_stats_mutex);
2428     rx_stats.packetsSent[p->header.type - 1]++;
2429     MUTEX_EXIT(&rx_stats_mutex);
2430     MUTEX_ENTER(&peer->peer_lock);
2431
2432     hadd32(peer->bytesSent, p->length);
2433     MUTEX_EXIT(&peer->peer_lock);
2434 }
2435
2436
2437 /* Send a "special" packet to the peer connection.  If call is
2438  * specified, then the packet is directed to a specific call channel
2439  * associated with the connection, otherwise it is directed to the
2440  * connection only. Uses optionalPacket if it is supplied, rather than
2441  * allocating a new packet buffer.  Nbytes is the length of the data
2442  * portion of the packet.  If data is non-null, nbytes of data are
2443  * copied into the packet.  Type is the type of the packet, as defined
2444  * in rx.h.  Bug: there's a lot of duplication between this and other
2445  * routines.  This needs to be cleaned up. */
2446 struct rx_packet *
2447 rxi_SendSpecial(register struct rx_call *call,
2448                 register struct rx_connection *conn,
2449                 struct rx_packet *optionalPacket, int type, char *data,
2450                 int nbytes, int istack)
2451 {
2452     /* Some of the following stuff should be common code for all
2453      * packet sends (it's repeated elsewhere) */
2454     register struct rx_packet *p;
2455     unsigned int i = 0;
2456     int savelen = 0, saven = 0;
2457     int channel, callNumber;
2458     if (call) {
2459         channel = call->channel;
2460         callNumber = *call->callNumber;
2461         /* BUSY packets refer to the next call on this connection */
2462         if (type == RX_PACKET_TYPE_BUSY) {
2463             callNumber++;
2464         }
2465     } else {
2466         channel = 0;
2467         callNumber = 0;
2468     }
2469     p = optionalPacket;
2470     if (!p) {
2471         p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2472         if (!p)
2473             osi_Panic("rxi_SendSpecial failure");
2474     }
2475
2476     if (nbytes != -1)
2477         p->length = nbytes;
2478     else
2479         nbytes = p->length;
2480     p->header.serviceId = conn->serviceId;
2481     p->header.securityIndex = conn->securityIndex;
2482     p->header.cid = (conn->cid | channel);
2483     p->header.callNumber = callNumber;
2484     p->header.seq = 0;
2485     p->header.epoch = conn->epoch;
2486     p->header.type = type;
2487     p->header.flags = 0;
2488     if (conn->type == RX_CLIENT_CONNECTION)
2489         p->header.flags |= RX_CLIENT_INITIATED;
2490     if (data)
2491         rx_packetwrite(p, 0, nbytes, data);
2492
2493     for (i = 1; i < p->niovecs; i++) {
2494         if (nbytes <= p->wirevec[i].iov_len) {
2495             savelen = p->wirevec[i].iov_len;
2496             saven = p->niovecs;
2497             p->wirevec[i].iov_len = nbytes;
2498             p->niovecs = i + 1; /* so condition fails because i == niovecs */
2499         } else
2500             nbytes -= p->wirevec[i].iov_len;
2501     }
2502
2503     if (call)
2504         rxi_Send(call, p, istack);
2505     else
2506         rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2507     if (saven) {                /* means we truncated the packet above.  We probably don't  */
2508         /* really need to do this, but it seems safer this way, given that  */
2509         /* sneaky optionalPacket... */
2510         p->wirevec[i - 1].iov_len = savelen;
2511         p->niovecs = saven;
2512     }
2513     if (!optionalPacket)
2514         rxi_FreePacket(p);
2515     return optionalPacket;
2516 }
2517
2518
2519 /* Encode the packet's header (from the struct header in the packet to
2520  * the net byte order representation in the wire representation of the
2521  * packet, which is what is actually sent out on the wire) */
2522 void
2523 rxi_EncodePacketHeader(register struct rx_packet *p)
2524 {
2525     register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2526
2527     memset((char *)buf, 0, RX_HEADER_SIZE);
2528     *buf++ = htonl(p->header.epoch);
2529     *buf++ = htonl(p->header.cid);
2530     *buf++ = htonl(p->header.callNumber);
2531     *buf++ = htonl(p->header.seq);
2532     *buf++ = htonl(p->header.serial);
2533     *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2534                    | (((afs_uint32) p->header.flags) << 16)
2535                    | (p->header.userStatus << 8) | p->header.securityIndex);
2536     /* Note: top 16 bits of this next word were reserved */
2537     *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
2538 }
2539
2540 /* Decode the packet's header (from net byte order to a struct header) */
2541 void
2542 rxi_DecodePacketHeader(register struct rx_packet *p)
2543 {
2544     register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2545     afs_uint32 temp;
2546
2547     p->header.epoch = ntohl(*buf);
2548     buf++;
2549     p->header.cid = ntohl(*buf);
2550     buf++;
2551     p->header.callNumber = ntohl(*buf);
2552     buf++;
2553     p->header.seq = ntohl(*buf);
2554     buf++;
2555     p->header.serial = ntohl(*buf);
2556     buf++;
2557
2558     temp = ntohl(*buf);
2559     buf++;
2560
2561     /* C will truncate byte fields to bytes for me */
2562     p->header.type = temp >> 24;
2563     p->header.flags = temp >> 16;
2564     p->header.userStatus = temp >> 8;
2565     p->header.securityIndex = temp >> 0;
2566
2567     temp = ntohl(*buf);
2568     buf++;
2569
2570     p->header.serviceId = (temp & 0xffff);
2571     p->header.spare = temp >> 16;
2572     /* Note: top 16 bits of this last word are the security checksum */
2573 }
2574
2575 void
2576 rxi_PrepareSendPacket(register struct rx_call *call,
2577                       register struct rx_packet *p, register int last)
2578 {
2579     register struct rx_connection *conn = call->conn;
2580     int i, j;
2581     ssize_t len;                /* len must be a signed type; it can go negative */
2582
2583     p->flags &= ~RX_PKTFLAG_ACKED;
2584     p->header.cid = (conn->cid | call->channel);
2585     p->header.serviceId = conn->serviceId;
2586     p->header.securityIndex = conn->securityIndex;
2587
2588     /* No data packets on call 0. Where do these come from? */
2589     if (*call->callNumber == 0)
2590         *call->callNumber = 1;
2591
2592     p->header.callNumber = *call->callNumber;
2593     p->header.seq = call->tnext++;
2594     p->header.epoch = conn->epoch;
2595     p->header.type = RX_PACKET_TYPE_DATA;
2596     p->header.flags = 0;
2597     p->header.spare = 0;
2598     if (conn->type == RX_CLIENT_CONNECTION)
2599         p->header.flags |= RX_CLIENT_INITIATED;
2600
2601     if (last)
2602         p->header.flags |= RX_LAST_PACKET;
2603
2604     clock_Zero(&p->retryTime);  /* Never yet transmitted */
2605     clock_Zero(&p->firstSent);  /* Never yet transmitted */
2606     p->header.serial = 0;       /* Another way of saying never transmitted... */
2607     p->backoff = 0;
2608
2609     /* Now that we're sure this is the last data on the call, make sure
2610      * that the "length" and the sum of the iov_lens matches. */
2611     len = p->length + call->conn->securityHeaderSize;
2612
2613     for (i = 1; i < p->niovecs && len > 0; i++) {
2614         len -= p->wirevec[i].iov_len;
2615     }
2616     if (len > 0) {
2617         osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
2618     } else {
2619         struct rx_queue q;
2620         int nb;
2621
2622         queue_Init(&q);
2623
2624         /* Free any extra elements in the wirevec */
2625         for (j = MAX(2, i), nb = p->niovecs - j; j < p->niovecs; j++) {
2626             queue_Append(&q,RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
2627         }
2628         if (nb)
2629             rxi_FreePackets(nb, &q);
2630
2631         p->niovecs = MAX(2, i);
2632         p->wirevec[MAX(2, i) - 1].iov_len += len;
2633     }
2634     RXS_PreparePacket(conn->securityObject, call, p);
2635 }
2636
2637 /* Given an interface MTU size, calculate an adjusted MTU size that
2638  * will make efficient use of the RX buffers when the peer is sending
2639  * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
2640 int
2641 rxi_AdjustIfMTU(int mtu)
2642 {
2643     int adjMTU;
2644     int frags;
2645
2646     adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2647     if (mtu <= adjMTU) {
2648         return mtu;
2649     }
2650     mtu -= adjMTU;
2651     if (mtu <= 0) {
2652         return adjMTU;
2653     }
2654     frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2655     return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2656 }
2657
2658 /* Given an interface MTU size, and the peer's advertised max receive
2659  * size, calculate an adjisted maxMTU size that makes efficient use
2660  * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2661 int
2662 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2663 {
2664     int maxMTU = mtu * rxi_nSendFrags;
2665     maxMTU = MIN(maxMTU, peerMaxMTU);
2666     return rxi_AdjustIfMTU(maxMTU);
2667 }
2668
2669 /* Given a packet size, figure out how many datagram packet will fit.
2670  * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2671  * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2672  * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2673 int
2674 rxi_AdjustDgramPackets(int frags, int mtu)
2675 {
2676     int maxMTU;
2677     if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2678         return 1;
2679     }
2680     maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2681     maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2682     /* subtract the size of the first and last packets */
2683     maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2684     if (maxMTU < 0) {
2685         return 1;
2686     }
2687     return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2688 }