 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html

#include <afsconfig.h>
#include "afs/param.h"
#include <afs/param.h>

#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"

/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */

struct rx_packet *rx_mallocedP = 0;

extern char cml_version_number[];
extern int (*rx_almostSent) ();

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_int32 ahost, short aport,

static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
                                   struct rx_queue * q);
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);

/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 * all packet buffers (iov_base) are integral multiples of
 * offset is an integral multiple of the word size.
 */
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
        l += packet->wirevec[i].iov_len;

/* all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                             (offset - l))) = data;
        l += packet->wirevec[i].iov_len;

/* all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    while ((resid > 0) && (i < packet->niovecs)) {
        j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        l += packet->wirevec[i].iov_len;

    return (resid ? (r - resid) : r);

/* all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
 */
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    while ((resid > 0) && (i < RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */

        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
        l += packet->wirevec[i].iov_len;

    return (resid ? (r - resid) : r);
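
/* Illustrative sketch (not part of the original source): how a caller might
 * exercise the slow accessors above.  The function name example_slow_rw is
 * hypothetical, and the fragment is guarded out of the build. */
#if 0				/* example only; not compiled */
static void
example_slow_rw(struct rx_packet *packet)
{
    afs_int32 v;
    char buf[16];

    /* offsets must be word-aligned, per the rules above */
    rx_SlowPutInt32(packet, 0, 1234);
    v = rx_SlowGetInt32(packet, 0);

    /* bulk copy across iovec boundaries; the return value is the number
     * of bytes actually copied, which may be less than requested */
    if (rx_SlowReadPacket(packet, 0, sizeof(buf), buf) != sizeof(buf)) {
	/* fewer bytes were available than we asked for */
    }
}
#endif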
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
    register struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
        RX_PACKET_IOV_FULLINIT(p);

#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    register struct rx_packet *c;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
        MUTEX_ENTER(&rx_freePktQ_lock);

        if ((transfer + rx_TSFPQGlobSize) <= rx_nFreePackets) {
            transfer += rx_TSFPQGlobSize;
        } else if (transfer <= rx_nFreePackets) {
            transfer = rx_nFreePackets;
            /* alloc enough for us, plus a few globs for other threads */
            alloc = transfer + (3 * rx_TSFPQGlobSize) - rx_nFreePackets;
            rxi_MorePacketsNoLock(MAX(alloc, rx_initSendWindow));
            transfer += rx_TSFPQGlobSize;

        RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

        MUTEX_EXIT(&rx_freePktQ_lock);

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
         num_pkts--, overq++);

        rxi_NeedMorePackets = TRUE;
        MUTEX_ENTER(&rx_stats_mutex);
        case RX_PACKET_CLASS_RECEIVE:
            rx_stats.receivePktAllocFailures++;
        case RX_PACKET_CLASS_SEND:
            rx_stats.sendPktAllocFailures++;
        case RX_PACKET_CLASS_SPECIAL:
            rx_stats.specialPktAllocFailures++;
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_stats.receiveCbufPktAllocFailures++;
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_stats.sendCbufPktAllocFailures++;
        MUTEX_EXIT(&rx_stats_mutex);

    if (rx_nFreePackets < num_pkts)
        num_pkts = rx_nFreePackets;

        rxi_NeedMorePackets = TRUE;

    if (rx_nFreePackets < num_pkts) {
        rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), rx_initSendWindow));

    for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
         i++, c=queue_Next(c, rx_packet)) {

    queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);
    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    register struct rx_ts_info_t * rx_ts_info;
    register struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

        for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        for (queue_Scan(q, c, nc, rx_packet)) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);

    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    register struct rx_packet *p, *np;

    osi_Assert(num_pkts >= 0);

        for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
        for (queue_Scan(q, p, np, rx_packet)) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);

    queue_SpliceAppend(q, &cbs);

    MUTEX_ENTER(&rx_freePktQ_lock);
    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
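
/* Illustrative sketch (not part of the original source): allocating a batch
 * of packets into a caller-owned queue and returning them.  example_batch is
 * a hypothetical name, and the assumption that rxi_AllocPackets returns the
 * count actually allocated is drawn from the code above. */
#if 0				/* example only; not compiled */
static void
example_batch(void)
{
    struct rx_queue q;
    int n;

    queue_Init(&q);
    /* may return fewer than requested if the pool is over quota */
    n = rxi_AllocPackets(RX_PACKET_CLASS_SEND, 8, &q);
    /* ... use the n packets chained on q ... */
    rxi_FreePackets(n, &q);	/* passing 0 also works: length unknown */
}
#endif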
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    register struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
    if ((nv + p->niovecs) > RX_MAXWVECS)
        nv = RX_MAXWVECS - p->niovecs;

    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
        p->wirevec[i].iov_base = (caddr_t) cb->localdata;
        p->wirevec[i].iov_len = RX_CBUFFERSIZE;

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
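
/* Worked example (illustrative, not from the original source): a request for
 * nb bytes rounds up to nv = ceil(nb / RX_CBUFFERSIZE) continuation buffers,
 * clipped so niovecs never exceeds RX_MAXWVECS.  On return, a value <= 0
 * means the caller got all the space it asked for; a positive value is the
 * byte count that could not be attached.  example_grow is hypothetical. */
#if 0				/* example only; not compiled */
static void
example_grow(struct rx_packet *p)
{
    int shortfall = rxi_AllocDataBuf(p, 4000, RX_PACKET_CLASS_SEND_CBUF);
    if (shortfall > 0) {
	/* packet grew, but by 'shortfall' fewer bytes than requested */
    }
}
#endif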
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        RX_TS_FPQ_CHECKIN(rx_ts_info,p);
    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_NeedMorePackets = FALSE;
        MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        RX_TS_FPQ_CHECKIN(rx_ts_info,p);
    rx_ts_info->_FPQ.delta += apackets;

        (num_keep_local < apackets)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
        rxi_NeedMorePackets = FALSE;
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
    struct rx_packet *p, *e;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    memset((char *)p, 0, getme);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);

    rx_nFreePackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_stats_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_stats_mutex);
#endif /* RX_ENABLE_TSFPQ */
    rxi_NeedMorePackets = FALSE;

rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));

#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (num_keep_local < rx_ts_info->_FPQ.len) {
            xfer = rx_ts_info->_FPQ.len - num_keep_local;
            RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
            xfer = num_keep_local - rx_ts_info->_FPQ.len;
            if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
                xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
            if (rx_nFreePackets < xfer) {
                rxi_MorePacketsNoLock(xfer - rx_nFreePackets);
            RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
        MUTEX_EXIT(&rx_freePktQ_lock);

rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */

/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_initSendWindow);

/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for afs_int32: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.

/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    register struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info,p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        RX_TS_FPQ_LTOG(rx_ts_info);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %lx\n", (unsigned long)p));

    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    register struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info,p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *
 * number of continuation buffers freed
 */
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
    struct rx_packet * cb;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
        cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
        RX_FPQ_MARK_FREE(cb);

/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p      -- packet from which to free continuation buffers
 * [IN] first  -- iovec offset of first continuation buffer to free
 */
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));

#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p             -- packet from which continuation buffers will be freed
 * [IN] first         -- iovec offset of first continuation buffer to free
 * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 *                       global free pool before returning
 */
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
        RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
        iov->iov_len = RX_CBUFFERSIZE;

#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    register struct rx_ts_info_t * rx_ts_info;

        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */
        RX_TS_INFO_GET(rx_ts_info);
        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
        if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
            MUTEX_ENTER(&rx_freePktQ_lock);
            RX_TS_FPQ_LTOG(rx_ts_info);
            MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;

        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */
        MUTEX_ENTER(&rx_freePktQ_lock);
        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
        rxi_PacketsUnWait();
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketNoLock(int class)
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        MUTEX_ENTER(&rx_stats_mutex);
        case RX_PACKET_CLASS_RECEIVE:
            rx_stats.receivePktAllocFailures++;
        case RX_PACKET_CLASS_SEND:
            rx_stats.sendPktAllocFailures++;
        case RX_PACKET_CLASS_SPECIAL:
            rx_stats.specialPktAllocFailures++;
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_stats.receiveCbufPktAllocFailures++;
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_stats.sendCbufPktAllocFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
        return (struct rx_packet *)0;

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_initSendWindow);

        RX_TS_FPQ_GTOL(rx_ts_info);

    RX_TS_FPQ_CHECKOUT(rx_ts_info,p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacketNoLock(int class)
    register struct rx_packet *p;

    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        MUTEX_ENTER(&rx_stats_mutex);
        case RX_PACKET_CLASS_RECEIVE:
            rx_stats.receivePktAllocFailures++;
        case RX_PACKET_CLASS_SEND:
            rx_stats.sendPktAllocFailures++;
        case RX_PACKET_CLASS_SPECIAL:
            rx_stats.specialPktAllocFailures++;
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_stats.receiveCbufPktAllocFailures++;
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_stats.sendCbufPktAllocFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
        return (struct rx_packet *)0;

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_initSendWindow);

    p = queue_First(&rx_freePacketQueue, rx_packet);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketTSFPQ(int class, int pull_global)
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_initSendWindow);

        RX_TS_FPQ_GTOL(rx_ts_info);

        MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

    RX_TS_FPQ_CHECKOUT(rx_ts_info,p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    register struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    register struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
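
/* Illustrative sketch (not part of the original source): a minimal
 * alloc/use/free cycle.  Note that p->length counts only the data area;
 * RX_HEADER_SIZE is not included, per the comment above rxi_AllocPacketNoLock.
 * example_alloc is a hypothetical name. */
#if 0				/* example only; not compiled */
static void
example_alloc(void)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
    if (p) {
	/* p->length bytes of payload space are available here */
	rxi_FreePacket(p);
    }
}
#endif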
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
rxi_AllocSendPacket(register struct rx_call *call, int want)
    register struct rx_packet *p = (struct rx_packet *)0;
    register unsigned delta;

    mud = call->MTU - RX_HEADER_SIZE;
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
        want = MIN(want, mud);

        if ((unsigned)want > p->length)
            (void)rxi_AllocDataBuf(p, (want - p->length),
                                   RX_PACKET_CLASS_SEND_CBUF);

        if ((unsigned)p->length > mud)

        if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);

            want = MIN(want, mud);

            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);

            if ((unsigned)p->length > mud)

            if (delta >= p->length) {

        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
         */
        call->flags |= RX_CALL_WAIT_PACKETS;
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
        osi_rxSleep(&rx_waitingForPackets);

        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
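
/* Illustrative summary (not part of the original source) of the sizing
 * arithmetic above; names match the code, and the two trailing steps are
 * assumed from context since those lines are elided here:
 *
 *     mud  = call->MTU - RX_HEADER_SIZE;       maximum user data
 *     want = MIN(want, mud);                   never ask for more than the MTU allows
 *     if (want > p->length)  grow the packet with rxi_AllocDataBuf(...)
 *     if (p->length > mud)   trim p->length back to mud      (assumed)
 *     p->length -= delta;    reserve security header+trailer (assumed)
 */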
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
/* count the number of used FDs */
CountFDs(register int amax)
    register int i, code;

    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
#endif /* AFS_NT40_ENV */
#define CountFDs(amax) amax

#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    register afs_int32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
        if (nbytes < 0 && errno == EWOULDBLOCK) {
            MUTEX_ENTER(&rx_stats_mutex);
            rx_stats.noPacketOnRead++;
            MUTEX_EXIT(&rx_stats_mutex);
        } else if (nbytes <= 0) {
            MUTEX_ENTER(&rx_stats_mutex);
            rx_stats.bogusPacketOnRead++;
            rx_stats.bogusHost = from.sin_addr.s_addr;
            MUTEX_EXIT(&rx_stats_mutex);
            dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
                 ntohs(from.sin_port), nbytes));

    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
             && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;

        dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
             p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
             p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
        rxi_TrimDataBufs(p, 1);

        /* Extract packet header. */
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
            struct rx_peer *peer;
            MUTEX_ENTER(&rx_stats_mutex);
            rx_stats.packetsRead[p->header.type - 1]++;
            MUTEX_EXIT(&rx_stats_mutex);
            /*
             * Try to look up this peer structure.  If it doesn't exist,
             * don't create a new one -
             * we don't keep count of the bytes sent/received if a peer
             * structure doesn't already exist.
             *
             * The peer/connection cleanup code assumes that there is 1 peer
             * per connection.  If we actually created a peer structure here
             * and this packet was an rxdebug packet, the peer structure would
             * never be cleaned up.
             */
            peer = rxi_FindPeer(*host, *port, 0, 0);
            /* Since this may not be associated with a connection,
             * it may have no refCount, meaning we could race with
             */
            if (peer && (peer->refCount > 0)) {
                MUTEX_ENTER(&peer->peer_lock);
                hadd32(peer->bytesReceived, p->length);
                MUTEX_EXIT(&peer->peer_lock);

        /* Free any empty packet buffers at the end of this packet */
        rxi_TrimDataBufs(p, 1);
#endif /* !KERNEL || UKERNEL */
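
/* Illustrative sketch (not part of the original source): a receive loop
 * built on rxi_ReadPacket.  example_rx_loop is hypothetical, and the u_short
 * port type is assumed from the truncated prototype above; a zero return
 * means the datagram was bogus and the packet can simply be reused. */
#if 0				/* example only; not compiled */
static void
example_rx_loop(osi_socket sock, struct rx_packet *p)
{
    afs_uint32 host;
    u_short port;

    for (;;) {
	rxi_RestoreDataBufs(p);	/* reset iovecs before reusing the packet */
	if (rxi_ReadPacket(sock, p, &host, &port)) {
	    /* header decoded, (host,port) filled in; dispatch the packet */
	}
    }
}
#endif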
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
    niov = p->niovecs - 2;
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
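
/* Worked example (illustrative): the abbreviated jumbo header is one 32-bit
 * word; after ntohl(), bits 31..24 carry the flags byte and bits 15..0 the
 * checksum, which is exactly what the decode above extracts:
 *
 *     temp      = ntohl(*(afs_uint32 *) jp);
 *     jp->flags = (u_char) (temp >> 24);
 *     jp->cksum = (u_short) (temp);
 *
 * e.g. a decoded word of 0x20001234 yields flags 0x20 and cksum 0x1234. */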
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
    register int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
            t = iovs[i].iov_len;
        memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

        if (m->m_len <= off) {

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

        t = MIN(l1, MIN(l2, (unsigned int)len));

            p1 = mtod(m, caddr_t);
            p2 = iovs[i].iov_base;
            l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;

    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /* KERNEL && !UKERNEL */

/* send a response to a debug packet */
rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
                       afs_int32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;

            /* get basic stats */
            memset((char *)&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_nWaiting);
            tstat.nWaited = htonl(rx_nWaited);
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);
            tl = sizeof(struct rx_debugStats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

                rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
                ap->length = sizeof(struct rx_debugStats);
                rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
                rx_computelen(ap, ap->length);

    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            register struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);

            tl = sizeof(struct rx_debugConn) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            memset((char *)&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                 */
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                                tconn.callState[j] = RX_STATE_NOTINIT;

                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)

                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,

                MUTEX_EXIT(&rx_connHashTable_lock);

            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

            /*
             * Pass back all the peer structures we have available
             */
    case RX_DEBUGI_GETPEER:{
            register struct rx_peer *tp;
            struct rx_debugPeer tpeer;

            tl = sizeof(struct rx_debugPeer) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            memset((char *)&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 *
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = tp->burstSize;
                        tpeer.burst = tp->burst;
                        tpeer.burstWait.sec = htonl(tp->burstWait.sec);
                        tpeer.burstWait.usec = htonl(tp->burstWait.usec);
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.timeout.sec = htonl(tp->timeout.sec);
                        tpeer.timeout.usec = htonl(tp->timeout.usec);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.inPacketSkew = htonl(tp->inPacketSkew);
                        tpeer.outPacketSkew = htonl(tp->outPacketSkew);
                        tpeer.rateFlag = htonl(tp->rateFlag);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high = htonl(tp->bytesSent.high);
                        tpeer.bytesSent.low = htonl(tp->bytesSent.low);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived.high);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived.low);

                        MUTEX_EXIT(&rx_peerHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,

                MUTEX_EXIT(&rx_peerHashTable_lock);

            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

    case RX_DEBUGI_RXSTATS:{
            tl = sizeof(rx_stats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            /* Since it's all int32s, convert to network order with a loop. */
            MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) & rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

            ap->length = sizeof(rx_stats);
            MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

        /* error response packet */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
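
/* Illustrative sketch (not part of the original source): the wire request
 * this handler expects is a struct rx_debugIn with both fields in network
 * order, e.g. for the basic statistics query above:
 *
 *     struct rx_debugIn tin;
 *     tin.type  = htonl(RX_DEBUGI_GETSTATS);
 *     tin.index = htonl(0);
 *
 * carried in an RX_PACKET_TYPE_DEBUG packet with RX_CLIENT_INITIATED set;
 * unknown types get the RX_DEBUGI_BADTYPE error response built just above. */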
2023 rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
2024 afs_int32 ahost, short aport, int istack)
2029 * Only respond to client-initiated version requests, and
2030 * clear that flag in the response.
2032 if (ap->header.flags & RX_CLIENT_INITIATED) {
2035 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2036 rxi_EncodePacketHeader(ap);
2037 memset(buf, 0, sizeof(buf));
2038 strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2039 rx_packetwrite(ap, 0, 65, buf);
2042 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2050 /* send a debug packet back to the sender */
2052 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2053 afs_int32 ahost, short aport, afs_int32 istack)
2055 struct sockaddr_in taddr;
2061 int waslocked = ISAFS_GLOCK();
2064 taddr.sin_family = AF_INET;
2065 taddr.sin_port = aport;
2066 taddr.sin_addr.s_addr = ahost;
2067 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2068 taddr.sin_len = sizeof(struct sockaddr_in);
2071 /* We need to trim the niovecs. */
2072 nbytes = apacket->length;
2073 for (i = 1; i < apacket->niovecs; i++) {
2074 if (nbytes <= apacket->wirevec[i].iov_len) {
2075 savelen = apacket->wirevec[i].iov_len;
2076 saven = apacket->niovecs;
2077 apacket->wirevec[i].iov_len = nbytes;
2078 apacket->niovecs = i + 1; /* so condition fails because i == niovecs */
2080 nbytes -= apacket->wirevec[i].iov_len;
2083 #ifdef RX_KERNEL_TRACE
2084 if (ICL_SETACTIVE(afs_iclSetp)) {
2087 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2088 "before osi_NetSend()");
2096 /* debug packets are not reliably delivered, hence the cast below. */
2097 (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2098 apacket->length + RX_HEADER_SIZE, istack);
2100 #ifdef RX_KERNEL_TRACE
2101 if (ICL_SETACTIVE(afs_iclSetp)) {
2103 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2104 "after osi_NetSend()");
2113 if (saven) { /* means we truncated the packet above. */
2114 apacket->wirevec[i - 1].iov_len = savelen;
2115 apacket->niovecs = saven;
2120 /* Send the packet to appropriate destination for the specified
2121 * call. The header is first encoded and placed in the packet.
2124 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2125 struct rx_packet *p, int istack)
2131 struct sockaddr_in addr;
2132 register struct rx_peer *peer = conn->peer;
2135 char deliveryType = 'S';
2137 /* The address we're sending the packet to */
2138 memset(&addr, 0, sizeof(addr));
2139 addr.sin_family = AF_INET;
2140 addr.sin_port = peer->port;
2141 addr.sin_addr.s_addr = peer->host;
2143 /* This stuff should be revamped, I think, so that most, if not
2144 * all, of the header stuff is always added here. We could
2145 * probably do away with the encode/decode routines. XXXXX */
2147 /* Stamp each packet with a unique serial number. The serial
2148 * number is maintained on a connection basis because some types
2149 * of security may be based on the serial number of the packet,
2150 * and security is handled on a per authenticated-connection
2152 /* Pre-increment, to guarantee no zero serial number; a zero
2153 * serial number means the packet was never sent. */
2154 MUTEX_ENTER(&conn->conn_data_lock);
2155 p->header.serial = ++conn->serial;
2156 MUTEX_EXIT(&conn->conn_data_lock);
2157 /* This is so we can adjust retransmit time-outs better in the face of
2158 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2160 if (p->firstSerial == 0) {
2161 p->firstSerial = p->header.serial;
2164 /* If an output tracer function is defined, call it with the packet and
2165 * network address. Note this function may modify its arguments. */
2166 if (rx_almostSent) {
2167 int drop = (*rx_almostSent) (p, &addr);
2168 /* drop packet if return value is non-zero? */
2170 deliveryType = 'D'; /* Drop the packet */
2174 /* Get network byte order header */
2175 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2176 * touch ALL the fields */
2178 /* Send the packet out on the same socket that related packets are being
2182 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2185 /* Possibly drop this packet, for testing purposes */
2186 if ((deliveryType == 'D')
2187 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2188 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2189 deliveryType = 'D'; /* Drop the packet */
2191 deliveryType = 'S'; /* Send the packet */
2192 #endif /* RXDEBUG */
2194 /* Loop until the packet is sent. We'd prefer just to use a
2195 * blocking socket, but unfortunately the interface doesn't
2196 * allow us to have the socket block in send mode, and not
2197 * block in receive mode */
2199 waslocked = ISAFS_GLOCK();
2200 #ifdef RX_KERNEL_TRACE
2201 if (ICL_SETACTIVE(afs_iclSetp)) {
2204 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2205 "before osi_NetSend()");
2214 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2215 p->length + RX_HEADER_SIZE, istack)) != 0) {
2216 /* send failed, so let's hurry up the resend, eh? */
2217 MUTEX_ENTER(&rx_stats_mutex);
2218 rx_stats.netSendFailures++;
2219 MUTEX_EXIT(&rx_stats_mutex);
2220 p->retryTime = p->timeSent; /* resend it very soon */
2221 clock_Addmsec(&(p->retryTime),
2222 10 + (((afs_uint32) p->backoff) << 8));
2223 /* Some systems are nice and tell us right away that we cannot
2224 * reach this recipient by returning an error code.
2225 * So, when this happens let's "down" the host NOW so
2226 * we don't sit around waiting for this host to timeout later.
2230 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2231 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2232 code == -ENETUNREACH
2233 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2234 code == EHOSTUNREACH
2239 call->lastReceiveTime = 0;
2242 #ifdef RX_KERNEL_TRACE
2243 if (ICL_SETACTIVE(afs_iclSetp)) {
2245 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2246 "after osi_NetSend()");
2257 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2259 MUTEX_ENTER(&rx_stats_mutex);
2260 rx_stats.packetsSent[p->header.type - 1]++;
2261 MUTEX_EXIT(&rx_stats_mutex);
2262 MUTEX_ENTER(&peer->peer_lock);
2263 hadd32(peer->bytesSent, p->length);
2264 MUTEX_EXIT(&peer->peer_lock);
2267 /* Send a list of packets to appropriate destination for the specified
2268 * connection. The headers are first encoded and placed in the packets.
2271 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2272 struct rx_packet **list, int len, int istack)
2274 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2277 struct sockaddr_in addr;
2278 register struct rx_peer *peer = conn->peer;
2280 struct rx_packet *p = NULL;
2281 struct iovec wirevec[RX_MAXIOVECS];
2282 int i, length, code;
2285 struct rx_jumboHeader *jp;
2287 char deliveryType = 'S';
2289 /* The address we're sending the packet to */
2290 addr.sin_family = AF_INET;
2291 addr.sin_port = peer->port;
2292 addr.sin_addr.s_addr = peer->host;
2294 if (len + 1 > RX_MAXIOVECS) {
2295 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2299 * Stamp the packets in this jumbogram with consecutive serial numbers
2301 MUTEX_ENTER(&conn->conn_data_lock);
2302 serial = conn->serial;
2303 conn->serial += len;
2304 MUTEX_EXIT(&conn->conn_data_lock);
2307 /* This stuff should be revamped, I think, so that most, if not
2308 * all, of the header stuff is always added here. We could
2309 * probably do away with the encode/decode routines. XXXXX */
2312 length = RX_HEADER_SIZE;
2313 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0; i < len; i++) {
	p = list[i];

	/* The whole 3.5 jumbogram scheme relies on packets fitting
	 * in a single packet buffer. */
	if (p->niovecs > 2) {
	    osi_Panic("rxi_SendPacketList, niovecs > 2\n");
	}

	/* Set the RX_JUMBO_PACKET flags in all but the last packets
	 * in this packet list. */
	if (i < len - 1) {
	    if (p->length != RX_JUMBOBUFFERSIZE) {
		osi_Panic("rxi_SendPacketList, length != jumbo size\n");
	    }
	    p->header.flags |= RX_JUMBO_PACKET;
	    length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	    wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	} else {
	    wirevec[i + 1].iov_len = p->length;
	    length += p->length;
	}
	wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
	if (jp != NULL) {
	    /* Convert jumbo packet header to network byte order */
	    temp = (afs_uint32) (p->header.flags) << 24;
	    temp |= (afs_uint32) (p->header.spare);
	    *(afs_uint32 *) jp = htonl(temp);
	}
	jp = (struct rx_jumboHeader *)
	    ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);

	/* Stamp each packet with a unique serial number.  The serial
	 * number is maintained on a connection basis because some types
	 * of security may be based on the serial number of the packet,
	 * and security is handled on a per authenticated-connection
	 * basis. */
	/* Pre-increment, to guarantee no zero serial number; a zero
	 * serial number means the packet was never sent. */
	p->header.serial = ++serial;
	/* This is so we can adjust retransmit time-outs better in the face of
	 * rapidly changing round-trip times.  RTO estimation is not a la Karn.
	 */
	if (p->firstSerial == 0) {
	    p->firstSerial = p->header.serial;
	}
#ifdef RXDEBUG
	/* If an output tracer function is defined, call it with the packet and
	 * network address.  Note this function may modify its arguments. */
	if (rx_almostSent) {
	    int drop = (*rx_almostSent) (p, &addr);
	    /* drop packet if return value is non-zero? */
	    if (drop)
		deliveryType = 'D';	/* Drop the packet */
	}
#endif

	/* Get network byte order header */
	rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
					 * touch ALL the fields */
    }

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket = (conn->type ==
	      RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	waslocked = ISAFS_GLOCK();
	if (!istack && waslocked)
	    AFS_GUNLOCK();
#endif
	if ((code =
	     osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
			 istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.netSendFailures++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    for (i = 0; i < len; i++) {
		p = list[i];
		p->retryTime = p->timeSent;	/* resend it very soon */
		clock_Addmsec(&(p->retryTime),
			      10 + (((afs_uint32) p->backoff) << 8));
	    }
	    /* Some systems are nice and tell us right away that we cannot
	     * reach this recipient by returning an error code.
	     * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call &&
#ifdef AFS_NT40_ENV
		code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
#elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
		code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
		code == EHOSTUNREACH
#else
		0
#endif
		)
		call->lastReceiveTime = 0;
	}
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	if (!istack && waslocked)
	    AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }	/* closes the "} else {" opened inside the RXDEBUG block above */

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1],
	 ntohl(peer->host), ntohs(peer->port), p->header.serial,
	 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq,
	 p->header.flags, (unsigned long)p, p->retryTime.sec,
	 p->retryTime.usec / 1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
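
/* Illustrative sketch added for exposition; not part of the original
 * source.  The RX_PACKET_EXAMPLES guard and the helper name are
 * hypothetical, so this never enters a real build.  It restates the
 * wire-length arithmetic of the loop above, assuming the usual
 * constants from rx_packet.h (RX_HEADER_SIZE 28, RX_JUMBOBUFFERSIZE
 * 1412, RX_JUMBOHEADERSIZE 4). */
#ifdef RX_PACKET_EXAMPLES
static int
example_JumbogramLength(int npackets, int lastlen)
{
    /* One Rx header, then (npackets - 1) full jumbo buffers each
     * followed by a 4-byte jumbo header, then the final buffer. */
    int length = RX_HEADER_SIZE;
    int i;

    for (i = 0; i < npackets - 1; i++)
	length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    length += lastlen;
    /* e.g. npackets == 3, lastlen == 1412:
     * 28 + 2 * (1412 + 4) + 1412 == 4272 bytes on the wire */
    return length;
}
#endif /* RX_PACKET_EXAMPLES */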
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(register struct rx_call *call,
		register struct rx_connection *conn,
		struct rx_packet *optionalPacket, int type, char *data,
		int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;
    if (call) {
	channel = call->channel;
	callNumber = *call->callNumber;
	/* BUSY packets refer to the next call on this connection */
	if (type == RX_PACKET_TYPE_BUSY) {
	    callNumber++;
	}
    } else {
	channel = 0;
	callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	if (!p)
	    osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
	p->length = nbytes;
    else
	nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
	rx_packetwrite(p, 0, nbytes, data);

    /* Trim the wirevec to the actual data length, remembering the old
     * values so a caller-supplied packet can be restored afterwards. */
    for (i = 1; i < p->niovecs; i++) {
	if (nbytes <= p->wirevec[i].iov_len) {
	    savelen = p->wirevec[i].iov_len;
	    saven = p->niovecs;
	    p->wirevec[i].iov_len = nbytes;
	    p->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
	rxi_Send(call, p, istack);
    else
	rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {		/* means we truncated the packet above.  We probably don't */
	/* really need to do this, but it seems safer this way, given that */
	/* sneaky optionalPacket... */
	p->wirevec[i - 1].iov_len = savelen;
	p->niovecs = saven;
    }
    if (!optionalPacket)
	rxi_FreePacket(p);
    return optionalPacket;
}
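
/* Illustrative sketch added for exposition; not part of the original
 * source (the RX_PACKET_EXAMPLES guard and helper name are
 * hypothetical).  It mirrors how callers in rx.c typically use
 * rxi_SendSpecial: a call abort carries the 4-byte error code, in
 * network byte order, as the packet's data portion. */
#ifdef RX_PACKET_EXAMPLES
static void
example_SendCallAbort(struct rx_call *call, afs_int32 error, int istack)
{
    afs_int32 nerror = htonl(error);

    /* No optionalPacket is supplied, so rxi_SendSpecial allocates and
     * frees a scratch packet and returns NULL. */
    (void)rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
			  RX_PACKET_TYPE_ABORT, (char *)&nerror,
			  sizeof(nerror), istack);
}
#endif /* RX_PACKET_EXAMPLES */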
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */

    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
		   | (((afs_uint32) p->header.flags) << 16)
		   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
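
/* Added for reference (not in the original source): the RX_HEADER_SIZE
 * (28-byte) wire header written above, one 32-bit word at a time, all
 * in network byte order:
 *
 *   word 0  epoch
 *   word 1  cid (connection id | channel)
 *   word 2  callNumber
 *   word 3  seq
 *   word 4  serial
 *   word 5  type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6  spare<<16 | serviceId  (as rxi_DecodePacketHeader notes,
 *           the top 16 bits double as the security checksum)
 */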
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf++);
    p->header.cid = ntohl(*buf++);
    p->header.callNumber = ntohl(*buf++);
    p->header.seq = ntohl(*buf++);
    p->header.serial = ntohl(*buf++);

    temp = ntohl(*buf++);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf++);
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
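
/* Illustrative sketch added for exposition; not part of the original
 * source (the RX_PACKET_EXAMPLES guard and helper names are
 * hypothetical).  It isolates the packing and unpacking of the sixth
 * header word done by the two routines above; e.g. type 1 (DATA),
 * flags 0x04 (RX_LAST_PACKET), userStatus 0, securityIndex 2 packs to
 * 0x01040002. */
#ifdef RX_PACKET_EXAMPLES
static afs_uint32
example_PackTypeWord(struct rx_header *h)
{
    return (((afs_uint32) h->type) << 24)
	| (((afs_uint32) h->flags) << 16)
	| (((afs_uint32) h->userStatus) << 8) | (afs_uint32) h->securityIndex;
}

static void
example_UnpackTypeWord(afs_uint32 temp, struct rx_header *h)
{
    /* assignment to the narrow header fields truncates, exactly as in
     * rxi_DecodePacketHeader */
    h->type = temp >> 24;
    h->flags = temp >> 16;
    h->userStatus = temp >> 8;
    h->securityIndex = temp;
}
#endif /* RX_PACKET_EXAMPLES */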
void
rxi_PrepareSendPacket(register struct rx_call *call,
		      register struct rx_packet *p, register int last)
{
    register struct rx_connection *conn = call->conn;
    int i;
    ssize_t len;		/* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
	*call->callNumber = 1;

    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
	p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);	/* Never yet transmitted */
    clock_Zero(&p->firstSent);	/* Never yet transmitted */
    p->header.serial = 0;	/* Another way of saying never transmitted... */
    p->backoff = 0;

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
	len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
	osi_Panic("PrepareSendPacket 1\n");	/* MTUXXX */
    } else if (i < p->niovecs) {
	/* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
	rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */ );
#else /* !RX_ENABLE_TSFPQ */
	MUTEX_ENTER(&rx_freePktQ_lock);
	rxi_FreeDataBufsNoLock(p, i);
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */
	p->niovecs = i;
    }
    p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
}
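
/* Illustrative sketch added for exposition; not part of the original
 * source (the RX_PACKET_EXAMPLES guard and helper name are
 * hypothetical).  It checks the invariant rxi_PrepareSendPacket
 * establishes: after trimming, the data iovecs (wirevec[1..niovecs-1])
 * sum to exactly the packet length plus the connection's security
 * header. */
#ifdef RX_PACKET_EXAMPLES
static int
example_WirevecMatchesLength(struct rx_packet *p, int securityHeaderSize)
{
    size_t sum = 0;
    unsigned int i;

    for (i = 1; i < p->niovecs; i++)
	sum += p->wirevec[i].iov_len;
    return sum == (size_t) p->length + securityHeaderSize;
}
#endif /* RX_PACKET_EXAMPLES */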
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU, frags;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU)
	return mtu;
    mtu -= adjMTU;
    if (mtu <= 0)
	return adjMTU;
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
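
/* Added worked example (not in the original source), assuming the
 * usual constants from rx_packet.h (RX_HEADER_SIZE 28,
 * RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4):
 * rxi_AdjustIfMTU(1500) computes adjMTU = 28 + 1412 + 4 = 1444; the
 * 56 bytes left over cannot hold another 1416-byte jumbo fragment, so
 * the MTU is rounded down to 1444 and each datagram exactly fills one
 * packet buffer. */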
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
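
/* Added worked example (not in the original source), assuming
 * rxi_nSendFrags is 4 and the constants noted above:
 * rxi_AdjustMaxMTU(1444, 65535) forms 4 * 1444 = 5776, which
 * rxi_AdjustIfMTU then rounds down to 1444 + 3 * (1412 + 4) = 5692. */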
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
	return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
	return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
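
/* Added worked example (not in the original source), assuming the
 * usual constants (RX_HEADER_SIZE 28, RX_JUMBOBUFFERSIZE 1412,
 * RX_JUMBOHEADERSIZE 4, UDP_HDR_SIZE 28) and that RX_MAX_PACKET_SIZE
 * does not bind: rxi_AdjustDgramPackets(4, 1444) computes
 * maxMTU = 4 * (1444 + 28) - 28 = 5860, subtracts the first and last
 * packet sizes (28 + 2 * 1412 + 4 = 2856) leaving 3004, and returns
 * 2 + 3004 / 1416 = 4 packets per jumbogram. */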