src/rx/rx_packet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #ifdef KERNEL
  12 #include "afs/param.h"
  13 #else
  14 #include <afs/param.h>
  15 #endif
  16
  17 RCSID
  18     ("$Header$");
  19
  20 #ifdef KERNEL
  21 #if defined(UKERNEL)
  22 #include "afs/sysincludes.h"
  23 #include "afsincludes.h"
  24 #include "rx/rx_kcommon.h"
  25 #include "rx/rx_clock.h"
  26 #include "rx/rx_queue.h"
  27 #include "rx/rx_packet.h"
  28 #else /* defined(UKERNEL) */
  29 #ifdef RX_KERNEL_TRACE
  30 #include "../rx/rx_kcommon.h"
  31 #endif
  32 #include "h/types.h"
  33 #ifndef AFS_LINUX20_ENV
  34 #include "h/systm.h"
  35 #endif
  36 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
  37 #include "afs/sysincludes.h"
  38 #endif
  39 #if defined(AFS_OBSD_ENV)
  40 #include "h/proc.h"
  41 #endif
  42 #include "h/socket.h"
  43 #if !defined(AFS_SUN5_ENV) &&  !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
  44 #if     !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
  45 #include "sys/mount.h"          /* it gets pulled in by something later anyway */
  46 #endif
  47 #include "h/mbuf.h"
  48 #endif
  49 #include "netinet/in.h"
  50 #include "afs/afs_osi.h"
  51 #include "rx_kmutex.h"
  52 #include "rx/rx_clock.h"
  53 #include "rx/rx_queue.h"
  54 #ifdef  AFS_SUN5_ENV
  55 #include <sys/sysmacros.h>
  56 #endif
  57 #include "rx/rx_packet.h"
  58 #endif /* defined(UKERNEL) */
  59 #include "rx/rx_globals.h"
  60 #else /* KERNEL */
  61 #include "sys/types.h"
  62 #include <sys/stat.h>
  63 #include <errno.h>
  64 #if defined(AFS_NT40_ENV)
  65 #ifdef AFS_NT40_ENV
  66 #include <winsock2.h>
  67 #ifndef EWOULDBLOCK
  68 #define EWOULDBLOCK WSAEWOULDBLOCK
  69 #endif
  70 #else
  71 #include <sys/socket.h>
  72 #include <netinet/in.h>
  73 #endif /* AFS_NT40_ENV */
  74 #include "rx_user.h"
  75 #include "rx_xmit_nt.h"
  76 #include <stdlib.h>
  77 #else
  78 #include <sys/socket.h>
  79 #include <netinet/in.h>
  80 #endif
  81 #include "rx_clock.h"
  82 #include "rx.h"
  83 #include "rx_queue.h"
  84 #ifdef  AFS_SUN5_ENV
  85 #include <sys/sysmacros.h>
  86 #endif
  87 #include "rx_packet.h"
  88 #include "rx_globals.h"
  89 #include <lwp.h>
  90 #include <assert.h>
  91 #include <string.h>
  92 #ifdef HAVE_UNISTD_H
  93 #include <unistd.h>
  94 #endif
  95 #endif /* KERNEL */
  96
  97 #ifdef RX_LOCKS_DB
  98 /* rxdb_fileID is used to identify the lock location, along with line#. */
  99 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
 100 #endif /* RX_LOCKS_DB */
 101 struct rx_packet *rx_mallocedP = 0;
 102
 103 extern char cml_version_number[];
 104
 105 static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);
 106
 107 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
 108                                 afs_int32 ahost, short aport,
 109                                 afs_int32 istack);
 110
 111 static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
 112                                    afs_uint32 first,
 113                                    struct rx_queue * q);
 114 #ifdef RX_ENABLE_TSFPQ
 115 static int
 116 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
 117 #endif
 118
 119 /* some rules about packets:
 120  * 1.  When a packet is allocated, the final iov_buf contains room for
 121  * a security trailer, but iov_len masks that fact.  If the security
 122  * package wants to add the trailer, it may do so, and then extend
 123  * iov_len appropriately.  For this reason, packet's niovecs and
 124  * iov_len fields should be accurate before calling PreparePacket.
 125 */
 126
 127 /* Preconditions:
 128  *        all packet buffers (iov_base) are integral multiples of
 129  *        the word size.
 130  *        offset is an integral multiple of the word size.
 131  */
 132 afs_int32
 133 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
 134 {
 135     unsigned int i;
 136     size_t l;
 137     for (l = 0, i = 1; i < packet->niovecs; i++) {
 138         if (l + packet->wirevec[i].iov_len > offset) {
 139             return
 140                 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
 141                                  (offset - l)));
 142         }
 143         l += packet->wirevec[i].iov_len;
 144     }
 145
 146     return 0;
 147 }
 148
 149 /* Preconditions:
 150  *        all packet buffers (iov_base) are integral multiples of the word size.
 151  *        offset is an integral multiple of the word size.
 152  */
 153 afs_int32
 154 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
 155 {
 156     unsigned int i;
 157     size_t l;
 158     for (l = 0, i = 1; i < packet->niovecs; i++) {
 159         if (l + packet->wirevec[i].iov_len > offset) {
 160             *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
 161                              (offset - l))) = data;
 162             return 0;
 163         }
 164         l += packet->wirevec[i].iov_len;
 165     }
 166
 167     return 0;
 168 }
 169
 170 /* Preconditions:
 171  *        all packet buffers (iov_base) are integral multiples of the
 172  *        word size.
 173  *        offset is an integral multiple of the word size.
 174  * Packet Invariants:
 175  *         all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 176  */
 177 afs_int32
 178 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
 179                   char *out)
 180 {
 181     unsigned int i, j, l, r;
 182     for (l = 0, i = 1; i < packet->niovecs; i++) {
 183         if (l + packet->wirevec[i].iov_len > offset) {
 184             break;
 185         }
 186         l += packet->wirevec[i].iov_len;
 187     }
 188
 189     /* i is the iovec which contains the first little bit of data in which we
 190      * are interested.  l is the total length of everything prior to this iovec.
 191      * j is the number of bytes we can safely copy out of this iovec.
 192      * offset only applies to the first iovec.
 193      */
 194     r = resid;
 195     while ((resid > 0) && (i < packet->niovecs)) {
 196         j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
 197         memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
 198         resid -= j;
 199         out += j;
 200         l += packet->wirevec[i].iov_len;
 201         offset = l;
 202         i++;
 203     }
 204
 205     return (resid ? (r - resid) : r);
 206 }
 207
 208
 209 /* Preconditions:
 210  *        all packet buffers (iov_base) are integral multiples of the
 211  *        word size.
 212  *        offset is an integral multiple of the word size.
 213  */
 214 afs_int32
 215 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
 216 {
 217     int i, j, l, r;
 218     char *b;
 219
 220     for (l = 0, i = 1; i < packet->niovecs; i++) {
 221         if (l + packet->wirevec[i].iov_len > offset) {
 222             break;
 223         }
 224         l += packet->wirevec[i].iov_len;
 225     }
 226
 227     /* i is the iovec which contains the first little bit of data in which we
 228      * are interested.  l is the total length of everything prior to this iovec.
 229      * j is the number of bytes we can safely copy out of this iovec.
 230      * offset only applies to the first iovec.
 231      */
 232     r = resid;
 233     while ((resid > 0) && (i < RX_MAXWVECS)) {
 234         if (i >= packet->niovecs)
 235             if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
 236                 break;
 237
 238         b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
 239         j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
 240         memcpy(b, in, j);
 241         resid -= j;
 242         in += j;
 243         l += packet->wirevec[i].iov_len;
 244         offset = l;
 245         i++;
 246     }
 247
 248     return (resid ? (r - resid) : r);
 249 }
 250
 251 int
 252 rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
 253 {
 254     register struct rx_packet *p, *np;
 255
 256     num_pkts = AllocPacketBufs(class, num_pkts, q);
 257
 258     for (queue_Scan(q, p, np, rx_packet)) {
 259         RX_PACKET_IOV_FULLINIT(p);
 260     }
 261
 262     return num_pkts;
 263 }
 264
 265 #ifdef RX_ENABLE_TSFPQ
 266 static int
 267 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
 268 {
 269     register struct rx_ts_info_t * rx_ts_info;
 270     int transfer, alloc;
 271     SPLVAR;
 272
 273     RX_TS_INFO_GET(rx_ts_info);
 274
 275     transfer = num_pkts - rx_ts_info->_FPQ.len;
 276     if (transfer > 0) {
 277         NETPRI;
 278         MUTEX_ENTER(&rx_freePktQ_lock);
 279
 280         if ((transfer + rx_TSFPQGlobSize) <= rx_nFreePackets) {
 281             transfer += rx_TSFPQGlobSize;
 282         } else if (transfer <= rx_nFreePackets) {
 283             transfer = rx_nFreePackets;
 284         } else {
 285             /* alloc enough for us, plus a few globs for other threads */
 286             alloc = transfer + (3 * rx_TSFPQGlobSize) - rx_nFreePackets;
 287             rxi_MorePacketsNoLock(MAX(alloc, rx_initSendWindow));
 288             transfer = rx_TSFPQGlobSize;
 289         }
 290
 291         RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
 292
 293         MUTEX_EXIT(&rx_freePktQ_lock);
 294         USERPRI;
 295     }
 296
 297     RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
 298
 299     return num_pkts;
 300 }
 301 #else /* RX_ENABLE_TSFPQ */
 302 static int
 303 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
 304 {
 305     struct rx_packet *c;
 306     int i;
 307 #ifdef KERNEL
 308     int overq = 0;
 309 #endif
 310     SPLVAR;
 311
 312     NETPRI;
 313
 314     MUTEX_ENTER(&rx_freePktQ_lock);
 315
 316 #ifdef KERNEL
 317     for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
 318          num_pkts--, overq++);
 319
 320     if (overq) {
 321         rxi_NeedMorePackets = TRUE;
 322         switch (class) {
 323         case RX_PACKET_CLASS_RECEIVE:
 324             rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
 325             break;
 326         case RX_PACKET_CLASS_SEND:
 327             rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
 328             break;
 329         case RX_PACKET_CLASS_SPECIAL:
 330             rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
 331             break;
 332         case RX_PACKET_CLASS_RECV_CBUF:
 333             rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
 334             break;
 335         case RX_PACKET_CLASS_SEND_CBUF:
 336             rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
 337             break;
 338         }
 339     }
 340
 341     if (rx_nFreePackets < num_pkts)
 342         num_pkts = rx_nFreePackets;
 343
 344     if (!num_pkts) {
 345         rxi_NeedMorePackets = TRUE;
 346         goto done;
 347     }
 348 #else /* KERNEL */
 349     if (rx_nFreePackets < num_pkts) {
 350         rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), rx_initSendWindow));
 351     }
 352 #endif /* KERNEL */
 353
 354     for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
 355          i < num_pkts;
 356          i++, c=queue_Next(c, rx_packet)) {
 357         RX_FPQ_MARK_USED(c);
 358     }
 359
 360     queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);
 361
 362     rx_nFreePackets -= num_pkts;
 363
 364 #ifdef KERNEL
 365   done:
 366 #endif
 367     MUTEX_EXIT(&rx_freePktQ_lock);
 368
 369     USERPRI;
 370     return num_pkts;
 371 }
 372 #endif /* RX_ENABLE_TSFPQ */
 373
/*
 * Free every packet on a queue, together with any continuation buffers
 * still attached to those packets
 */
 377 #ifdef RX_ENABLE_TSFPQ
 378 /* num_pkts=0 means queue length is unknown */
 379 int
 380 rxi_FreePackets(int num_pkts, struct rx_queue * q)
 381 {
 382     register struct rx_ts_info_t * rx_ts_info;
 383     register struct rx_packet *c, *nc;
 384     SPLVAR;
 385
 386     osi_Assert(num_pkts >= 0);
 387     RX_TS_INFO_GET(rx_ts_info);
 388
 389     if (!num_pkts) {
 390         for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
 391             rxi_FreeDataBufsTSFPQ(c, 2, 0);
 392         }
 393     } else {
 394         for (queue_Scan(q, c, nc, rx_packet)) {
 395             rxi_FreeDataBufsTSFPQ(c, 2, 0);
 396         }
 397     }
 398
 399     if (num_pkts) {
 400         RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
 401     }
 402
 403     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 404         NETPRI;
 405         MUTEX_ENTER(&rx_freePktQ_lock);
 406
 407         RX_TS_FPQ_LTOG(rx_ts_info);
 408
 409         /* Wakeup anyone waiting for packets */
 410         rxi_PacketsUnWait();
 411
 412         MUTEX_EXIT(&rx_freePktQ_lock);
 413         USERPRI;
 414     }
 415
 416     return num_pkts;
 417 }
 418 #else /* RX_ENABLE_TSFPQ */
 419 /* num_pkts=0 means queue length is unknown */
 420 int
 421 rxi_FreePackets(int num_pkts, struct rx_queue *q)
 422 {
 423     struct rx_queue cbs;
 424     register struct rx_packet *p, *np;
 425     int qlen = 0;
 426     SPLVAR;
 427
 428     osi_Assert(num_pkts >= 0);
 429     queue_Init(&cbs);
 430
 431     if (!num_pkts) {
 432         for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
 433             if (p->niovecs > 2) {
 434                 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
 435             }
 436             RX_FPQ_MARK_FREE(p);
 437         }
 438         if (!num_pkts)
 439             return 0;
 440     } else {
 441         for (queue_Scan(q, p, np, rx_packet)) {
 442             if (p->niovecs > 2) {
 443                 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
 444             }
 445             RX_FPQ_MARK_FREE(p);
 446         }
 447     }
 448
 449     if (qlen) {
 450         queue_SpliceAppend(q, &cbs);
 451         qlen += num_pkts;
 452     } else
 453         qlen = num_pkts;
 454
 455     NETPRI;
 456     MUTEX_ENTER(&rx_freePktQ_lock);
 457
 458     queue_SpliceAppend(&rx_freePacketQueue, q);
 459     rx_nFreePackets += qlen;
 460
 461     /* Wakeup anyone waiting for packets */
 462     rxi_PacketsUnWait();
 463
 464     MUTEX_EXIT(&rx_freePktQ_lock);
 465     USERPRI;
 466
 467     return num_pkts;
 468 }
 469 #endif /* RX_ENABLE_TSFPQ */
 470
 471 /* this one is kind of awful.
 472  * In rxkad, the packet has been all shortened, and everything, ready for
 473  * sending.  All of a sudden, we discover we need some of that space back.
 474  * This isn't terribly general, because it knows that the packets are only
 475  * rounded up to the EBS (userdata + security header).
 476  */
 477 int
 478 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
 479 {
 480     int i;
 481     i = p->niovecs - 1;
 482     if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
 483         if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
 484             p->wirevec[i].iov_len += nb;
 485             return 0;
 486         }
 487     } else {
 488         if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
 489             p->wirevec[i].iov_len += nb;
 490             return 0;
 491         }
 492     }
 493
 494     return 0;
 495 }
 496
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Returns a value <= 0 if successful;
 * otherwise returns the number of bytes (> 0) which it failed to come
 * up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time. Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
 504 int
 505 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
 506 {
 507     int i, nv;
 508     struct rx_queue q;
 509     register struct rx_packet *cb, *ncb;
 510
 511     /* compute the number of cbuf's we need */
 512     nv = nb / RX_CBUFFERSIZE;
 513     if ((nv * RX_CBUFFERSIZE) < nb)
 514         nv++;
 515     if ((nv + p->niovecs) > RX_MAXWVECS)
 516         nv = RX_MAXWVECS - p->niovecs;
 517     if (nv < 1)
 518         return nb;
 519
 520     /* allocate buffers */
 521     queue_Init(&q);
 522     nv = AllocPacketBufs(class, nv, &q);
 523
 524     /* setup packet iovs */
 525     for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
 526         queue_Remove(cb);
 527         p->wirevec[i].iov_base = (caddr_t) cb->localdata;
 528         p->wirevec[i].iov_len = RX_CBUFFERSIZE;
 529     }
 530
 531     nb -= (nv * RX_CBUFFERSIZE);
 532     p->length += (nv * RX_CBUFFERSIZE);
 533     p->niovecs += nv;
 534
 535     return nb;
 536 }
 537
 538 /* Add more packet buffers */
 539 #ifdef RX_ENABLE_TSFPQ
 540 void
 541 rxi_MorePackets(int apackets)
 542 {
 543     struct rx_packet *p, *e;
 544     register struct rx_ts_info_t * rx_ts_info;
 545     int getme;
 546     SPLVAR;
 547
 548     getme = apackets * sizeof(struct rx_packet);
 549     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 550     osi_Assert(p);
 551
 552     PIN(p, getme);              /* XXXXX */
 553     memset((char *)p, 0, getme);
 554     RX_TS_INFO_GET(rx_ts_info);
 555
 556     for (e = p + apackets; p < e; p++) {
 557         RX_PACKET_IOV_INIT(p);
 558         p->niovecs = 2;
 559
 560         RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 561     }
 562     rx_ts_info->_FPQ.delta += apackets;
 563
 564     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 565         NETPRI;
 566         MUTEX_ENTER(&rx_freePktQ_lock);
 567
 568         RX_TS_FPQ_LTOG(rx_ts_info);
 569         rxi_NeedMorePackets = FALSE;
 570         rxi_PacketsUnWait();
 571
 572         MUTEX_EXIT(&rx_freePktQ_lock);
 573         USERPRI;
 574     }
 575 }
 576 #else /* RX_ENABLE_TSFPQ */
 577 void
 578 rxi_MorePackets(int apackets)
 579 {
 580     struct rx_packet *p, *e;
 581     int getme;
 582     SPLVAR;
 583
 584     getme = apackets * sizeof(struct rx_packet);
 585     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 586     osi_Assert(p);
 587
 588     PIN(p, getme);              /* XXXXX */
 589     memset((char *)p, 0, getme);
 590     NETPRI;
 591     MUTEX_ENTER(&rx_freePktQ_lock);
 592
 593     for (e = p + apackets; p < e; p++) {
 594         RX_PACKET_IOV_INIT(p);
 595         p->flags |= RX_PKTFLAG_FREE;
 596         p->niovecs = 2;
 597
 598         queue_Append(&rx_freePacketQueue, p);
 599     }
 600     rx_nFreePackets += apackets;
 601     rxi_NeedMorePackets = FALSE;
 602     rxi_PacketsUnWait();
 603
 604     MUTEX_EXIT(&rx_freePktQ_lock);
 605     USERPRI;
 606 }
 607 #endif /* RX_ENABLE_TSFPQ */
 608
 609 #ifdef RX_ENABLE_TSFPQ
 610 void
 611 rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
 612 {
 613     struct rx_packet *p, *e;
 614     register struct rx_ts_info_t * rx_ts_info;
 615     int getme;
 616     SPLVAR;
 617
 618     getme = apackets * sizeof(struct rx_packet);
 619     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 620
 621     PIN(p, getme);              /* XXXXX */
 622     memset((char *)p, 0, getme);
 623     RX_TS_INFO_GET(rx_ts_info);
 624
 625     for (e = p + apackets; p < e; p++) {
 626         RX_PACKET_IOV_INIT(p);
 627         p->niovecs = 2;
 628
 629         RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 630     }
 631     rx_ts_info->_FPQ.delta += apackets;
 632
 633     if (flush_global &&
 634         (num_keep_local < apackets)) {
 635         NETPRI;
 636         MUTEX_ENTER(&rx_freePktQ_lock);
 637
 638         RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
 639         rxi_NeedMorePackets = FALSE;
 640         rxi_PacketsUnWait();
 641
 642         MUTEX_EXIT(&rx_freePktQ_lock);
 643         USERPRI;
 644     }
 645 }
 646 #endif /* RX_ENABLE_TSFPQ */
 647
 648 #ifndef KERNEL
 649 /* Add more packet buffers */
 650 void
 651 rxi_MorePacketsNoLock(int apackets)
 652 {
 653     struct rx_packet *p, *e;
 654     int getme;
 655
 656     /* allocate enough packets that 1/4 of the packets will be able
 657      * to hold maximal amounts of data */
 658     apackets += (apackets / 4)
 659         * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
 660     do {
 661         getme = apackets * sizeof(struct rx_packet);
 662         p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 663         if (p == NULL) {
 664             apackets -= apackets / 4;
 665             osi_Assert(apackets > 0);
 666         }
 667     } while(p == NULL);
 668     memset((char *)p, 0, getme);
 669
 670     for (e = p + apackets; p < e; p++) {
 671         RX_PACKET_IOV_INIT(p);
 672         p->flags |= RX_PKTFLAG_FREE;
 673         p->niovecs = 2;
 674
 675         queue_Append(&rx_freePacketQueue, p);
 676     }
 677
 678     rx_nFreePackets += apackets;
 679 #ifdef RX_ENABLE_TSFPQ
 680     /* TSFPQ patch also needs to keep track of total packets */
 681     MUTEX_ENTER(&rx_stats_mutex);
 682     rx_nPackets += apackets;
 683     RX_TS_FPQ_COMPUTE_LIMITS;
 684     MUTEX_EXIT(&rx_stats_mutex);
 685 #endif /* RX_ENABLE_TSFPQ */
 686     rxi_NeedMorePackets = FALSE;
 687     rxi_PacketsUnWait();
 688 }
 689 #endif /* !KERNEL */
 690
 691 void
 692 rxi_FreeAllPackets(void)
 693 {
 694     /* must be called at proper interrupt level, etcetera */
 695     /* MTUXXX need to free all Packets */
 696     osi_Free(rx_mallocedP,
 697              (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
 698     UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
 699 }
 700
 701 #ifdef RX_ENABLE_TSFPQ
 702 void
 703 rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
 704 {
 705     register struct rx_ts_info_t * rx_ts_info;
 706     register int xfer;
 707     SPLVAR;
 708
 709     RX_TS_INFO_GET(rx_ts_info);
 710
 711     if (num_keep_local != rx_ts_info->_FPQ.len) {
 712         NETPRI;
 713         MUTEX_ENTER(&rx_freePktQ_lock);
 714         if (num_keep_local < rx_ts_info->_FPQ.len) {
 715             xfer = rx_ts_info->_FPQ.len - num_keep_local;
 716             RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
 717             rxi_PacketsUnWait();
 718         } else {
 719             xfer = num_keep_local - rx_ts_info->_FPQ.len;
 720             if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
 721                 xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
 722             if (rx_nFreePackets < xfer) {
 723                 rxi_MorePacketsNoLock(xfer - rx_nFreePackets);
 724             }
 725             RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
 726         }
 727         MUTEX_EXIT(&rx_freePktQ_lock);
 728         USERPRI;
 729     }
 730 }
 731
 732 void
 733 rxi_FlushLocalPacketsTSFPQ(void)
 734 {
 735     rxi_AdjustLocalPacketsTSFPQ(0, 0);
 736 }
 737 #endif /* RX_ENABLE_TSFPQ */
 738
 739 /* Allocate more packets iff we need more continuation buffers */
 740 /* In kernel, can't page in memory with interrupts disabled, so we
 741  * don't use the event mechanism. */
 742 void
 743 rx_CheckPackets(void)
 744 {
 745     if (rxi_NeedMorePackets) {
 746         rxi_MorePackets(rx_initSendWindow);
 747     }
 748 }
 749
 750 /* In the packet freeing routine below, the assumption is that
 751    we want all of the packets to be used equally frequently, so that we
 752    don't get packet buffers paging out.  It would be just as valid to
 753    assume that we DO want them to page out if not many are being used.
 754    In any event, we assume the former, and append the packets to the end
 755    of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
 762
 763 /* Actually free the packet p. */
 764 #ifdef RX_ENABLE_TSFPQ
 765 void
 766 rxi_FreePacketNoLock(struct rx_packet *p)
 767 {
 768     register struct rx_ts_info_t * rx_ts_info;
 769     dpf(("Free %lx\n", (unsigned long)p));
 770
 771     RX_TS_INFO_GET(rx_ts_info);
 772     RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 773     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 774         RX_TS_FPQ_LTOG(rx_ts_info);
 775     }
 776 }
 777 #else /* RX_ENABLE_TSFPQ */
 778 void
 779 rxi_FreePacketNoLock(struct rx_packet *p)
 780 {
 781     dpf(("Free %lx\n", (unsigned long)p));
 782
 783     RX_FPQ_MARK_FREE(p);
 784     rx_nFreePackets++;
 785     queue_Append(&rx_freePacketQueue, p);
 786 }
 787 #endif /* RX_ENABLE_TSFPQ */
 788
 789 #ifdef RX_ENABLE_TSFPQ
 790 void
 791 rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
 792 {
 793     register struct rx_ts_info_t * rx_ts_info;
 794     dpf(("Free %lx\n", (unsigned long)p));
 795
 796     RX_TS_INFO_GET(rx_ts_info);
 797     RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 798
 799     if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
 800         NETPRI;
 801         MUTEX_ENTER(&rx_freePktQ_lock);
 802
 803         RX_TS_FPQ_LTOG(rx_ts_info);
 804
 805         /* Wakeup anyone waiting for packets */
 806         rxi_PacketsUnWait();
 807
 808         MUTEX_EXIT(&rx_freePktQ_lock);
 809         USERPRI;
 810     }
 811 }
 812 #endif /* RX_ENABLE_TSFPQ */
 813
 814 /*
 815  * free continuation buffers off a packet into a queue
 816  *
 817  * [IN] p      -- packet from which continuation buffers will be freed
 818  * [IN] first  -- iovec offset of first continuation buffer to free
 819  * [IN] q      -- queue into which continuation buffers will be chained
 820  *
 821  * returns:
 822  *   number of continuation buffers freed
 823  */
 824 #ifndef RX_ENABLE_TSFPQ
 825 static int
 826 rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
 827 {
 828     struct iovec *iov;
 829     struct rx_packet * cb;
 830     int count = 0;
 831
 832     for (first = MAX(2, first); first < p->niovecs; first++, count++) {
 833         iov = &p->wirevec[first];
 834         if (!iov->iov_base)
 835             osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
 836         cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
 837         RX_FPQ_MARK_FREE(cb);
 838         queue_Append(q, cb);
 839     }
 840     p->length = 0;
 841     p->niovecs = 0;
 842
 843     return count;
 844 }
 845 #endif
 846
 847 /*
 848  * free packet continuation buffers into the global free packet pool
 849  *
 850  * [IN] p      -- packet from which to free continuation buffers
 851  * [IN] first  -- iovec offset of first continuation buffer to free
 852  *
 853  * returns:
 854  *   zero always
 855  */
 856 int
 857 rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
 858 {
 859     struct iovec *iov;
 860
 861     for (first = MAX(2, first); first < p->niovecs; first++) {
 862         iov = &p->wirevec[first];
 863         if (!iov->iov_base)
 864             osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
 865         rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 866     }
 867     p->length = 0;
 868     p->niovecs = 0;
 869
 870     return 0;
 871 }
 872
 873 #ifdef RX_ENABLE_TSFPQ
 874 /*
 875  * free packet continuation buffers into the thread-local free pool
 876  *
 877  * [IN] p             -- packet from which continuation buffers will be freed
 878  * [IN] first         -- iovec offset of first continuation buffer to free
 879  * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 880  *                       global free pool before returning
 881  *
 882  * returns:
 883  *   zero always
 884  */
 885 static int
 886 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
 887 {
 888     struct iovec *iov;
 889     register struct rx_ts_info_t * rx_ts_info;
 890
 891     RX_TS_INFO_GET(rx_ts_info);
 892
 893     for (first = MAX(2, first); first < p->niovecs; first++) {
 894         iov = &p->wirevec[first];
 895         if (!iov->iov_base)
 896             osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
 897         RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
 898     }
 899     p->length = 0;
 900     p->niovecs = 0;
 901
 902     if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
 903         NETPRI;
 904         MUTEX_ENTER(&rx_freePktQ_lock);
 905
 906         RX_TS_FPQ_LTOG(rx_ts_info);
 907
 908         /* Wakeup anyone waiting for packets */
 909         rxi_PacketsUnWait();
 910
 911         MUTEX_EXIT(&rx_freePktQ_lock);
 912         USERPRI;
 913     }
 914     return 0;
 915 }
 916 #endif /* RX_ENABLE_TSFPQ */
 917
 918 int rxi_nBadIovecs = 0;
 919
 920 /* rxi_RestoreDataBufs
 921  *
 922  * Restore the correct sizes to the iovecs. Called when reusing a packet
 923  * for reading off the wire.
 924  */
 925 void
 926 rxi_RestoreDataBufs(struct rx_packet *p)
 927 {
 928     int i;
 929     struct iovec *iov = &p->wirevec[2];
 930
 931     RX_PACKET_IOV_INIT(p);
 932
 933     for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
 934         if (!iov->iov_base) {
 935             rxi_nBadIovecs++;
 936             p->niovecs = i;
 937             break;
 938         }
 939         iov->iov_len = RX_CBUFFERSIZE;
 940     }
 941 }
 942
 943 #ifdef RX_ENABLE_TSFPQ
 944 int
 945 rxi_TrimDataBufs(struct rx_packet *p, int first)
 946 {
 947     int length;
 948     struct iovec *iov, *end;
 949     register struct rx_ts_info_t * rx_ts_info;
 950     SPLVAR;
 951
 952     if (first != 1)
 953         osi_Panic("TrimDataBufs 1: first must be 1");
 954
 955     /* Skip over continuation buffers containing message data */
 956     iov = &p->wirevec[2];
 957     end = iov + (p->niovecs - 2);
 958     length = p->length - p->wirevec[1].iov_len;
 959     for (; iov < end && length > 0; iov++) {
 960         if (!iov->iov_base)
 961             osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
 962         length -= iov->iov_len;
 963     }
 964
 965     /* iov now points to the first empty data buffer. */
 966     if (iov >= end)
 967         return 0;
 968
 969     RX_TS_INFO_GET(rx_ts_info);
 970     for (; iov < end; iov++) {
 971         if (!iov->iov_base)
 972             osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
 973         RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
 974         p->niovecs--;
 975     }
 976     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 977         NETPRI;
 978         MUTEX_ENTER(&rx_freePktQ_lock);
 979
 980         RX_TS_FPQ_LTOG(rx_ts_info);
 981         rxi_PacketsUnWait();
 982
 983         MUTEX_EXIT(&rx_freePktQ_lock);
 984         USERPRI;
 985     }
 986
 987     return 0;
 988 }
 989 #else /* RX_ENABLE_TSFPQ */
 990 int
 991 rxi_TrimDataBufs(struct rx_packet *p, int first)
 992 {
 993     int length;
 994     struct iovec *iov, *end;
 995     SPLVAR;
 996
 997     if (first != 1)
 998         osi_Panic("TrimDataBufs 1: first must be 1");
 999
1000     /* Skip over continuation buffers containing message data */
1001     iov = &p->wirevec[2];
1002     end = iov + (p->niovecs - 2);
1003     length = p->length - p->wirevec[1].iov_len;
1004     for (; iov < end && length > 0; iov++) {
1005         if (!iov->iov_base)
1006             osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
1007         length -= iov->iov_len;
1008     }
1009
1010     /* iov now points to the first empty data buffer. */
1011     if (iov >= end)
1012         return 0;
1013
1014     NETPRI;
1015     MUTEX_ENTER(&rx_freePktQ_lock);
1016
1017     for (; iov < end; iov++) {
1018         if (!iov->iov_base)
1019             osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1020         rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
1021         p->niovecs--;
1022     }
1023     rxi_PacketsUnWait();
1024
1025     MUTEX_EXIT(&rx_freePktQ_lock);
1026     USERPRI;
1027
1028     return 0;
1029 }
1030 #endif /* RX_ENABLE_TSFPQ */
1031
1032 /* Free the packet p.  P is assumed not to be on any queue, i.e.
1033  * remove it yourself first if you call this routine. */
1034 #ifdef RX_ENABLE_TSFPQ
1035 void
1036 rxi_FreePacket(struct rx_packet *p)
1037 {
1038     rxi_FreeDataBufsTSFPQ(p, 2, 0);
1039     rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
1040 }
1041 #else /* RX_ENABLE_TSFPQ */
1042 void
1043 rxi_FreePacket(struct rx_packet *p)
1044 {
1045     SPLVAR;
1046
1047     NETPRI;
1048     MUTEX_ENTER(&rx_freePktQ_lock);
1049
1050     rxi_FreeDataBufsNoLock(p, 2);
1051     rxi_FreePacketNoLock(p);
1052     /* Wakeup anyone waiting for packets */
1053     rxi_PacketsUnWait();
1054
1055     MUTEX_EXIT(&rx_freePktQ_lock);
1056     USERPRI;
1057 }
1058 #endif /* RX_ENABLE_TSFPQ */
1059
1060 /* rxi_AllocPacket sets up p->length so it reflects the number of
1061  * bytes in the packet at this point, **not including** the header.
1062  * The header is absolutely necessary, besides, this is the way the
1063  * length field is usually used */
1064 #ifdef RX_ENABLE_TSFPQ
1065 struct rx_packet *
1066 rxi_AllocPacketNoLock(int class)
1067 {
1068     register struct rx_packet *p;
1069     register struct rx_ts_info_t * rx_ts_info;
1070
1071     RX_TS_INFO_GET(rx_ts_info);
1072
1073 #ifdef KERNEL
1074     if (rxi_OverQuota(class)) {
1075         rxi_NeedMorePackets = TRUE;
1076         switch (class) {
1077         case RX_PACKET_CLASS_RECEIVE:
1078             rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
1079             break;
1080         case RX_PACKET_CLASS_SEND:
1081             rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
1082             break;
1083         case RX_PACKET_CLASS_SPECIAL:
1084             rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
1085             break;
1086         case RX_PACKET_CLASS_RECV_CBUF:
1087             rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
1088             break;
1089         case RX_PACKET_CLASS_SEND_CBUF:
1090             rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
1091             break;
1092         }
1093         return (struct rx_packet *)0;
1094     }
1095 #endif /* KERNEL */
1096
1097     rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1098     if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1099
1100 #ifdef KERNEL
1101         if (queue_IsEmpty(&rx_freePacketQueue))
1102             osi_Panic("rxi_AllocPacket error");
1103 #else /* KERNEL */
1104         if (queue_IsEmpty(&rx_freePacketQueue))
1105             rxi_MorePacketsNoLock(rx_initSendWindow);
1106 #endif /* KERNEL */
1107
1108
1109         RX_TS_FPQ_GTOL(rx_ts_info);
1110     }
1111
1112     RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1113
1114     dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1115
1116
1117     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1118      * order to truncate outbound packets.  In the near future, may need
1119      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1120      */
1121     RX_PACKET_IOV_FULLINIT(p);
1122     return p;
1123 }
1124 #else /* RX_ENABLE_TSFPQ */
1125 struct rx_packet *
1126 rxi_AllocPacketNoLock(int class)
1127 {
1128     register struct rx_packet *p;
1129
1130 #ifdef KERNEL
1131     if (rxi_OverQuota(class)) {
1132         rxi_NeedMorePackets = TRUE;
1133         switch (class) {
1134         case RX_PACKET_CLASS_RECEIVE:
1135             rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
1136             break;
1137         case RX_PACKET_CLASS_SEND:
1138             rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
1139             break;
1140         case RX_PACKET_CLASS_SPECIAL:
1141             rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
1142             break;
1143         case RX_PACKET_CLASS_RECV_CBUF:
1144             rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
1145             break;
1146         case RX_PACKET_CLASS_SEND_CBUF:
1147             rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
1148             break;
1149         }
1150         return (struct rx_packet *)0;
1151     }
1152 #endif /* KERNEL */
1153
1154     rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1155
1156 #ifdef KERNEL
1157     if (queue_IsEmpty(&rx_freePacketQueue))
1158         osi_Panic("rxi_AllocPacket error");
1159 #else /* KERNEL */
1160     if (queue_IsEmpty(&rx_freePacketQueue))
1161         rxi_MorePacketsNoLock(rx_initSendWindow);
1162 #endif /* KERNEL */
1163
1164     rx_nFreePackets--;
1165     p = queue_First(&rx_freePacketQueue, rx_packet);
1166     queue_Remove(p);
1167     RX_FPQ_MARK_USED(p);
1168
1169     dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1170
1171
1172     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1173      * order to truncate outbound packets.  In the near future, may need
1174      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1175      */
1176     RX_PACKET_IOV_FULLINIT(p);
1177     return p;
1178 }
1179 #endif /* RX_ENABLE_TSFPQ */
1180
1181 #ifdef RX_ENABLE_TSFPQ
1182 struct rx_packet *
1183 rxi_AllocPacketTSFPQ(int class, int pull_global)
1184 {
1185     register struct rx_packet *p;
1186     register struct rx_ts_info_t * rx_ts_info;
1187
1188     RX_TS_INFO_GET(rx_ts_info);
1189
1190     rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1191     if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
1192         MUTEX_ENTER(&rx_freePktQ_lock);
1193
1194         if (queue_IsEmpty(&rx_freePacketQueue))
1195             rxi_MorePacketsNoLock(rx_initSendWindow);
1196
1197         RX_TS_FPQ_GTOL(rx_ts_info);
1198
1199         MUTEX_EXIT(&rx_freePktQ_lock);
1200     } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1201         return NULL;
1202     }
1203
1204     RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1205
1206     dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1207
1208     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1209      * order to truncate outbound packets.  In the near future, may need
1210      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1211      */
1212     RX_PACKET_IOV_FULLINIT(p);
1213     return p;
1214 }
1215 #endif /* RX_ENABLE_TSFPQ */
1216
1217 #ifdef RX_ENABLE_TSFPQ
1218 struct rx_packet *
1219 rxi_AllocPacket(int class)
1220 {
1221     register struct rx_packet *p;
1222
1223     p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
1224     return p;
1225 }
1226 #else /* RX_ENABLE_TSFPQ */
1227 struct rx_packet *
1228 rxi_AllocPacket(int class)
1229 {
1230     register struct rx_packet *p;
1231
1232     MUTEX_ENTER(&rx_freePktQ_lock);
1233     p = rxi_AllocPacketNoLock(class);
1234     MUTEX_EXIT(&rx_freePktQ_lock);
1235     return p;
1236 }
1237 #endif /* RX_ENABLE_TSFPQ */
1238
1239 /* This guy comes up with as many buffers as it {takes,can get} given
1240  * the MTU for this call. It also sets the packet length before
1241  * returning.  caution: this is often called at NETPRI
1242  * Called with call locked.
1243  */
1244 struct rx_packet *
1245 rxi_AllocSendPacket(register struct rx_call *call, int want)
1246 {
1247     register struct rx_packet *p = (struct rx_packet *)0;
1248     register int mud;
1249     register unsigned delta;
1250
1251     SPLVAR;
1252     mud = call->MTU - RX_HEADER_SIZE;
1253     delta =
1254         rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
1255         rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
1256
1257 #ifdef RX_ENABLE_TSFPQ
1258     if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
1259         want += delta;
1260         want = MIN(want, mud);
1261
1262         if ((unsigned)want > p->length)
1263             (void)rxi_AllocDataBuf(p, (want - p->length),
1264                                    RX_PACKET_CLASS_SEND_CBUF);
1265
1266         if ((unsigned)p->length > mud)
1267             p->length = mud;
1268
1269         if (delta >= p->length) {
1270             rxi_FreePacket(p);
1271             p = NULL;
1272         } else {
1273             p->length -= delta;
1274         }
1275         return p;
1276     }
1277 #endif /* RX_ENABLE_TSFPQ */
1278
1279     while (!(call->error)) {
1280         MUTEX_ENTER(&rx_freePktQ_lock);
1281         /* if an error occurred, or we get the packet we want, we're done */
1282         if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
1283             MUTEX_EXIT(&rx_freePktQ_lock);
1284
1285             want += delta;
1286             want = MIN(want, mud);
1287
1288             if ((unsigned)want > p->length)
1289                 (void)rxi_AllocDataBuf(p, (want - p->length),
1290                                        RX_PACKET_CLASS_SEND_CBUF);
1291
1292             if ((unsigned)p->length > mud)
1293                 p->length = mud;
1294
1295             if (delta >= p->length) {
1296                 rxi_FreePacket(p);
1297                 p = NULL;
1298             } else {
1299                 p->length -= delta;
1300             }
1301             break;
1302         }
1303
1304         /* no error occurred, and we didn't get a packet, so we sleep.
1305          * At this point, we assume that packets will be returned
1306          * sooner or later, as packets are acknowledged, and so we
1307          * just wait.  */
1308         NETPRI;
1309         call->flags |= RX_CALL_WAIT_PACKETS;
1310         CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
1311         MUTEX_EXIT(&call->lock);
1312         rx_waitingForPackets = 1;
1313
1314 #ifdef  RX_ENABLE_LOCKS
1315         CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
1316 #else
1317         osi_rxSleep(&rx_waitingForPackets);
1318 #endif
1319         MUTEX_EXIT(&rx_freePktQ_lock);
1320         MUTEX_ENTER(&call->lock);
1321         CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
1322         call->flags &= ~RX_CALL_WAIT_PACKETS;
1323         USERPRI;
1324     }
1325
1326     return p;
1327 }
1328
1329 #ifndef KERNEL
1330 #ifdef AFS_NT40_ENV
1331 /* Windows does not use file descriptors. */
1332 #define CountFDs(amax) 0
1333 #else
/* Count how many descriptors in the range [0, amax) are currently in
 * use, i.e. for how many of them fstat() succeeds. */
static int
CountFDs(register int amax)
{
    struct stat sb;
    int fd;
    int used = 0;

    for (fd = 0; fd < amax; fd++) {
        if (fstat(fd, &sb) == 0)
            used++;
    }
    return used;
}
1350 #endif /* AFS_NT40_ENV */
1351 #else /* KERNEL */
1352
1353 #define CountFDs(amax) amax
1354
1355 #endif /* KERNEL */
1356
1357 #if !defined(KERNEL) || defined(UKERNEL)
1358
1359 /* This function reads a single packet from the interface into the
1360  * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
1361  * (host,port) of the sender are stored in the supplied variables, and
1362  * the data length of the packet is stored in the packet structure.
1363  * The header is decoded. */
1364 int
1365 rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host,
1366                u_short * port)
1367 {
1368     struct sockaddr_in from;
1369     int nbytes;
1370     afs_int32 rlen;
1371     register afs_int32 tlen, savelen;
1372     struct msghdr msg;
1373     rx_computelen(p, tlen);
1374     rx_SetDataSize(p, tlen);    /* this is the size of the user data area */
1375
1376     tlen += RX_HEADER_SIZE;     /* now this is the size of the entire packet */
1377     rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
1378                                  * it once in order to avoid races.  */
1379     tlen = rlen - tlen;
1380     if (tlen > 0) {
1381         tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
1382         if (tlen > 0) {
1383             tlen = rlen - tlen;
1384         } else
1385             tlen = rlen;
1386     } else
1387         tlen = rlen;
1388
1389     /* Extend the last iovec for padding, it's just to make sure that the
1390      * read doesn't return more data than we expect, and is done to get around
1391      * our problems caused by the lack of a length field in the rx header.
1392      * Use the extra buffer that follows the localdata in each packet
1393      * structure. */
1394     savelen = p->wirevec[p->niovecs - 1].iov_len;
1395     p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
1396
1397     memset((char *)&msg, 0, sizeof(msg));
1398     msg.msg_name = (char *)&from;
1399     msg.msg_namelen = sizeof(struct sockaddr_in);
1400     msg.msg_iov = p->wirevec;
1401     msg.msg_iovlen = p->niovecs;
1402     nbytes = rxi_Recvmsg(socket, &msg, 0);
1403
1404     /* restore the vec to its correct state */
1405     p->wirevec[p->niovecs - 1].iov_len = savelen;
1406
1407     p->length = (nbytes - RX_HEADER_SIZE);
1408     if ((nbytes > tlen) || (p->length & 0x8000)) {      /* Bogus packet */
1409         if (nbytes < 0 && errno == EWOULDBLOCK) {
1410             rx_MutexIncrement(rx_stats.noPacketOnRead, rx_stats_mutex);
1411         } else if (nbytes <= 0) {
1412             MUTEX_ENTER(&rx_stats_mutex);
1413             rx_stats.bogusPacketOnRead++;
1414             rx_stats.bogusHost = from.sin_addr.s_addr;
1415             MUTEX_EXIT(&rx_stats_mutex);
1416             dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
1417                  ntohs(from.sin_port), nbytes));
1418         }
1419         return 0;
1420     }
1421 #ifdef RXDEBUG
1422     else if ((rx_intentionallyDroppedOnReadPer100 > 0)
1423                 && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
1424         rxi_DecodePacketHeader(p);
1425
1426         *host = from.sin_addr.s_addr;
1427         *port = from.sin_port;
1428
1429         dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
1430               p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
1431               p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1432               p->length));
1433         rxi_TrimDataBufs(p, 1);
1434         return 0;
1435     }
1436 #endif
1437     else {
1438         /* Extract packet header. */
1439         rxi_DecodePacketHeader(p);
1440
1441         *host = from.sin_addr.s_addr;
1442         *port = from.sin_port;
1443         if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
1444             struct rx_peer *peer;
1445             rx_MutexIncrement(rx_stats.packetsRead[p->header.type - 1], rx_stats_mutex);
1446             /*
1447              * Try to look up this peer structure.  If it doesn't exist,
1448              * don't create a new one -
1449              * we don't keep count of the bytes sent/received if a peer
1450              * structure doesn't already exist.
1451              *
1452              * The peer/connection cleanup code assumes that there is 1 peer
1453              * per connection.  If we actually created a peer structure here
1454              * and this packet was an rxdebug packet, the peer structure would
1455              * never be cleaned up.
1456              */
1457             peer = rxi_FindPeer(*host, *port, 0, 0);
1458             /* Since this may not be associated with a connection,
1459              * it may have no refCount, meaning we could race with
1460              * ReapConnections
1461              */
1462             if (peer && (peer->refCount > 0)) {
1463                 MUTEX_ENTER(&peer->peer_lock);
1464                 hadd32(peer->bytesReceived, p->length);
1465                 MUTEX_EXIT(&peer->peer_lock);
1466             }
1467         }
1468
1469         /* Free any empty packet buffers at the end of this packet */
1470         rxi_TrimDataBufs(p, 1);
1471
1472         return 1;
1473     }
1474 }
1475
1476 #endif /* !KERNEL || UKERNEL */
1477
1478 /* This function splits off the first packet in a jumbo packet.
1479  * As of AFS 3.5, jumbograms contain more than one fixed size
1480  * packet, and the RX_JUMBO_PACKET flag is set in all but the
1481  * last packet header. All packets (except the last) are padded to
1482  * fall on RX_CBUFFERSIZE boundaries.
1483  * HACK: We store the length of the first n-1 packets in the
1484  * last two pad bytes. */
1485
1486 struct rx_packet *
1487 rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
1488                      int first)
1489 {
1490     struct rx_packet *np;
1491     struct rx_jumboHeader *jp;
1492     int niov, i;
1493     struct iovec *iov;
1494     int length;
1495     afs_uint32 temp;
1496
1497     /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
1498      * bytes in length. All but the first packet are preceded by
1499      * an abbreviated four byte header. The length of the last packet
1500      * is calculated from the size of the jumbogram. */
1501     length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1502
1503     if ((int)p->length < length) {
1504         dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
1505         return NULL;
1506     }
1507     niov = p->niovecs - 2;
1508     if (niov < 1) {
1509         dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
1510         return NULL;
1511     }
1512     iov = &p->wirevec[2];
1513     np = RX_CBUF_TO_PACKET(iov->iov_base, p);
1514
1515     /* Get a pointer to the abbreviated packet header */
1516     jp = (struct rx_jumboHeader *)
1517         ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
1518
1519     /* Set up the iovecs for the next packet */
1520     np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
1521     np->wirevec[0].iov_len = sizeof(struct rx_header);
1522     np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
1523     np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
1524     np->niovecs = niov + 1;
1525     for (i = 2, iov++; i <= niov; i++, iov++) {
1526         np->wirevec[i] = *iov;
1527     }
1528     np->length = p->length - length;
1529     p->length = RX_JUMBOBUFFERSIZE;
1530     p->niovecs = 2;
1531
1532     /* Convert the jumbo packet header to host byte order */
1533     temp = ntohl(*(afs_uint32 *) jp);
1534     jp->flags = (u_char) (temp >> 24);
1535     jp->cksum = (u_short) (temp);
1536
1537     /* Fill in the packet header */
1538     np->header = p->header;
1539     np->header.serial = p->header.serial + 1;
1540     np->header.seq = p->header.seq + 1;
1541     np->header.flags = jp->flags;
1542     np->header.spare = jp->cksum;
1543
1544     return np;
1545 }
1546
1547 #ifndef KERNEL
1548 /* Send a udp datagram */
1549 int
1550 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
1551             int length, int istack)
1552 {
1553     struct msghdr msg;
1554         int ret;
1555
1556     memset(&msg, 0, sizeof(msg));
1557     msg.msg_iov = dvec;
1558     msg.msg_iovlen = nvecs;
1559     msg.msg_name = addr;
1560     msg.msg_namelen = sizeof(struct sockaddr_in);
1561
1562     ret = rxi_Sendmsg(socket, &msg, 0);
1563
1564     return ret;
1565 }
1566 #elif !defined(UKERNEL)
1567 /*
1568  * message receipt is done in rxk_input or rx_put.
1569  */
1570
1571 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1572 /*
1573  * Copy an mblock to the contiguous area pointed to by cp.
1574  * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1575  * but it doesn't really.
1576  * Returns the number of bytes not transferred.
1577  * The message is NOT changed.
1578  */
1579 static int
1580 cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
1581 {
1582     register int n;
1583
1584     for (; mp && len > 0; mp = mp->b_cont) {
1585         if (mp->b_datap->db_type != M_DATA) {
1586             return -1;
1587         }
1588         n = MIN(len, (mp->b_wptr - mp->b_rptr));
1589         memcpy(cp, (char *)mp->b_rptr, n);
1590         cp += n;
1591         len -= n;
1592         mp->b_rptr += n;
1593     }
1594     return (len);
1595 }
1596
1597 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1598  * but it doesn't really.
1599  * This sucks, anyway, do it like m_cpy.... below
1600  */
1601 static int
1602 cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
1603            int niovs)
1604 {
1605     register int m, n, o, t, i;
1606
1607     for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1608         if (mp->b_datap->db_type != M_DATA) {
1609             return -1;
1610         }
1611         n = MIN(len, (mp->b_wptr - mp->b_rptr));
1612         len -= n;
1613         while (n) {
1614             if (!t) {
1615                 o = 0;
1616                 i++;
1617                 t = iovs[i].iov_len;
1618             }
1619             m = MIN(n, t);
1620             memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1621             mp->b_rptr += m;
1622             o += m;
1623             t -= m;
1624             n -= m;
1625         }
1626     }
1627     return (len);
1628 }
1629
1630 #define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
1631 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1632 #else
1633 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1634 static int
1635 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1636 {
1637     caddr_t p1, p2;
1638     unsigned int l1, l2, i, t;
1639
1640     if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1641         osi_Panic("m_cpytoiovec");      /* MTUXXX probably don't need this check */
1642
1643     while (off && m)
1644         if (m->m_len <= off) {
1645             off -= m->m_len;
1646             m = m->m_next;
1647             continue;
1648         } else
1649             break;
1650
1651     if (m == NULL)
1652         return len;
1653
1654     p1 = mtod(m, caddr_t) + off;
1655     l1 = m->m_len - off;
1656     i = 0;
1657     p2 = iovs[0].iov_base;
1658     l2 = iovs[0].iov_len;
1659
1660     while (len) {
1661         t = MIN(l1, MIN(l2, (unsigned int)len));
1662         memcpy(p2, p1, t);
1663         p1 += t;
1664         p2 += t;
1665         l1 -= t;
1666         l2 -= t;
1667         len -= t;
1668         if (!l1) {
1669             m = m->m_next;
1670             if (!m)
1671                 break;
1672             p1 = mtod(m, caddr_t);
1673             l1 = m->m_len;
1674         }
1675         if (!l2) {
1676             if (++i >= niovs)
1677                 break;
1678             p2 = iovs[i].iov_base;
1679             l2 = iovs[i].iov_len;
1680         }
1681
1682     }
1683
1684     return len;
1685 }
1686 #endif /* LINUX */
1687 #endif /* AFS_SUN5_ENV */
1688
1689 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1690 int
1691 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1692 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1693      mblk_t *amb;
1694 #else
1695      struct mbuf *amb;
1696 #endif
1697      void (*free) ();
1698      struct rx_packet *phandle;
1699      int hdr_len, data_len;
1700 {
1701     register int code;
1702
1703     code =
1704         m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1705                      phandle->niovecs);
1706     (*free) (amb);
1707
1708     return code;
1709 }
1710 #endif /* LINUX */
1711 #endif /*KERNEL && !UKERNEL */
1712
1713
1714 /* send a response to a debug packet */
1715
1716 struct rx_packet *
1717 rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
1718                        afs_int32 ahost, short aport, int istack)
1719 {
1720     struct rx_debugIn tin;
1721     afs_int32 tl;
1722     struct rx_serverQueueEntry *np, *nqe;
1723
1724     /*
1725      * Only respond to client-initiated Rx debug packets,
1726      * and clear the client flag in the response.
1727      */
1728     if (ap->header.flags & RX_CLIENT_INITIATED) {
1729         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1730         rxi_EncodePacketHeader(ap);
1731     } else {
1732         return ap;
1733     }
1734
1735     rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1736     /* all done with packet, now set length to the truth, so we can
1737      * reuse this packet */
1738     rx_computelen(ap, ap->length);
1739
1740     tin.type = ntohl(tin.type);
1741     tin.index = ntohl(tin.index);
1742     switch (tin.type) {
1743     case RX_DEBUGI_GETSTATS:{
1744             struct rx_debugStats tstat;
1745
1746             /* get basic stats */
1747             memset((char *)&tstat, 0, sizeof(tstat));   /* make sure spares are zero */
1748             tstat.version = RX_DEBUGI_VERSION;
1749 #ifndef RX_ENABLE_LOCKS
1750             tstat.waitingForPackets = rx_waitingForPackets;
1751 #endif
1752             MUTEX_ENTER(&rx_serverPool_lock);
1753             tstat.nFreePackets = htonl(rx_nFreePackets);
1754             tstat.callsExecuted = htonl(rxi_nCalls);
1755             tstat.packetReclaims = htonl(rx_packetReclaims);
1756             tstat.usedFDs = CountFDs(64);
1757             tstat.nWaiting = htonl(rx_nWaiting);
1758             tstat.nWaited = htonl(rx_nWaited);
1759             queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
1760                         tstat.idleThreads);
1761             MUTEX_EXIT(&rx_serverPool_lock);
1762             tstat.idleThreads = htonl(tstat.idleThreads);
1763             tl = sizeof(struct rx_debugStats) - ap->length;
1764             if (tl > 0)
1765                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1766
1767             if (tl <= 0) {
1768                 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1769                                (char *)&tstat);
1770                 ap->length = sizeof(struct rx_debugStats);
1771                 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1772                 rx_computelen(ap, ap->length);
1773             }
1774             break;
1775         }
1776
1777     case RX_DEBUGI_GETALLCONN:
1778     case RX_DEBUGI_GETCONN:{
1779             int i, j;
1780             register struct rx_connection *tc;
1781             struct rx_call *tcall;
1782             struct rx_debugConn tconn;
1783             int all = (tin.type == RX_DEBUGI_GETALLCONN);
1784
1785
1786             tl = sizeof(struct rx_debugConn) - ap->length;
1787             if (tl > 0)
1788                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1789             if (tl > 0)
1790                 return ap;
1791
1792             memset((char *)&tconn, 0, sizeof(tconn));   /* make sure spares are zero */
1793             /* get N'th (maybe) "interesting" connection info */
1794             for (i = 0; i < rx_hashTableSize; i++) {
1795 #if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
                 */
1799 #ifdef AFS_PTHREAD_ENV
1800                 pthread_yield();
1801 #else
1802                 (void)IOMGR_Poll();
1803 #endif
1804 #endif
1805                 MUTEX_ENTER(&rx_connHashTable_lock);
1806                 /* We might be slightly out of step since we are not
1807                  * locking each call, but this is only debugging output.
1808                  */
1809                 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
1810                     if ((all || rxi_IsConnInteresting(tc))
1811                         && tin.index-- <= 0) {
1812                         tconn.host = tc->peer->host;
1813                         tconn.port = tc->peer->port;
1814                         tconn.cid = htonl(tc->cid);
1815                         tconn.epoch = htonl(tc->epoch);
1816                         tconn.serial = htonl(tc->serial);
1817                         for (j = 0; j < RX_MAXCALLS; j++) {
1818                             tconn.callNumber[j] = htonl(tc->callNumber[j]);
1819                             if ((tcall = tc->call[j])) {
1820                                 tconn.callState[j] = tcall->state;
1821                                 tconn.callMode[j] = tcall->mode;
1822                                 tconn.callFlags[j] = tcall->flags;
1823                                 if (queue_IsNotEmpty(&tcall->rq))
1824                                     tconn.callOther[j] |= RX_OTHER_IN;
1825                                 if (queue_IsNotEmpty(&tcall->tq))
1826                                     tconn.callOther[j] |= RX_OTHER_OUT;
1827                             } else
1828                                 tconn.callState[j] = RX_STATE_NOTINIT;
1829                         }
1830
1831                         tconn.natMTU = htonl(tc->peer->natMTU);
1832                         tconn.error = htonl(tc->error);
1833                         tconn.flags = tc->flags;
1834                         tconn.type = tc->type;
1835                         tconn.securityIndex = tc->securityIndex;
1836                         if (tc->securityObject) {
1837                             RXS_GetStats(tc->securityObject, tc,
1838                                          &tconn.secStats);
1839 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1840 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1841                             DOHTONL(flags);
1842                             DOHTONL(expires);
1843                             DOHTONL(packetsReceived);
1844                             DOHTONL(packetsSent);
1845                             DOHTONL(bytesReceived);
1846                             DOHTONL(bytesSent);
1847                             for (i = 0;
1848                                  i <
1849                                  sizeof(tconn.secStats.spares) /
1850                                  sizeof(short); i++)
1851                                 DOHTONS(spares[i]);
1852                             for (i = 0;
1853                                  i <
1854                                  sizeof(tconn.secStats.sparel) /
1855                                  sizeof(afs_int32); i++)
1856                                 DOHTONL(sparel[i]);
1857                         }
1858
1859                         MUTEX_EXIT(&rx_connHashTable_lock);
1860                         rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1861                                        (char *)&tconn);
1862                         tl = ap->length;
1863                         ap->length = sizeof(struct rx_debugConn);
1864                         rxi_SendDebugPacket(ap, asocket, ahost, aport,
1865                                             istack);
1866                         ap->length = tl;
1867                         return ap;
1868                     }
1869                 }
1870                 MUTEX_EXIT(&rx_connHashTable_lock);
1871             }
1872             /* if we make it here, there are no interesting packets */
1873             tconn.cid = htonl(0xffffffff);      /* means end */
1874             rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1875                            (char *)&tconn);
1876             tl = ap->length;
1877             ap->length = sizeof(struct rx_debugConn);
1878             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1879             ap->length = tl;
1880             break;
1881         }
1882
1883         /*
1884          * Pass back all the peer structures we have available
1885          */
1886
1887     case RX_DEBUGI_GETPEER:{
1888             int i;
1889             register struct rx_peer *tp;
1890             struct rx_debugPeer tpeer;
1891
1892
1893             tl = sizeof(struct rx_debugPeer) - ap->length;
1894             if (tl > 0)
1895                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1896             if (tl > 0)
1897                 return ap;
1898
1899             memset((char *)&tpeer, 0, sizeof(tpeer));
1900             for (i = 0; i < rx_hashTableSize; i++) {
1901 #if !defined(KERNEL)
1902                 /* the time complexity of the algorithm used here
1903                  * exponentially increases with the number of peers.
1904                  *
1905                  * Yielding after processing each hash table entry
1906                  * and dropping rx_peerHashTable_lock.
1907                  * also increases the risk that we will miss a new
1908                  * entry - but we are willing to live with this
1909                  * limitation since this is meant for debugging only
1910                  */
1911 #ifdef AFS_PTHREAD_ENV
1912                 pthread_yield();
1913 #else
1914                 (void)IOMGR_Poll();
1915 #endif
1916 #endif
1917                 MUTEX_ENTER(&rx_peerHashTable_lock);
1918                 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1919                     if (tin.index-- <= 0) {
1920                         tpeer.host = tp->host;
1921                         tpeer.port = tp->port;
1922                         tpeer.ifMTU = htons(tp->ifMTU);
1923                         tpeer.idleWhen = htonl(tp->idleWhen);
1924                         tpeer.refCount = htons(tp->refCount);
1925                         tpeer.burstSize = tp->burstSize;
1926                         tpeer.burst = tp->burst;
1927                         tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1928                         tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1929                         tpeer.rtt = htonl(tp->rtt);
1930                         tpeer.rtt_dev = htonl(tp->rtt_dev);
1931                         tpeer.timeout.sec = htonl(tp->timeout.sec);
1932                         tpeer.timeout.usec = htonl(tp->timeout.usec);
1933                         tpeer.nSent = htonl(tp->nSent);
1934                         tpeer.reSends = htonl(tp->reSends);
1935                         tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1936                         tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1937                         tpeer.rateFlag = htonl(tp->rateFlag);
1938                         tpeer.natMTU = htons(tp->natMTU);
1939                         tpeer.maxMTU = htons(tp->maxMTU);
1940                         tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1941                         tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1942                         tpeer.MTU = htons(tp->MTU);
1943                         tpeer.cwind = htons(tp->cwind);
1944                         tpeer.nDgramPackets = htons(tp->nDgramPackets);
1945                         tpeer.congestSeq = htons(tp->congestSeq);
1946                         tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1947                         tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1948                         tpeer.bytesReceived.high =
1949                             htonl(tp->bytesReceived.high);
1950                         tpeer.bytesReceived.low =
1951                             htonl(tp->bytesReceived.low);
1952
1953                         MUTEX_EXIT(&rx_peerHashTable_lock);
1954                         rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1955                                        (char *)&tpeer);
1956                         tl = ap->length;
1957                         ap->length = sizeof(struct rx_debugPeer);
1958                         rxi_SendDebugPacket(ap, asocket, ahost, aport,
1959                                             istack);
1960                         ap->length = tl;
1961                         return ap;
1962                     }
1963                 }
1964                 MUTEX_EXIT(&rx_peerHashTable_lock);
1965             }
1966             /* if we make it here, there are no interesting packets */
1967             tpeer.host = htonl(0xffffffff);     /* means end */
1968             rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1969                            (char *)&tpeer);
1970             tl = ap->length;
1971             ap->length = sizeof(struct rx_debugPeer);
1972             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1973             ap->length = tl;
1974             break;
1975         }
1976
1977     case RX_DEBUGI_RXSTATS:{
1978             int i;
1979             afs_int32 *s;
1980
1981             tl = sizeof(rx_stats) - ap->length;
1982             if (tl > 0)
1983                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1984             if (tl > 0)
1985                 return ap;
1986
1987             /* Since its all int32s convert to network order with a loop. */
1988             MUTEX_ENTER(&rx_stats_mutex);
1989             s = (afs_int32 *) & rx_stats;
1990             for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
1991                 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
1992
1993             tl = ap->length;
1994             ap->length = sizeof(rx_stats);
1995             MUTEX_EXIT(&rx_stats_mutex);
1996             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1997             ap->length = tl;
1998             break;
1999         }
2000
2001     default:
2002         /* error response packet */
2003         tin.type = htonl(RX_DEBUGI_BADTYPE);
2004         tin.index = tin.type;
2005         rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
2006         tl = ap->length;
2007         ap->length = sizeof(struct rx_debugIn);
2008         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2009         ap->length = tl;
2010         break;
2011     }
2012     return ap;
2013 }
2014
2015 struct rx_packet *
2016 rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
2017                          afs_int32 ahost, short aport, int istack)
2018 {
2019     afs_int32 tl;
2020
2021     /*
2022      * Only respond to client-initiated version requests, and
2023      * clear that flag in the response.
2024      */
2025     if (ap->header.flags & RX_CLIENT_INITIATED) {
2026         char buf[66];
2027
2028         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2029         rxi_EncodePacketHeader(ap);
2030         memset(buf, 0, sizeof(buf));
2031         strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2032         rx_packetwrite(ap, 0, 65, buf);
2033         tl = ap->length;
2034         ap->length = 65;
2035         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2036         ap->length = tl;
2037     }
2038
2039     return ap;
2040 }
2041
2042
2043 /* send a debug packet back to the sender */
2044 static void
2045 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2046                     afs_int32 ahost, short aport, afs_int32 istack)
2047 {
2048     struct sockaddr_in taddr;
2049     int i;
2050     int nbytes;
2051     int saven = 0;
2052     size_t savelen = 0;
2053 #ifdef KERNEL
2054     int waslocked = ISAFS_GLOCK();
2055 #endif
2056
2057     taddr.sin_family = AF_INET;
2058     taddr.sin_port = aport;
2059     taddr.sin_addr.s_addr = ahost;
2060 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2061     taddr.sin_len = sizeof(struct sockaddr_in);
2062 #endif
2063
2064     /* We need to trim the niovecs. */
2065     nbytes = apacket->length;
2066     for (i = 1; i < apacket->niovecs; i++) {
2067         if (nbytes <= apacket->wirevec[i].iov_len) {
2068             savelen = apacket->wirevec[i].iov_len;
2069             saven = apacket->niovecs;
2070             apacket->wirevec[i].iov_len = nbytes;
2071             apacket->niovecs = i + 1;   /* so condition fails because i == niovecs */
2072         } else
2073             nbytes -= apacket->wirevec[i].iov_len;
2074     }
2075 #ifdef KERNEL
2076 #ifdef RX_KERNEL_TRACE
2077     if (ICL_SETACTIVE(afs_iclSetp)) {
2078         if (!waslocked)
2079             AFS_GLOCK();
2080         afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2081                    "before osi_NetSend()");
2082         AFS_GUNLOCK();
2083     } else
2084 #else
2085     if (waslocked)
2086         AFS_GUNLOCK();
2087 #endif
2088 #endif
2089     /* debug packets are not reliably delivered, hence the cast below. */
2090     (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2091                       apacket->length + RX_HEADER_SIZE, istack);
2092 #ifdef KERNEL
2093 #ifdef RX_KERNEL_TRACE
2094     if (ICL_SETACTIVE(afs_iclSetp)) {
2095         AFS_GLOCK();
2096         afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2097                    "after osi_NetSend()");
2098         if (!waslocked)
2099             AFS_GUNLOCK();
2100     } else
2101 #else
2102     if (waslocked)
2103         AFS_GLOCK();
2104 #endif
2105 #endif
2106     if (saven) {                /* means we truncated the packet above. */
2107         apacket->wirevec[i - 1].iov_len = savelen;
2108         apacket->niovecs = saven;
2109     }
2110
2111 }
2112
2113 /* Send the packet to appropriate destination for the specified
2114  * call.  The header is first encoded and placed in the packet.
2115  */
2116 void
2117 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2118                struct rx_packet *p, int istack)
2119 {
2120 #if defined(KERNEL)
2121     int waslocked;
2122 #endif
2123     int code;
2124     struct sockaddr_in addr;
2125     register struct rx_peer *peer = conn->peer;
2126     osi_socket socket;
2127 #ifdef RXDEBUG
2128     char deliveryType = 'S';
2129 #endif
2130     /* The address we're sending the packet to */
2131     memset(&addr, 0, sizeof(addr));
2132     addr.sin_family = AF_INET;
2133     addr.sin_port = peer->port;
2134     addr.sin_addr.s_addr = peer->host;
2135
2136     /* This stuff should be revamped, I think, so that most, if not
2137      * all, of the header stuff is always added here.  We could
2138      * probably do away with the encode/decode routines. XXXXX */
2139
2140     /* Stamp each packet with a unique serial number.  The serial
2141      * number is maintained on a connection basis because some types
2142      * of security may be based on the serial number of the packet,
2143      * and security is handled on a per authenticated-connection
2144      * basis. */
2145     /* Pre-increment, to guarantee no zero serial number; a zero
2146      * serial number means the packet was never sent. */
2147     MUTEX_ENTER(&conn->conn_data_lock);
2148     p->header.serial = ++conn->serial;
2149     MUTEX_EXIT(&conn->conn_data_lock);
2150     /* This is so we can adjust retransmit time-outs better in the face of
2151      * rapidly changing round-trip times.  RTO estimation is not a la Karn.
2152      */
2153     if (p->firstSerial == 0) {
2154         p->firstSerial = p->header.serial;
2155     }
2156 #ifdef RXDEBUG
2157     /* If an output tracer function is defined, call it with the packet and
2158      * network address.  Note this function may modify its arguments. */
2159     if (rx_almostSent) {
2160         int drop = (*rx_almostSent) (p, &addr);
2161         /* drop packet if return value is non-zero? */
2162         if (drop)
2163             deliveryType = 'D'; /* Drop the packet */
2164     }
2165 #endif
2166
2167     /* Get network byte order header */
2168     rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
2169                                  * touch ALL the fields */
2170
2171     /* Send the packet out on the same socket that related packets are being
2172      * received on */
2173     socket =
2174         (conn->type ==
2175          RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2176
2177 #ifdef RXDEBUG
2178     /* Possibly drop this packet,  for testing purposes */
2179     if ((deliveryType == 'D')
2180         || ((rx_intentionallyDroppedPacketsPer100 > 0)
2181             && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2182         deliveryType = 'D';     /* Drop the packet */
2183     } else {
2184         deliveryType = 'S';     /* Send the packet */
2185 #endif /* RXDEBUG */
2186
2187         /* Loop until the packet is sent.  We'd prefer just to use a
2188          * blocking socket, but unfortunately the interface doesn't
2189          * allow us to have the socket block in send mode, and not
2190          * block in receive mode */
2191 #ifdef KERNEL
2192         waslocked = ISAFS_GLOCK();
2193 #ifdef RX_KERNEL_TRACE
2194         if (ICL_SETACTIVE(afs_iclSetp)) {
2195             if (!waslocked)
2196                 AFS_GLOCK();
2197             afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2198                        "before osi_NetSend()");
2199             AFS_GUNLOCK();
2200         } else
2201 #else
2202         if (waslocked)
2203             AFS_GUNLOCK();
2204 #endif
2205 #endif
2206         if ((code =
2207              osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2208                          p->length + RX_HEADER_SIZE, istack)) != 0) {
2209             /* send failed, so let's hurry up the resend, eh? */
2210             rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
2211             p->retryTime = p->timeSent; /* resend it very soon */
2212             clock_Addmsec(&(p->retryTime),
2213                           10 + (((afs_uint32) p->backoff) << 8));
2214             /* Some systems are nice and tell us right away that we cannot
2215              * reach this recipient by returning an error code.
2216              * So, when this happens let's "down" the host NOW so
2217              * we don't sit around waiting for this host to timeout later.
2218              */
2219             if (call &&
2220 #ifdef AFS_NT40_ENV
2221                 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2222 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2223                 code == -ENETUNREACH
2224 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2225                 code == EHOSTUNREACH
2226 #else
2227                 0
2228 #endif
2229                 )
2230                 call->lastReceiveTime = 0;
2231         }
2232 #ifdef KERNEL
2233 #ifdef RX_KERNEL_TRACE
2234         if (ICL_SETACTIVE(afs_iclSetp)) {
2235             AFS_GLOCK();
2236             afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2237                        "after osi_NetSend()");
2238             if (!waslocked)
2239                 AFS_GUNLOCK();
2240         } else
2241 #else
2242         if (waslocked)
2243             AFS_GLOCK();
2244 #endif
2245 #endif
2246 #ifdef RXDEBUG
2247     }
2248     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2249 #endif
2250     rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2251     MUTEX_ENTER(&peer->peer_lock);
2252     hadd32(peer->bytesSent, p->length);
2253     MUTEX_EXIT(&peer->peer_lock);
2254 }
2255
2256 /* Send a list of packets to appropriate destination for the specified
2257  * connection.  The headers are first encoded and placed in the packets.
2258  */
2259 void
2260 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2261                    struct rx_packet **list, int len, int istack)
2262 {
2263 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2264     int waslocked;
2265 #endif
2266     struct sockaddr_in addr;
2267     register struct rx_peer *peer = conn->peer;
2268     osi_socket socket;
2269     struct rx_packet *p = NULL;
2270     struct iovec wirevec[RX_MAXIOVECS];
2271     int i, length, code;
2272     afs_uint32 serial;
2273     afs_uint32 temp;
2274     struct rx_jumboHeader *jp;
2275 #ifdef RXDEBUG
2276     char deliveryType = 'S';
2277 #endif
2278     /* The address we're sending the packet to */
2279     addr.sin_family = AF_INET;
2280     addr.sin_port = peer->port;
2281     addr.sin_addr.s_addr = peer->host;
2282
2283     if (len + 1 > RX_MAXIOVECS) {
2284         osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2285     }
2286
2287     /*
2288      * Stamp the packets in this jumbogram with consecutive serial numbers
2289      */
2290     MUTEX_ENTER(&conn->conn_data_lock);
2291     serial = conn->serial;
2292     conn->serial += len;
2293     MUTEX_EXIT(&conn->conn_data_lock);
2294
2295
2296     /* This stuff should be revamped, I think, so that most, if not
2297      * all, of the header stuff is always added here.  We could
2298      * probably do away with the encode/decode routines. XXXXX */
2299
2300     jp = NULL;
2301     length = RX_HEADER_SIZE;
2302     wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2303     wirevec[0].iov_len = RX_HEADER_SIZE;
2304     for (i = 0; i < len; i++) {
2305         p = list[i];
2306
2307         /* The whole 3.5 jumbogram scheme relies on packets fitting
2308          * in a single packet buffer. */
2309         if (p->niovecs > 2) {
2310             osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2311         }
2312
2313         /* Set the RX_JUMBO_PACKET flags in all but the last packets
2314          * in this chunk.  */
2315         if (i < len - 1) {
2316             if (p->length != RX_JUMBOBUFFERSIZE) {
2317                 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2318             }
2319             p->header.flags |= RX_JUMBO_PACKET;
2320             length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2321             wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2322         } else {
2323             wirevec[i + 1].iov_len = p->length;
2324             length += p->length;
2325         }
2326         wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2327         if (jp != NULL) {
2328             /* Convert jumbo packet header to network byte order */
2329             temp = (afs_uint32) (p->header.flags) << 24;
2330             temp |= (afs_uint32) (p->header.spare);
2331             *(afs_uint32 *) jp = htonl(temp);
2332         }
2333         jp = (struct rx_jumboHeader *)
2334             ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2335
2336         /* Stamp each packet with a unique serial number.  The serial
2337          * number is maintained on a connection basis because some types
2338          * of security may be based on the serial number of the packet,
2339          * and security is handled on a per authenticated-connection
2340          * basis. */
2341         /* Pre-increment, to guarantee no zero serial number; a zero
2342          * serial number means the packet was never sent. */
2343         p->header.serial = ++serial;
2344         /* This is so we can adjust retransmit time-outs better in the face of
2345          * rapidly changing round-trip times.  RTO estimation is not a la Karn.
2346          */
2347         if (p->firstSerial == 0) {
2348             p->firstSerial = p->header.serial;
2349         }
2350 #ifdef RXDEBUG
2351         /* If an output tracer function is defined, call it with the packet and
2352          * network address.  Note this function may modify its arguments. */
2353         if (rx_almostSent) {
2354             int drop = (*rx_almostSent) (p, &addr);
2355             /* drop packet if return value is non-zero? */
2356             if (drop)
2357                 deliveryType = 'D';     /* Drop the packet */
2358         }
2359 #endif
2360
2361         /* Get network byte order header */
2362         rxi_EncodePacketHeader(p);      /* XXX in the event of rexmit, etc, don't need to
2363                                          * touch ALL the fields */
2364     }
2365
2366     /* Send the packet out on the same socket that related packets are being
2367      * received on */
2368     socket =
2369         (conn->type ==
2370          RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2371
2372 #ifdef RXDEBUG
2373     /* Possibly drop this packet,  for testing purposes */
2374     if ((deliveryType == 'D')
2375         || ((rx_intentionallyDroppedPacketsPer100 > 0)
2376             && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2377         deliveryType = 'D';     /* Drop the packet */
2378     } else {
2379         deliveryType = 'S';     /* Send the packet */
2380 #endif /* RXDEBUG */
2381
2382         /* Loop until the packet is sent.  We'd prefer just to use a
2383          * blocking socket, but unfortunately the interface doesn't
2384          * allow us to have the socket block in send mode, and not
2385          * block in receive mode */
2386 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2387         waslocked = ISAFS_GLOCK();
2388         if (!istack && waslocked)
2389             AFS_GUNLOCK();
2390 #endif
2391         if ((code =
2392              osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2393                          istack)) != 0) {
2394             /* send failed, so let's hurry up the resend, eh? */
2395             rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
2396             for (i = 0; i < len; i++) {
2397                 p = list[i];
2398                 p->retryTime = p->timeSent;     /* resend it very soon */
2399                 clock_Addmsec(&(p->retryTime),
2400                               10 + (((afs_uint32) p->backoff) << 8));
2401             }
2402             /* Some systems are nice and tell us right away that we cannot
2403              * reach this recipient by returning an error code.
2404              * So, when this happens let's "down" the host NOW so
2405              * we don't sit around waiting for this host to timeout later.
2406              */
2407             if (call &&
2408 #ifdef AFS_NT40_ENV
2409                 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2410 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2411                 code == -ENETUNREACH
2412 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2413                 code == EHOSTUNREACH
2414 #else
2415                 0
2416 #endif
2417                 )
2418                 call->lastReceiveTime = 0;
2419         }
2420 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2421         if (!istack && waslocked)
2422             AFS_GLOCK();
2423 #endif
2424 #ifdef RXDEBUG
2425     }
2426
2427     assert(p != NULL);
2428
2429     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2430
2431 #endif
2432     rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2433     MUTEX_ENTER(&peer->peer_lock);
2434     hadd32(peer->bytesSent, p->length);
2435     MUTEX_EXIT(&peer->peer_lock);
2436 }
2437
2438
2439 /* Send a "special" packet to the peer connection.  If call is
2440  * specified, then the packet is directed to a specific call channel
2441  * associated with the connection, otherwise it is directed to the
2442  * connection only. Uses optionalPacket if it is supplied, rather than
2443  * allocating a new packet buffer.  Nbytes is the length of the data
2444  * portion of the packet.  If data is non-null, nbytes of data are
2445  * copied into the packet.  Type is the type of the packet, as defined
2446  * in rx.h.  Bug: there's a lot of duplication between this and other
2447  * routines.  This needs to be cleaned up. */
2448 struct rx_packet *
2449 rxi_SendSpecial(register struct rx_call *call,
2450                 register struct rx_connection *conn,
2451                 struct rx_packet *optionalPacket, int type, char *data,
2452                 int nbytes, int istack)
2453 {
2454     /* Some of the following stuff should be common code for all
2455      * packet sends (it's repeated elsewhere) */
2456     register struct rx_packet *p;
2457     unsigned int i = 0;
2458     int savelen = 0, saven = 0;
2459     int channel, callNumber;
2460     if (call) {
2461         channel = call->channel;
2462         callNumber = *call->callNumber;
2463         /* BUSY packets refer to the next call on this connection */
2464         if (type == RX_PACKET_TYPE_BUSY) {
2465             callNumber++;
2466         }
2467     } else {
2468         channel = 0;
2469         callNumber = 0;
2470     }
2471     p = optionalPacket;
2472     if (!p) {
2473         p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2474         if (!p)
2475             osi_Panic("rxi_SendSpecial failure");
2476     }
2477
2478     if (nbytes != -1)
2479         p->length = nbytes;
2480     else
2481         nbytes = p->length;
2482     p->header.serviceId = conn->serviceId;
2483     p->header.securityIndex = conn->securityIndex;
2484     p->header.cid = (conn->cid | channel);
2485     p->header.callNumber = callNumber;
2486     p->header.seq = 0;
2487     p->header.epoch = conn->epoch;
2488     p->header.type = type;
2489     p->header.flags = 0;
2490     if (conn->type == RX_CLIENT_CONNECTION)
2491         p->header.flags |= RX_CLIENT_INITIATED;
2492     if (data)
2493         rx_packetwrite(p, 0, nbytes, data);
2494
2495     for (i = 1; i < p->niovecs; i++) {
2496         if (nbytes <= p->wirevec[i].iov_len) {
2497             savelen = p->wirevec[i].iov_len;
2498             saven = p->niovecs;
2499             p->wirevec[i].iov_len = nbytes;
2500             p->niovecs = i + 1; /* so condition fails because i == niovecs */
2501         } else
2502             nbytes -= p->wirevec[i].iov_len;
2503     }
2504
2505     if (call)
2506         rxi_Send(call, p, istack);
2507     else
2508         rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2509     if (saven) {                /* means we truncated the packet above.  We probably don't  */
2510         /* really need to do this, but it seems safer this way, given that  */
2511         /* sneaky optionalPacket... */
2512         p->wirevec[i - 1].iov_len = savelen;
2513         p->niovecs = saven;
2514     }
2515     if (!optionalPacket)
2516         rxi_FreePacket(p);
2517     return optionalPacket;
2518 }
2519
2520
2521 /* Encode the packet's header (from the struct header in the packet to
2522  * the net byte order representation in the wire representation of the
2523  * packet, which is what is actually sent out on the wire) */
2524 void
2525 rxi_EncodePacketHeader(register struct rx_packet *p)
2526 {
2527     register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2528
2529     memset((char *)buf, 0, RX_HEADER_SIZE);
2530     *buf++ = htonl(p->header.epoch);
2531     *buf++ = htonl(p->header.cid);
2532     *buf++ = htonl(p->header.callNumber);
2533     *buf++ = htonl(p->header.seq);
2534     *buf++ = htonl(p->header.serial);
2535     *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2536                    | (((afs_uint32) p->header.flags) << 16)
2537                    | (p->header.userStatus << 8) | p->header.securityIndex);
2538     /* Note: top 16 bits of this next word were reserved */
2539     *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
2540 }
2541
2542 /* Decode the packet's header (from net byte order to a struct header) */
2543 void
2544 rxi_DecodePacketHeader(register struct rx_packet *p)
2545 {
2546     register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2547     afs_uint32 temp;
2548
2549     p->header.epoch = ntohl(*buf);
2550     buf++;
2551     p->header.cid = ntohl(*buf);
2552     buf++;
2553     p->header.callNumber = ntohl(*buf);
2554     buf++;
2555     p->header.seq = ntohl(*buf);
2556     buf++;
2557     p->header.serial = ntohl(*buf);
2558     buf++;
2559
2560     temp = ntohl(*buf);
2561     buf++;
2562
2563     /* C will truncate byte fields to bytes for me */
2564     p->header.type = temp >> 24;
2565     p->header.flags = temp >> 16;
2566     p->header.userStatus = temp >> 8;
2567     p->header.securityIndex = temp >> 0;
2568
2569     temp = ntohl(*buf);
2570     buf++;
2571
2572     p->header.serviceId = (temp & 0xffff);
2573     p->header.spare = temp >> 16;
2574     /* Note: top 16 bits of this last word are the security checksum */
2575 }
2576
2577 void
2578 rxi_PrepareSendPacket(register struct rx_call *call,
2579                       register struct rx_packet *p, register int last)
2580 {
2581     register struct rx_connection *conn = call->conn;
2582     int i;
2583     ssize_t len;                /* len must be a signed type; it can go negative */
2584
2585     p->flags &= ~RX_PKTFLAG_ACKED;
2586     p->header.cid = (conn->cid | call->channel);
2587     p->header.serviceId = conn->serviceId;
2588     p->header.securityIndex = conn->securityIndex;
2589
2590     /* No data packets on call 0. Where do these come from? */
2591     if (*call->callNumber == 0)
2592         *call->callNumber = 1;
2593
2594     p->header.callNumber = *call->callNumber;
2595     p->header.seq = call->tnext++;
2596     p->header.epoch = conn->epoch;
2597     p->header.type = RX_PACKET_TYPE_DATA;
2598     p->header.flags = 0;
2599     p->header.spare = 0;
2600     if (conn->type == RX_CLIENT_CONNECTION)
2601         p->header.flags |= RX_CLIENT_INITIATED;
2602
2603     if (last)
2604         p->header.flags |= RX_LAST_PACKET;
2605
2606     clock_Zero(&p->retryTime);  /* Never yet transmitted */
2607     clock_Zero(&p->firstSent);  /* Never yet transmitted */
2608     p->header.serial = 0;       /* Another way of saying never transmitted... */
2609     p->backoff = 0;
2610
2611     /* Now that we're sure this is the last data on the call, make sure
2612      * that the "length" and the sum of the iov_lens matches. */
2613     len = p->length + call->conn->securityHeaderSize;
2614
2615     for (i = 1; i < p->niovecs && len > 0; i++) {
2616         len -= p->wirevec[i].iov_len;
2617     }
2618     if (len > 0) {
2619         osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
2620     } else if (i < p->niovecs) {
2621         /* Free any extra elements in the wirevec */
2622 #if defined(RX_ENABLE_TSFPQ)
2623         rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2624 #else /* !RX_ENABLE_TSFPQ */
2625         MUTEX_ENTER(&rx_freePktQ_lock);
2626         rxi_FreeDataBufsNoLock(p, i);
2627         MUTEX_EXIT(&rx_freePktQ_lock);
2628 #endif /* !RX_ENABLE_TSFPQ */
2629
2630         p->niovecs = i;
2631     }
2632     p->wirevec[i - 1].iov_len += len;
2633     RXS_PreparePacket(conn->securityObject, call, p);
2634 }
2635
2636 /* Given an interface MTU size, calculate an adjusted MTU size that
2637  * will make efficient use of the RX buffers when the peer is sending
2638  * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
2639 int
2640 rxi_AdjustIfMTU(int mtu)
2641 {
2642     int adjMTU;
2643     int frags;
2644
2645     if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
2646         return mtu;
2647     adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2648     if (mtu <= adjMTU) {
2649         return mtu;
2650     }
2651     mtu -= adjMTU;
2652     if (mtu <= 0) {
2653         return adjMTU;
2654     }
2655     frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2656     return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2657 }
2658
2659 /* Given an interface MTU size, and the peer's advertised max receive
2660  * size, calculate an adjisted maxMTU size that makes efficient use
2661  * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2662 int
2663 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2664 {
2665     int maxMTU = mtu * rxi_nSendFrags;
2666     maxMTU = MIN(maxMTU, peerMaxMTU);
2667     return rxi_AdjustIfMTU(maxMTU);
2668 }
2669
2670 /* Given a packet size, figure out how many datagram packet will fit.
2671  * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2672  * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2673  * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2674 int
2675 rxi_AdjustDgramPackets(int frags, int mtu)
2676 {
2677     int maxMTU;
2678     if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2679         return 1;
2680     }
2681     maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2682     maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2683     /* subtract the size of the first and last packets */
2684     maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2685     if (maxMTU < 0) {
2686         return 1;
2687     }
2688     return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2689 }