src/rx/rx_packet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #ifdef KERNEL
  12 #include "afs/param.h"
  13 #else
  14 #include <afs/param.h>
  15 #endif
  16
  17 RCSID
  18     ("$Header$");
  19
  20 #ifdef KERNEL
  21 #if defined(UKERNEL)
  22 #include "afs/sysincludes.h"
  23 #include "afsincludes.h"
  24 #include "rx/rx_kcommon.h"
  25 #include "rx/rx_clock.h"
  26 #include "rx/rx_queue.h"
  27 #include "rx/rx_packet.h"
  28 #else /* defined(UKERNEL) */
  29 #ifdef RX_KERNEL_TRACE
  30 #include "../rx/rx_kcommon.h"
  31 #endif
  32 #include "h/types.h"
  33 #ifndef AFS_LINUX20_ENV
  34 #include "h/systm.h"
  35 #endif
  36 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
  37 #include "afs/sysincludes.h"
  38 #endif
  39 #if defined(AFS_OBSD_ENV)
  40 #include "h/proc.h"
  41 #endif
  42 #include "h/socket.h"
  43 #if !defined(AFS_SUN5_ENV) &&  !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
  44 #if     !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
  45 #include "sys/mount.h"          /* it gets pulled in by something later anyway */
  46 #endif
  47 #include "h/mbuf.h"
  48 #endif
  49 #include "netinet/in.h"
  50 #include "afs/afs_osi.h"
  51 #include "rx_kmutex.h"
  52 #include "rx/rx_clock.h"
  53 #include "rx/rx_queue.h"
  54 #ifdef  AFS_SUN5_ENV
  55 #include <sys/sysmacros.h>
  56 #endif
  57 #include "rx/rx_packet.h"
  58 #endif /* defined(UKERNEL) */
  59 #include "rx/rx_globals.h"
  60 #else /* KERNEL */
  61 #include "sys/types.h"
  62 #include <sys/stat.h>
  63 #include <errno.h>
  64 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
  65 #ifdef AFS_NT40_ENV
  66 #include <winsock2.h>
  67 #ifndef EWOULDBLOCK
  68 #define EWOULDBLOCK WSAEWOULDBLOCK
  69 #endif
  70 #else
  71 #include <sys/socket.h>
  72 #include <netinet/in.h>
  73 #endif /* AFS_NT40_ENV */
  74 #include "rx_xmit_nt.h"
  75 #include <stdlib.h>
  76 #else
  77 #include <sys/socket.h>
  78 #include <netinet/in.h>
  79 #endif
  80 #include "rx_clock.h"
  81 #include "rx.h"
  82 #include "rx_queue.h"
  83 #ifdef  AFS_SUN5_ENV
  84 #include <sys/sysmacros.h>
  85 #endif
  86 #include "rx_packet.h"
  87 #include "rx_globals.h"
  88 #include <lwp.h>
  89 #include <assert.h>
  90 #ifdef HAVE_STRING_H
  91 #include <string.h>
  92 #else
  93 #ifdef HAVE_STRINGS_H
  94 #include <strings.h>
  95 #endif
  96 #endif
  97 #ifdef HAVE_UNISTD_H
  98 #include <unistd.h>
  99 #endif
 100 #endif /* KERNEL */
 101
 102 #ifdef RX_LOCKS_DB
 103 /* rxdb_fileID is used to identify the lock location, along with line#. */
 104 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
 105 #endif /* RX_LOCKS_DB */
 106 struct rx_packet *rx_mallocedP = 0;
 107
 108 extern char cml_version_number[];
 109 extern int (*rx_almostSent) ();
 110
 111 static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);
 112
 113 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
 114                                 afs_int32 ahost, short aport,
 115                                 afs_int32 istack);
 116
 117 /* some rules about packets:
 118  * 1.  When a packet is allocated, the final iov_buf contains room for
 119  * a security trailer, but iov_len masks that fact.  If the security
 120  * package wants to add the trailer, it may do so, and then extend
 121  * iov_len appropriately.  For this reason, packet's niovecs and
 122  * iov_len fields should be accurate before calling PreparePacket.
 123 */
 124
 125 /* Preconditions:
 126  *        all packet buffers (iov_base) are integral multiples of
 127  *        the word size.
 128  *        offset is an integral multiple of the word size.
 129  */
 130 afs_int32
 131 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
 132 {
 133     unsigned int i;
 134     size_t l;
 135     for (l = 0, i = 1; i < packet->niovecs; i++) {
 136         if (l + packet->wirevec[i].iov_len > offset) {
 137             return
 138                 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
 139                                  (offset - l)));
 140         }
 141         l += packet->wirevec[i].iov_len;
 142     }
 143
 144     return 0;
 145 }
 146
 147 /* Preconditions:
 148  *        all packet buffers (iov_base) are integral multiples of the word size.
 149  *        offset is an integral multiple of the word size.
 150  */
 151 afs_int32
 152 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
 153 {
 154     unsigned int i;
 155     size_t l;
 156     for (l = 0, i = 1; i < packet->niovecs; i++) {
 157         if (l + packet->wirevec[i].iov_len > offset) {
 158             *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
 159                              (offset - l))) = data;
 160             return 0;
 161         }
 162         l += packet->wirevec[i].iov_len;
 163     }
 164
 165     return 0;
 166 }
 167
 168 /* Preconditions:
 169  *        all packet buffers (iov_base) are integral multiples of the
 170  *        word size.
 171  *        offset is an integral multiple of the word size.
 172  * Packet Invariants:
 173  *         all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 174  */
 175 afs_int32
 176 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
 177                   char *out)
 178 {
 179     unsigned int i, j, l, r;
 180     for (l = 0, i = 1; i < packet->niovecs; i++) {
 181         if (l + packet->wirevec[i].iov_len > offset) {
 182             break;
 183         }
 184         l += packet->wirevec[i].iov_len;
 185     }
 186
 187     /* i is the iovec which contains the first little bit of data in which we
 188      * are interested.  l is the total length of everything prior to this iovec.
 189      * j is the number of bytes we can safely copy out of this iovec.
 190      * offset only applies to the first iovec.
 191      */
 192     r = resid;
 193     while ((resid > 0) && (i < packet->niovecs)) {
 194         j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
 195         memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
 196         resid -= j;
 197         out += j;
 198         l += packet->wirevec[i].iov_len;
 199         offset = l;
 200         i++;
 201     }
 202
 203     return (resid ? (r - resid) : r);
 204 }
 205
 206
 207 /* Preconditions:
 208  *        all packet buffers (iov_base) are integral multiples of the
 209  *        word size.
 210  *        offset is an integral multiple of the word size.
 211  */
 212 afs_int32
 213 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
 214 {
 215     int i, j, l, r;
 216     char *b;
 217
 218     for (l = 0, i = 1; i < packet->niovecs; i++) {
 219         if (l + packet->wirevec[i].iov_len > offset) {
 220             break;
 221         }
 222         l += packet->wirevec[i].iov_len;
 223     }
 224
 225     /* i is the iovec which contains the first little bit of data in which we
 226      * are interested.  l is the total length of everything prior to this iovec.
 227      * j is the number of bytes we can safely copy out of this iovec.
 228      * offset only applies to the first iovec.
 229      */
 230     r = resid;
 231     while ((resid > 0) && (i < RX_MAXWVECS)) {
 232         if (i >= packet->niovecs)
 233             if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
 234                 break;
 235
 236         b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
 237         j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
 238         memcpy(b, in, j);
 239         resid -= j;
 240         in += j;
 241         l += packet->wirevec[i].iov_len;
 242         offset = l;
 243         i++;
 244     }
 245
 246     return (resid ? (r - resid) : r);
 247 }
 248
 249 int
 250 rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
 251 {
 252     register struct rx_packet *p, *np;
 253
 254     num_pkts = AllocPacketBufs(class, num_pkts, q);
 255
 256     for (queue_Scan(q, p, np, rx_packet)) {
 257         RX_PACKET_IOV_FULLINIT(p);
 258     }
 259
 260     return num_pkts;
 261 }
 262
 263 #ifdef RX_ENABLE_TSFPQ
 264 static int
 265 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
 266 {
 267     register struct rx_packet *c;
 268     register struct rx_ts_info_t * rx_ts_info;
 269     int transfer, alloc;
 270     SPLVAR;
 271
 272     RX_TS_INFO_GET(rx_ts_info);
 273
 274     transfer = num_pkts - rx_ts_info->_FPQ.len;
 275     if (transfer > 0) {
 276         NETPRI;
 277         MUTEX_ENTER(&rx_freePktQ_lock);
 278
 279         if ((transfer + rx_TSFPQGlobSize) <= rx_nFreePackets) {
 280             transfer += rx_TSFPQGlobSize;
 281         } else if (transfer <= rx_nFreePackets) {
 282             transfer = rx_nFreePackets;
 283         } else {
 284             /* alloc enough for us, plus a few globs for other threads */
 285             alloc = transfer + (3 * rx_TSFPQGlobSize) - rx_nFreePackets;
 286             rxi_MorePacketsNoLock(MAX(alloc, rx_initSendWindow));
 287             transfer += rx_TSFPQGlobSize;
 288         }
 289
 290         RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
 291
 292         MUTEX_EXIT(&rx_freePktQ_lock);
 293         USERPRI;
 294     }
 295
 296     RX_TS_FPQ_CHECKOUT2(rx_ts_info, num_pkts, q);
 297
 298     return num_pkts;
 299 }
 300 #else /* RX_ENABLE_TSFPQ */
 301 static int
 302 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
 303 {
 304     struct rx_packet *c;
 305     int i, overq = 0;
 306     SPLVAR;
 307
 308     NETPRI;
 309
 310     MUTEX_ENTER(&rx_freePktQ_lock);
 311
 312 #ifdef KERNEL
 313     for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
 314          num_pkts--, overq++);
 315
 316     if (overq) {
 317         rxi_NeedMorePackets = TRUE;
 318         MUTEX_ENTER(&rx_stats_mutex);
 319         switch (class) {
 320         case RX_PACKET_CLASS_RECEIVE:
 321             rx_stats.receivePktAllocFailures++;
 322             break;
 323         case RX_PACKET_CLASS_SEND:
 324             rx_stats.sendPktAllocFailures++;
 325             break;
 326         case RX_PACKET_CLASS_SPECIAL:
 327             rx_stats.specialPktAllocFailures++;
 328             break;
 329         case RX_PACKET_CLASS_RECV_CBUF:
 330             rx_stats.receiveCbufPktAllocFailures++;
 331             break;
 332         case RX_PACKET_CLASS_SEND_CBUF:
 333             rx_stats.sendCbufPktAllocFailures++;
 334             break;
 335         }
 336         MUTEX_EXIT(&rx_stats_mutex);
 337     }
 338
 339     if (rx_nFreePackets < num_pkts)
 340         num_pkts = rx_nFreePackets;
 341
 342     if (!num_pkts) {
 343         rxi_NeedMorePackets = TRUE;
 344         goto done;
 345     }
 346 #else /* KERNEL */
 347     if (rx_nFreePackets < num_pkts) {
 348         rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), rx_initSendWindow));
 349     }
 350 #endif /* KERNEL */
 351
 352     for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
 353          i < num_pkts;
 354          i++, c=queue_Next(c, rx_packet)) {
 355         RX_FPQ_MARK_USED(c);
 356     }
 357
 358     queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);
 359
 360     rx_nFreePackets -= num_pkts;
 361
 362 #ifdef KERNEL
 363   done:
 364 #endif
 365     MUTEX_EXIT(&rx_freePktQ_lock);
 366
 367     USERPRI;
 368     return num_pkts;
 369 }
 370 #endif /* RX_ENABLE_TSFPQ */
 371
 372 /*
 373  * Free a packet currently used as a continuation buffer
 374  */
 375 #ifdef RX_ENABLE_TSFPQ
 376 /* num_pkts=0 means queue length is unknown */
 377 int
 378 rxi_FreePackets(int num_pkts, struct rx_queue * q)
 379 {
 380     register struct rx_ts_info_t * rx_ts_info;
 381     register struct rx_packet *c, *nc;
 382     SPLVAR;
 383
 384     if (!num_pkts) {
 385         queue_Count(q, c, nc, rx_packet, num_pkts);
 386         if (!num_pkts)
 387             return 0;
 388     }
 389
 390     RX_TS_INFO_GET(rx_ts_info);
 391     RX_TS_FPQ_CHECKIN2(rx_ts_info, num_pkts, q);
 392
 393     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 394         NETPRI;
 395         MUTEX_ENTER(&rx_freePktQ_lock);
 396
 397         RX_TS_FPQ_LTOG(rx_ts_info);
 398
 399         /* Wakeup anyone waiting for packets */
 400         rxi_PacketsUnWait();
 401
 402         MUTEX_EXIT(&rx_freePktQ_lock);
 403         USERPRI;
 404     }
 405
 406     return num_pkts;
 407 }
 408 #else /* RX_ENABLE_TSFPQ */
 409 /* num_pkts=0 means queue length is unknown */
 410 int
 411 rxi_FreePackets(int num_pkts, struct rx_queue *q)
 412 {
 413     register struct rx_packet *p, *np;
 414     SPLVAR;
 415
 416     if (!num_pkts) {
 417         for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
 418             RX_FPQ_MARK_FREE(p);
 419         }
 420         if (!num_pkts)
 421             return 0;
 422     } else {
 423         for (queue_Scan(q, p, np, rx_packet)) {
 424             RX_FPQ_MARK_FREE(p);
 425         }
 426     }
 427
 428     NETPRI;
 429     MUTEX_ENTER(&rx_freePktQ_lock);
 430
 431     queue_SpliceAppend(&rx_freePacketQueue, q);
 432     rx_nFreePackets += num_pkts;
 433
 434     /* Wakeup anyone waiting for packets */
 435     rxi_PacketsUnWait();
 436
 437     MUTEX_EXIT(&rx_freePktQ_lock);
 438     USERPRI;
 439
 440     return num_pkts;
 441 }
 442 #endif /* RX_ENABLE_TSFPQ */
 443
 444 /* this one is kind of awful.
 445  * In rxkad, the packet has been all shortened, and everything, ready for
 446  * sending.  All of a sudden, we discover we need some of that space back.
 447  * This isn't terribly general, because it knows that the packets are only
 448  * rounded up to the EBS (userdata + security header).
 449  */
 450 int
 451 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
 452 {
 453     int i;
 454     i = p->niovecs - 1;
 455     if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
 456         if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
 457             p->wirevec[i].iov_len += nb;
 458             return 0;
 459         }
 460     } else {
 461         if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
 462             p->wirevec[i].iov_len += nb;
 463             return 0;
 464         }
 465     }
 466
 467     return 0;
 468 }
 469
 470 /* get sufficient space to store nb bytes of data (or more), and hook
 471  * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 472  * returns the number of bytes >0 which it failed to come up with.
 473  * Don't need to worry about locking on packet, since only
 474  * one thread can manipulate one at a time. Locking on continution
 475  * packets is handled by AllocPacketBufs */
 476 /* MTUXXX don't need to go throught the for loop if we can trust niovecs */
 477 int
 478 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
 479 {
 480     int i, nv;
 481     struct rx_queue q;
 482     register struct rx_packet *cb, *ncb;
 483
 484     /* compute the number of cbuf's we need */
 485     nv = nb / RX_CBUFFERSIZE;
 486     if ((nv * RX_CBUFFERSIZE) < nb)
 487         nv++;
 488     if ((nv + p->niovecs) > RX_MAXWVECS)
 489         nv = RX_MAXWVECS - p->niovecs;
 490     if (nv < 1)
 491         return nb;
 492
 493     /* allocate buffers */
 494     queue_Init(&q);
 495     nv = AllocPacketBufs(class, nv, &q);
 496
 497     /* setup packet iovs */
 498     for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
 499         queue_Remove(cb);
 500         p->wirevec[i].iov_base = (caddr_t) cb->localdata;
 501         p->wirevec[i].iov_len = RX_CBUFFERSIZE;
 502     }
 503
 504     nb -= (nv * RX_CBUFFERSIZE);
 505     p->length += (nv * RX_CBUFFERSIZE);
 506     p->niovecs += nv;
 507
 508     return nb;
 509 }
 510
 511 /* Add more packet buffers */
 512 #ifdef RX_ENABLE_TSFPQ
 513 void
 514 rxi_MorePackets(int apackets)
 515 {
 516     struct rx_packet *p, *e;
 517     register struct rx_ts_info_t * rx_ts_info;
 518     int getme;
 519     SPLVAR;
 520
 521     getme = apackets * sizeof(struct rx_packet);
 522     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 523
 524     PIN(p, getme);              /* XXXXX */
 525     memset((char *)p, 0, getme);
 526     RX_TS_INFO_GET(rx_ts_info);
 527
 528     for (e = p + apackets; p < e; p++) {
 529         RX_PACKET_IOV_INIT(p);
 530         p->niovecs = 2;
 531
 532         RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 533     }
 534     rx_ts_info->_FPQ.delta += apackets;
 535
 536     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 537         NETPRI;
 538         MUTEX_ENTER(&rx_freePktQ_lock);
 539
 540         RX_TS_FPQ_LTOG(rx_ts_info);
 541         rxi_NeedMorePackets = FALSE;
 542         rxi_PacketsUnWait();
 543
 544         MUTEX_EXIT(&rx_freePktQ_lock);
 545         USERPRI;
 546     }
 547 }
 548 #else /* RX_ENABLE_TSFPQ */
 549 void
 550 rxi_MorePackets(int apackets)
 551 {
 552     struct rx_packet *p, *e;
 553     int getme;
 554     SPLVAR;
 555
 556     getme = apackets * sizeof(struct rx_packet);
 557     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 558
 559     PIN(p, getme);              /* XXXXX */
 560     memset((char *)p, 0, getme);
 561     NETPRI;
 562     MUTEX_ENTER(&rx_freePktQ_lock);
 563
 564     for (e = p + apackets; p < e; p++) {
 565         RX_PACKET_IOV_INIT(p);
 566         p->flags |= RX_PKTFLAG_FREE;
 567         p->niovecs = 2;
 568
 569         queue_Append(&rx_freePacketQueue, p);
 570     }
 571     rx_nFreePackets += apackets;
 572     rxi_NeedMorePackets = FALSE;
 573     rxi_PacketsUnWait();
 574
 575     MUTEX_EXIT(&rx_freePktQ_lock);
 576     USERPRI;
 577 }
 578 #endif /* RX_ENABLE_TSFPQ */
 579
 580 #ifdef RX_ENABLE_TSFPQ
 581 void
 582 rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
 583 {
 584     struct rx_packet *p, *e;
 585     register struct rx_ts_info_t * rx_ts_info;
 586     int getme;
 587     SPLVAR;
 588
 589     getme = apackets * sizeof(struct rx_packet);
 590     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 591
 592     PIN(p, getme);              /* XXXXX */
 593     memset((char *)p, 0, getme);
 594     RX_TS_INFO_GET(rx_ts_info);
 595
 596     for (e = p + apackets; p < e; p++) {
 597         RX_PACKET_IOV_INIT(p);
 598         p->niovecs = 2;
 599
 600         RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 601     }
 602     rx_ts_info->_FPQ.delta += apackets;
 603
 604     if (flush_global &&
 605         (num_keep_local < apackets)) {
 606         NETPRI;
 607         MUTEX_ENTER(&rx_freePktQ_lock);
 608
 609         RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
 610         rxi_NeedMorePackets = FALSE;
 611         rxi_PacketsUnWait();
 612
 613         MUTEX_EXIT(&rx_freePktQ_lock);
 614         USERPRI;
 615     }
 616 }
 617 #endif /* RX_ENABLE_TSFPQ */
 618
 619 #ifndef KERNEL
 620 /* Add more packet buffers */
 621 void
 622 rxi_MorePacketsNoLock(int apackets)
 623 {
 624     struct rx_packet *p, *e;
 625     int getme;
 626
 627     /* allocate enough packets that 1/4 of the packets will be able
 628      * to hold maximal amounts of data */
 629     apackets += (apackets / 4)
 630         * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
 631     getme = apackets * sizeof(struct rx_packet);
 632     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 633
 634     memset((char *)p, 0, getme);
 635
 636     for (e = p + apackets; p < e; p++) {
 637         RX_PACKET_IOV_INIT(p);
 638         p->flags |= RX_PKTFLAG_FREE;
 639         p->niovecs = 2;
 640
 641         queue_Append(&rx_freePacketQueue, p);
 642     }
 643
 644     rx_nFreePackets += apackets;
 645 #ifdef RX_ENABLE_TSFPQ
 646     /* TSFPQ patch also needs to keep track of total packets */
 647     MUTEX_ENTER(&rx_stats_mutex);
 648     rx_nPackets += apackets;
 649     RX_TS_FPQ_COMPUTE_LIMITS;
 650     MUTEX_EXIT(&rx_stats_mutex);
 651 #endif /* RX_ENABLE_TSFPQ */
 652     rxi_NeedMorePackets = FALSE;
 653     rxi_PacketsUnWait();
 654 }
 655 #endif /* !KERNEL */
 656
 657 void
 658 rxi_FreeAllPackets(void)
 659 {
 660     /* must be called at proper interrupt level, etcetera */
 661     /* MTUXXX need to free all Packets */
 662     osi_Free(rx_mallocedP,
 663              (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
 664     UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
 665 }
 666
 667 #ifdef RX_ENABLE_TSFPQ
 668 void
 669 rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
 670 {
 671     register struct rx_ts_info_t * rx_ts_info;
 672     register int xfer;
 673     SPLVAR;
 674
 675     RX_TS_INFO_GET(rx_ts_info);
 676
 677     if (num_keep_local != rx_ts_info->_FPQ.len) {
 678         NETPRI;
 679         MUTEX_ENTER(&rx_freePktQ_lock);
 680         if (num_keep_local < rx_ts_info->_FPQ.len) {
 681             xfer = rx_ts_info->_FPQ.len - num_keep_local;
 682             RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
 683             rxi_PacketsUnWait();
 684         } else {
 685             xfer = num_keep_local - rx_ts_info->_FPQ.len;
 686             if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
 687                 xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
 688             if (rx_nFreePackets < xfer) {
 689                 rxi_MorePacketsNoLock(xfer - rx_nFreePackets);
 690             }
 691             RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
 692         }
 693         MUTEX_EXIT(&rx_freePktQ_lock);
 694         USERPRI;
 695     }
 696 }
 697
 698 void
 699 rxi_FlushLocalPacketsTSFPQ(void)
 700 {
 701     rxi_AdjustLocalPacketsTSFPQ(0, 0);
 702 }
 703 #endif /* RX_ENABLE_TSFPQ */
 704
 705 /* Allocate more packets iff we need more continuation buffers */
 706 /* In kernel, can't page in memory with interrupts disabled, so we
 707  * don't use the event mechanism. */
 708 void
 709 rx_CheckPackets(void)
 710 {
 711     if (rxi_NeedMorePackets) {
 712         rxi_MorePackets(rx_initSendWindow);
 713     }
 714 }
 715
 716 /* In the packet freeing routine below, the assumption is that
 717    we want all of the packets to be used equally frequently, so that we
 718    don't get packet buffers paging out.  It would be just as valid to
 719    assume that we DO want them to page out if not many are being used.
 720    In any event, we assume the former, and append the packets to the end
 721    of the free list.  */
 722 /* This explanation is bogus.  The free list doesn't remain in any kind of
 723    useful order for long: the packets in use get pretty much randomly scattered
 724    across all the pages.  In order to permit unused {packets,bufs} to page out, they
 725    must be stored so that packets which are adjacent in memory are adjacent in the
 726    free list.  An array springs rapidly to mind.
 727    */
 728
 729 /* Actually free the packet p. */
 730 #ifdef RX_ENABLE_TSFPQ
 731 void
 732 rxi_FreePacketNoLock(struct rx_packet *p)
 733 {
 734     register struct rx_ts_info_t * rx_ts_info;
 735     dpf(("Free %lx\n", (unsigned long)p));
 736
 737     RX_TS_INFO_GET(rx_ts_info);
 738     RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 739     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 740         RX_TS_FPQ_LTOG(rx_ts_info);
 741     }
 742 }
 743 #else /* RX_ENABLE_TSFPQ */
 744 void
 745 rxi_FreePacketNoLock(struct rx_packet *p)
 746 {
 747     dpf(("Free %lx\n", (unsigned long)p));
 748
 749     RX_FPQ_MARK_FREE(p);
 750     rx_nFreePackets++;
 751     queue_Append(&rx_freePacketQueue, p);
 752 }
 753 #endif /* RX_ENABLE_TSFPQ */
 754
 755 #ifdef RX_ENABLE_TSFPQ
 756 void
 757 rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
 758 {
 759     register struct rx_ts_info_t * rx_ts_info;
 760     dpf(("Free %lx\n", (unsigned long)p));
 761
 762     RX_TS_INFO_GET(rx_ts_info);
 763     RX_TS_FPQ_CHECKIN(rx_ts_info,p);
 764
 765     if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
 766         NETPRI;
 767         MUTEX_ENTER(&rx_freePktQ_lock);
 768
 769         RX_TS_FPQ_LTOG(rx_ts_info);
 770
 771         /* Wakeup anyone waiting for packets */
 772         rxi_PacketsUnWait();
 773
 774         MUTEX_EXIT(&rx_freePktQ_lock);
 775         USERPRI;
 776     }
 777 }
 778 #endif /* RX_ENABLE_TSFPQ */
 779
 780 int
 781 rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
 782 {
 783     struct iovec *iov, *end;
 784
 785     if (first != 1)             /* MTUXXX */
 786         osi_Panic("FreeDataBufs 1: first must be 1");
 787     iov = &p->wirevec[1];
 788     end = iov + (p->niovecs - 1);
 789     if (iov->iov_base != (caddr_t) p->localdata)        /* MTUXXX */
 790         osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
 791     for (iov++; iov < end; iov++) {
 792         if (!iov->iov_base)
 793             osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
 794         rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 795     }
 796     p->length = 0;
 797     p->niovecs = 0;
 798
 799     return 0;
 800 }
 801
 802 #ifdef RX_ENABLE_TSFPQ
 803 int
 804 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, int first, int flush_global)
 805 {
 806     struct iovec *iov, *end;
 807     register struct rx_ts_info_t * rx_ts_info;
 808
 809     RX_TS_INFO_GET(rx_ts_info);
 810
 811     if (first != 1)             /* MTUXXX */
 812         osi_Panic("FreeDataBufs 1: first must be 1");
 813     iov = &p->wirevec[1];
 814     end = iov + (p->niovecs - 1);
 815     if (iov->iov_base != (caddr_t) p->localdata)        /* MTUXXX */
 816         osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
 817     for (iov++; iov < end; iov++) {
 818         if (!iov->iov_base)
 819             osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
 820         RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
 821     }
 822     p->length = 0;
 823     p->niovecs = 0;
 824
 825     if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
 826         NETPRI;
 827         MUTEX_ENTER(&rx_freePktQ_lock);
 828
 829         RX_TS_FPQ_LTOG(rx_ts_info);
 830
 831         /* Wakeup anyone waiting for packets */
 832         rxi_PacketsUnWait();
 833
 834         MUTEX_EXIT(&rx_freePktQ_lock);
 835         USERPRI;
 836     }
 837     return 0;
 838 }
 839 #endif /* RX_ENABLE_TSFPQ */
 840
 841 int rxi_nBadIovecs = 0;
 842
 843 /* rxi_RestoreDataBufs
 844  *
 845  * Restore the correct sizes to the iovecs. Called when reusing a packet
 846  * for reading off the wire.
 847  */
 848 void
 849 rxi_RestoreDataBufs(struct rx_packet *p)
 850 {
 851     int i;
 852     struct iovec *iov = &p->wirevec[2];
 853
 854     RX_PACKET_IOV_INIT(p);
 855
 856     for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
 857         if (!iov->iov_base) {
 858             rxi_nBadIovecs++;
 859             p->niovecs = i;
 860             break;
 861         }
 862         iov->iov_len = RX_CBUFFERSIZE;
 863     }
 864 }
 865
 866 #ifdef RX_ENABLE_TSFPQ
 867 int
 868 rxi_TrimDataBufs(struct rx_packet *p, int first)
 869 {
 870     int length;
 871     struct iovec *iov, *end;
 872     register struct rx_ts_info_t * rx_ts_info;
 873     SPLVAR;
 874
 875     if (first != 1)
 876         osi_Panic("TrimDataBufs 1: first must be 1");
 877
 878     /* Skip over continuation buffers containing message data */
 879     iov = &p->wirevec[2];
 880     end = iov + (p->niovecs - 2);
 881     length = p->length - p->wirevec[1].iov_len;
 882     for (; iov < end && length > 0; iov++) {
 883         if (!iov->iov_base)
 884             osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
 885         length -= iov->iov_len;
 886     }
 887
 888     /* iov now points to the first empty data buffer. */
 889     if (iov >= end)
 890         return 0;
 891
 892     RX_TS_INFO_GET(rx_ts_info);
 893     for (; iov < end; iov++) {
 894         if (!iov->iov_base)
 895             osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
 896         RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
 897         p->niovecs--;
 898     }
 899     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
 900         NETPRI;
 901         MUTEX_ENTER(&rx_freePktQ_lock);
 902
 903         RX_TS_FPQ_LTOG(rx_ts_info);
 904         rxi_PacketsUnWait();
 905
 906         MUTEX_EXIT(&rx_freePktQ_lock);
 907         USERPRI;
 908     }
 909
 910     return 0;
 911 }
 912 #else /* RX_ENABLE_TSFPQ */
 913 int
 914 rxi_TrimDataBufs(struct rx_packet *p, int first)
 915 {
 916     int length;
 917     struct iovec *iov, *end;
 918     SPLVAR;
 919
 920     if (first != 1)
 921         osi_Panic("TrimDataBufs 1: first must be 1");
 922
 923     /* Skip over continuation buffers containing message data */
 924     iov = &p->wirevec[2];
 925     end = iov + (p->niovecs - 2);
 926     length = p->length - p->wirevec[1].iov_len;
 927     for (; iov < end && length > 0; iov++) {
 928         if (!iov->iov_base)
 929             osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
 930         length -= iov->iov_len;
 931     }
 932
 933     /* iov now points to the first empty data buffer. */
 934     if (iov >= end)
 935         return 0;
 936
 937     NETPRI;
 938     MUTEX_ENTER(&rx_freePktQ_lock);
 939
 940     for (; iov < end; iov++) {
 941         if (!iov->iov_base)
 942             osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
 943         rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 944         p->niovecs--;
 945     }
 946     rxi_PacketsUnWait();
 947
 948     MUTEX_EXIT(&rx_freePktQ_lock);
 949     USERPRI;
 950
 951     return 0;
 952 }
 953 #endif /* RX_ENABLE_TSFPQ */
 954
 955 /* Free the packet p.  P is assumed not to be on any queue, i.e.
 956  * remove it yourself first if you call this routine. */
 957 #ifdef RX_ENABLE_TSFPQ
 958 void
 959 rxi_FreePacket(struct rx_packet *p)
 960 {
 961     rxi_FreeDataBufsTSFPQ(p, 1, 0);
 962     rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
 963 }
 964 #else /* RX_ENABLE_TSFPQ */
 965 void
 966 rxi_FreePacket(struct rx_packet *p)
 967 {
 968     SPLVAR;
 969
 970     NETPRI;
 971     MUTEX_ENTER(&rx_freePktQ_lock);
 972
 973     rxi_FreeDataBufsNoLock(p, 1);
 974     rxi_FreePacketNoLock(p);
 975     /* Wakeup anyone waiting for packets */
 976     rxi_PacketsUnWait();
 977
 978     MUTEX_EXIT(&rx_freePktQ_lock);
 979     USERPRI;
 980 }
 981 #endif /* RX_ENABLE_TSFPQ */
 982
 983 /* rxi_AllocPacket sets up p->length so it reflects the number of
 984  * bytes in the packet at this point, **not including** the header.
 985  * The header is absolutely necessary, besides, this is the way the
 986  * length field is usually used */
 987 #ifdef RX_ENABLE_TSFPQ
 988 struct rx_packet *
 989 rxi_AllocPacketNoLock(int class)
 990 {
 991     register struct rx_packet *p;
 992     register struct rx_ts_info_t * rx_ts_info;
 993
 994     RX_TS_INFO_GET(rx_ts_info);
 995
 996 #ifdef KERNEL
 997     if (rxi_OverQuota(class)) {
 998         rxi_NeedMorePackets = TRUE;
 999         MUTEX_ENTER(&rx_stats_mutex);
1000         switch (class) {
1001         case RX_PACKET_CLASS_RECEIVE:
1002             rx_stats.receivePktAllocFailures++;
1003             break;
1004         case RX_PACKET_CLASS_SEND:
1005             rx_stats.sendPktAllocFailures++;
1006             break;
1007         case RX_PACKET_CLASS_SPECIAL:
1008             rx_stats.specialPktAllocFailures++;
1009             break;
1010         case RX_PACKET_CLASS_RECV_CBUF:
1011             rx_stats.receiveCbufPktAllocFailures++;
1012             break;
1013         case RX_PACKET_CLASS_SEND_CBUF:
1014             rx_stats.sendCbufPktAllocFailures++;
1015             break;
1016         }
1017         MUTEX_EXIT(&rx_stats_mutex);
1018         return (struct rx_packet *)0;
1019     }
1020 #endif /* KERNEL */
1021
1022     MUTEX_ENTER(&rx_stats_mutex);
1023     rx_stats.packetRequests++;
1024     MUTEX_EXIT(&rx_stats_mutex);
1025
1026     if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1027
1028 #ifdef KERNEL
1029         if (queue_IsEmpty(&rx_freePacketQueue))
1030             osi_Panic("rxi_AllocPacket error");
1031 #else /* KERNEL */
1032         if (queue_IsEmpty(&rx_freePacketQueue))
1033             rxi_MorePacketsNoLock(rx_initSendWindow);
1034 #endif /* KERNEL */
1035
1036
1037         RX_TS_FPQ_GTOL(rx_ts_info);
1038     }
1039
1040     RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1041
1042     dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1043
1044
1045     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1046      * order to truncate outbound packets.  In the near future, may need
1047      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1048      */
1049     RX_PACKET_IOV_FULLINIT(p);
1050     return p;
1051 }
1052 #else /* RX_ENABLE_TSFPQ */
1053 struct rx_packet *
1054 rxi_AllocPacketNoLock(int class)
1055 {
1056     register struct rx_packet *p;
1057
1058 #ifdef KERNEL
1059     if (rxi_OverQuota(class)) {
1060         rxi_NeedMorePackets = TRUE;
1061         MUTEX_ENTER(&rx_stats_mutex);
1062         switch (class) {
1063         case RX_PACKET_CLASS_RECEIVE:
1064             rx_stats.receivePktAllocFailures++;
1065             break;
1066         case RX_PACKET_CLASS_SEND:
1067             rx_stats.sendPktAllocFailures++;
1068             break;
1069         case RX_PACKET_CLASS_SPECIAL:
1070             rx_stats.specialPktAllocFailures++;
1071             break;
1072         case RX_PACKET_CLASS_RECV_CBUF:
1073             rx_stats.receiveCbufPktAllocFailures++;
1074             break;
1075         case RX_PACKET_CLASS_SEND_CBUF:
1076             rx_stats.sendCbufPktAllocFailures++;
1077             break;
1078         }
1079         MUTEX_EXIT(&rx_stats_mutex);
1080         return (struct rx_packet *)0;
1081     }
1082 #endif /* KERNEL */
1083
1084     MUTEX_ENTER(&rx_stats_mutex);
1085     rx_stats.packetRequests++;
1086     MUTEX_EXIT(&rx_stats_mutex);
1087
1088 #ifdef KERNEL
1089     if (queue_IsEmpty(&rx_freePacketQueue))
1090         osi_Panic("rxi_AllocPacket error");
1091 #else /* KERNEL */
1092     if (queue_IsEmpty(&rx_freePacketQueue))
1093         rxi_MorePacketsNoLock(rx_initSendWindow);
1094 #endif /* KERNEL */
1095
1096     rx_nFreePackets--;
1097     p = queue_First(&rx_freePacketQueue, rx_packet);
1098     queue_Remove(p);
1099     RX_FPQ_MARK_USED(p);
1100
1101     dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1102
1103
1104     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1105      * order to truncate outbound packets.  In the near future, may need
1106      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1107      */
1108     RX_PACKET_IOV_FULLINIT(p);
1109     return p;
1110 }
1111 #endif /* RX_ENABLE_TSFPQ */
1112
1113 #ifdef RX_ENABLE_TSFPQ
1114 struct rx_packet *
1115 rxi_AllocPacketTSFPQ(int class, int pull_global)
1116 {
1117     register struct rx_packet *p;
1118     register struct rx_ts_info_t * rx_ts_info;
1119
1120     RX_TS_INFO_GET(rx_ts_info);
1121
1122     MUTEX_ENTER(&rx_stats_mutex);
1123     rx_stats.packetRequests++;
1124     MUTEX_EXIT(&rx_stats_mutex);
1125
1126     if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
1127         MUTEX_ENTER(&rx_freePktQ_lock);
1128
1129         if (queue_IsEmpty(&rx_freePacketQueue))
1130             rxi_MorePacketsNoLock(rx_initSendWindow);
1131
1132         RX_TS_FPQ_GTOL(rx_ts_info);
1133
1134         MUTEX_EXIT(&rx_freePktQ_lock);
1135     } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1136         return NULL;
1137     }
1138
1139     RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1140
1141     dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1142
1143     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1144      * order to truncate outbound packets.  In the near future, may need
1145      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1146      */
1147     RX_PACKET_IOV_FULLINIT(p);
1148     return p;
1149 }
1150 #endif /* RX_ENABLE_TSFPQ */
1151
1152 #ifdef RX_ENABLE_TSFPQ
1153 struct rx_packet *
1154 rxi_AllocPacket(int class)
1155 {
1156     register struct rx_packet *p;
1157
1158     p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
1159     return p;
1160 }
1161 #else /* RX_ENABLE_TSFPQ */
1162 struct rx_packet *
1163 rxi_AllocPacket(int class)
1164 {
1165     register struct rx_packet *p;
1166
1167     MUTEX_ENTER(&rx_freePktQ_lock);
1168     p = rxi_AllocPacketNoLock(class);
1169     MUTEX_EXIT(&rx_freePktQ_lock);
1170     return p;
1171 }
1172 #endif /* RX_ENABLE_TSFPQ */
1173
1174 /* This guy comes up with as many buffers as it {takes,can get} given
1175  * the MTU for this call. It also sets the packet length before
1176  * returning.  caution: this is often called at NETPRI
1177  * Called with call locked.
1178  */
1179 struct rx_packet *
1180 rxi_AllocSendPacket(register struct rx_call *call, int want)
1181 {
1182     register struct rx_packet *p = (struct rx_packet *)0;
1183     register int mud;
1184     register unsigned delta;
1185
1186     SPLVAR;
1187     mud = call->MTU - RX_HEADER_SIZE;
1188     delta =
1189         rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
1190         rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
1191
1192 #ifdef RX_ENABLE_TSFPQ
1193     if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
1194         want += delta;
1195         want = MIN(want, mud);
1196
1197         if ((unsigned)want > p->length)
1198             (void)rxi_AllocDataBuf(p, (want - p->length),
1199                                    RX_PACKET_CLASS_SEND_CBUF);
1200
1201         if ((unsigned)p->length > mud)
1202             p->length = mud;
1203
1204         if (delta >= p->length) {
1205             rxi_FreePacket(p);
1206             p = NULL;
1207         } else {
1208             p->length -= delta;
1209         }
1210         return p;
1211     }
1212 #endif /* RX_ENABLE_TSFPQ */
1213
1214     while (!(call->error)) {
1215         MUTEX_ENTER(&rx_freePktQ_lock);
1216         /* if an error occurred, or we get the packet we want, we're done */
1217         if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
1218             MUTEX_EXIT(&rx_freePktQ_lock);
1219
1220             want += delta;
1221             want = MIN(want, mud);
1222
1223             if ((unsigned)want > p->length)
1224                 (void)rxi_AllocDataBuf(p, (want - p->length),
1225                                        RX_PACKET_CLASS_SEND_CBUF);
1226
1227             if ((unsigned)p->length > mud)
1228                 p->length = mud;
1229
1230             if (delta >= p->length) {
1231                 rxi_FreePacket(p);
1232                 p = NULL;
1233             } else {
1234                 p->length -= delta;
1235             }
1236             break;
1237         }
1238
1239         /* no error occurred, and we didn't get a packet, so we sleep.
1240          * At this point, we assume that packets will be returned
1241          * sooner or later, as packets are acknowledged, and so we
1242          * just wait.  */
1243         NETPRI;
1244         call->flags |= RX_CALL_WAIT_PACKETS;
1245         CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
1246         MUTEX_EXIT(&call->lock);
1247         rx_waitingForPackets = 1;
1248
1249 #ifdef  RX_ENABLE_LOCKS
1250         CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
1251 #else
1252         osi_rxSleep(&rx_waitingForPackets);
1253 #endif
1254         MUTEX_EXIT(&rx_freePktQ_lock);
1255         MUTEX_ENTER(&call->lock);
1256         CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
1257         call->flags &= ~RX_CALL_WAIT_PACKETS;
1258         USERPRI;
1259     }
1260
1261     return p;
1262 }
1263
1264 #ifndef KERNEL
1265
/* Count how many descriptors in the range [0, amax) are currently open.
 * A descriptor is considered open when fstat() on it succeeds.  Returns
 * 0 when amax is zero or negative. */
static int
CountFDs(register int amax)
{
    struct stat sb;
    int nopen = 0;
    int fd;

    for (fd = 0; fd < amax; fd++) {
        nopen += (fstat(fd, &sb) == 0) ? 1 : 0;
    }
    return nopen;
}
1282
1283 #else /* KERNEL */
1284
/* Kernel builds cannot probe descriptors with fstat(); report the limit
 * itself.  The expansion is parenthesized so that an expression argument
 * keeps its grouping in the surrounding expression. */
#define CountFDs(amax) (amax)
1286
1287 #endif /* KERNEL */
1288
1289 #if !defined(KERNEL) || defined(UKERNEL)
1290
1291 /* This function reads a single packet from the interface into the
1292  * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
1293  * (host,port) of the sender are stored in the supplied variables, and
1294  * the data length of the packet is stored in the packet structure.
1295  * The header is decoded. */
1296 int
1297 rxi_ReadPacket(int socket, register struct rx_packet *p, afs_uint32 * host,
1298                u_short * port)
1299 {
1300     struct sockaddr_in from;
1301     int nbytes;
1302     afs_int32 rlen;
1303     register afs_int32 tlen, savelen;
1304     struct msghdr msg;
1305     rx_computelen(p, tlen);
1306     rx_SetDataSize(p, tlen);    /* this is the size of the user data area */
1307
1308     tlen += RX_HEADER_SIZE;     /* now this is the size of the entire packet */
1309     rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
1310                                  * it once in order to avoid races.  */
1311     tlen = rlen - tlen;
1312     if (tlen > 0) {
1313         tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
1314         if (tlen > 0) {
1315             tlen = rlen - tlen;
1316         } else
1317             tlen = rlen;
1318     } else
1319         tlen = rlen;
1320
1321     /* Extend the last iovec for padding, it's just to make sure that the
1322      * read doesn't return more data than we expect, and is done to get around
1323      * our problems caused by the lack of a length field in the rx header.
1324      * Use the extra buffer that follows the localdata in each packet
1325      * structure. */
1326     savelen = p->wirevec[p->niovecs - 1].iov_len;
1327     p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
1328
1329     memset((char *)&msg, 0, sizeof(msg));
1330     msg.msg_name = (char *)&from;
1331     msg.msg_namelen = sizeof(struct sockaddr_in);
1332     msg.msg_iov = p->wirevec;
1333     msg.msg_iovlen = p->niovecs;
1334     nbytes = rxi_Recvmsg(socket, &msg, 0);
1335
1336     /* restore the vec to its correct state */
1337     p->wirevec[p->niovecs - 1].iov_len = savelen;
1338
1339     p->length = (nbytes - RX_HEADER_SIZE);
1340     if ((nbytes > tlen) || (p->length & 0x8000)) {      /* Bogus packet */
1341         if (nbytes > 0)
1342             rxi_MorePackets(rx_initSendWindow);
1343         else if (nbytes < 0 && errno == EWOULDBLOCK) {
1344             MUTEX_ENTER(&rx_stats_mutex);
1345             rx_stats.noPacketOnRead++;
1346             MUTEX_EXIT(&rx_stats_mutex);
1347         } else {
1348             MUTEX_ENTER(&rx_stats_mutex);
1349             rx_stats.bogusPacketOnRead++;
1350             rx_stats.bogusHost = from.sin_addr.s_addr;
1351             MUTEX_EXIT(&rx_stats_mutex);
1352             dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
1353                  from.sin_port, nbytes));
1354         }
1355         return 0;
1356     } else {
1357         /* Extract packet header. */
1358         rxi_DecodePacketHeader(p);
1359
1360         *host = from.sin_addr.s_addr;
1361         *port = from.sin_port;
1362         if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
1363             struct rx_peer *peer;
1364             MUTEX_ENTER(&rx_stats_mutex);
1365             rx_stats.packetsRead[p->header.type - 1]++;
1366             MUTEX_EXIT(&rx_stats_mutex);
1367             /*
1368              * Try to look up this peer structure.  If it doesn't exist,
1369              * don't create a new one -
1370              * we don't keep count of the bytes sent/received if a peer
1371              * structure doesn't already exist.
1372              *
1373              * The peer/connection cleanup code assumes that there is 1 peer
1374              * per connection.  If we actually created a peer structure here
1375              * and this packet was an rxdebug packet, the peer structure would
1376              * never be cleaned up.
1377              */
1378             peer = rxi_FindPeer(*host, *port, 0, 0);
1379             /* Since this may not be associated with a connection,
1380              * it may have no refCount, meaning we could race with
1381              * ReapConnections
1382              */
1383             if (peer && (peer->refCount > 0)) {
1384                 MUTEX_ENTER(&peer->peer_lock);
1385                 hadd32(peer->bytesReceived, p->length);
1386                 MUTEX_EXIT(&peer->peer_lock);
1387             }
1388         }
1389
1390         /* Free any empty packet buffers at the end of this packet */
1391         rxi_TrimDataBufs(p, 1);
1392
1393         return 1;
1394     }
1395 }
1396
1397 #endif /* !KERNEL || UKERNEL */
1398
1399 /* This function splits off the first packet in a jumbo packet.
1400  * As of AFS 3.5, jumbograms contain more than one fixed size
1401  * packet, and the RX_JUMBO_PACKET flag is set in all but the
1402  * last packet header. All packets (except the last) are padded to
1403  * fall on RX_CBUFFERSIZE boundaries.
1404  * HACK: We store the length of the first n-1 packets in the
1405  * last two pad bytes. */
1406
1407 struct rx_packet *
1408 rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
1409                      int first)
1410 {
1411     struct rx_packet *np;
1412     struct rx_jumboHeader *jp;
1413     int niov, i;
1414     struct iovec *iov;
1415     int length;
1416     afs_uint32 temp;
1417
1418     /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
1419      * bytes in length. All but the first packet are preceded by
1420      * an abbreviated four byte header. The length of the last packet
1421      * is calculated from the size of the jumbogram. */
1422     length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1423
1424     if ((int)p->length < length) {
1425         dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
1426         return NULL;
1427     }
1428     niov = p->niovecs - 2;
1429     if (niov < 1) {
1430         dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
1431         return NULL;
1432     }
1433     iov = &p->wirevec[2];
1434     np = RX_CBUF_TO_PACKET(iov->iov_base, p);
1435
1436     /* Get a pointer to the abbreviated packet header */
1437     jp = (struct rx_jumboHeader *)
1438         ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
1439
1440     /* Set up the iovecs for the next packet */
1441     np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
1442     np->wirevec[0].iov_len = sizeof(struct rx_header);
1443     np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
1444     np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
1445     np->niovecs = niov + 1;
1446     for (i = 2, iov++; i <= niov; i++, iov++) {
1447         np->wirevec[i] = *iov;
1448     }
1449     np->length = p->length - length;
1450     p->length = RX_JUMBOBUFFERSIZE;
1451     p->niovecs = 2;
1452
1453     /* Convert the jumbo packet header to host byte order */
1454     temp = ntohl(*(afs_uint32 *) jp);
1455     jp->flags = (u_char) (temp >> 24);
1456     jp->cksum = (u_short) (temp);
1457
1458     /* Fill in the packet header */
1459     np->header = p->header;
1460     np->header.serial = p->header.serial + 1;
1461     np->header.seq = p->header.seq + 1;
1462     np->header.flags = jp->flags;
1463     np->header.spare = jp->cksum;
1464
1465     return np;
1466 }
1467
1468 #ifndef KERNEL
1469 /* Send a udp datagram */
1470 int
1471 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
1472             int length, int istack)
1473 {
1474     struct msghdr msg;
1475         int ret;
1476
1477     memset(&msg, 0, sizeof(msg));
1478     msg.msg_iov = dvec;
1479     msg.msg_iovlen = nvecs;
1480     msg.msg_name = addr;
1481     msg.msg_namelen = sizeof(struct sockaddr_in);
1482
1483     ret = rxi_Sendmsg(socket, &msg, 0);
1484
1485     return ret;
1486 }
1487 #elif !defined(UKERNEL)
1488 /*
1489  * message receipt is done in rxk_input or rx_put.
1490  */
1491
1492 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1493 /*
1494  * Copy an mblock to the contiguous area pointed to by cp.
1495  * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1496  * but it doesn't really.
1497  * Returns the number of bytes not transferred.
1498  * The message is NOT changed.
1499  */
1500 static int
1501 cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
1502 {
1503     register int n;
1504
1505     for (; mp && len > 0; mp = mp->b_cont) {
1506         if (mp->b_datap->db_type != M_DATA) {
1507             return -1;
1508         }
1509         n = MIN(len, (mp->b_wptr - mp->b_rptr));
1510         memcpy(cp, (char *)mp->b_rptr, n);
1511         cp += n;
1512         len -= n;
1513         mp->b_rptr += n;
1514     }
1515     return (len);
1516 }
1517
1518 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1519  * but it doesn't really.
1520  * This sucks, anyway, do it like m_cpy.... below
1521  */
1522 static int
1523 cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
1524            int niovs)
1525 {
1526     register int m, n, o, t, i;
1527
1528     for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1529         if (mp->b_datap->db_type != M_DATA) {
1530             return -1;
1531         }
1532         n = MIN(len, (mp->b_wptr - mp->b_rptr));
1533         len -= n;
1534         while (n) {
1535             if (!t) {
1536                 o = 0;
1537                 i++;
1538                 t = iovs[i].iov_len;
1539             }
1540             m = MIN(n, t);
1541             memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1542             mp->b_rptr += m;
1543             o += m;
1544             t -= m;
1545             n -= m;
1546         }
1547     }
1548     return (len);
1549 }
1550
/* In this preprocessor branch (AFS_SUN5_ENV or AFS_HPUX110_ENV) the
 * m_cpytoc()/m_cpytoiovec() names expand to the mblk_t-based cpytoc()
 * and cpytoiovec() routines, passing all arguments through unchanged. */
#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1553 #else
1554 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1555 static int
1556 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1557 {
1558     caddr_t p1, p2;
1559     unsigned int l1, l2, i, t;
1560
1561     if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1562         osi_Panic("m_cpytoiovec");      /* MTUXXX probably don't need this check */
1563
1564     while (off && m)
1565         if (m->m_len <= off) {
1566             off -= m->m_len;
1567             m = m->m_next;
1568             continue;
1569         } else
1570             break;
1571
1572     if (m == NULL)
1573         return len;
1574
1575     p1 = mtod(m, caddr_t) + off;
1576     l1 = m->m_len - off;
1577     i = 0;
1578     p2 = iovs[0].iov_base;
1579     l2 = iovs[0].iov_len;
1580
1581     while (len) {
1582         t = MIN(l1, MIN(l2, (unsigned int)len));
1583         memcpy(p2, p1, t);
1584         p1 += t;
1585         p2 += t;
1586         l1 -= t;
1587         l2 -= t;
1588         len -= t;
1589         if (!l1) {
1590             m = m->m_next;
1591             if (!m)
1592                 break;
1593             p1 = mtod(m, caddr_t);
1594             l1 = m->m_len;
1595         }
1596         if (!l2) {
1597             if (++i >= niovs)
1598                 break;
1599             p2 = iovs[i].iov_base;
1600             l2 = iovs[i].iov_len;
1601         }
1602
1603     }
1604
1605     return len;
1606 }
1607 #endif /* LINUX */
1608 #endif /* AFS_SUN5_ENV */
1609
1610 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1611 int
1612 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1613 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1614      mblk_t *amb;
1615 #else
1616      struct mbuf *amb;
1617 #endif
1618      void (*free) ();
1619      struct rx_packet *phandle;
1620      int hdr_len, data_len;
1621 {
1622     register int code;
1623
1624     code =
1625         m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1626                      phandle->niovecs);
1627     (*free) (amb);
1628
1629     return code;
1630 }
1631 #endif /* LINUX */
1632 #endif /*KERNEL && !UKERNEL */
1633
1634
1635 /* send a response to a debug packet */
1636
1637 struct rx_packet *
1638 rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
1639                        afs_int32 ahost, short aport, int istack)
1640 {
1641     struct rx_debugIn tin;
1642     afs_int32 tl;
1643     struct rx_serverQueueEntry *np, *nqe;
1644
1645     /*
1646      * Only respond to client-initiated Rx debug packets,
1647      * and clear the client flag in the response.
1648      */
1649     if (ap->header.flags & RX_CLIENT_INITIATED) {
1650         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1651         rxi_EncodePacketHeader(ap);
1652     } else {
1653         return ap;
1654     }
1655
1656     rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1657     /* all done with packet, now set length to the truth, so we can
1658      * reuse this packet */
1659     rx_computelen(ap, ap->length);
1660
1661     tin.type = ntohl(tin.type);
1662     tin.index = ntohl(tin.index);
1663     switch (tin.type) {
1664     case RX_DEBUGI_GETSTATS:{
1665             struct rx_debugStats tstat;
1666
1667             /* get basic stats */
1668             memset((char *)&tstat, 0, sizeof(tstat));   /* make sure spares are zero */
1669             tstat.version = RX_DEBUGI_VERSION;
1670 #ifndef RX_ENABLE_LOCKS
1671             tstat.waitingForPackets = rx_waitingForPackets;
1672 #endif
1673             MUTEX_ENTER(&rx_serverPool_lock);
1674             tstat.nFreePackets = htonl(rx_nFreePackets);
1675             tstat.callsExecuted = htonl(rxi_nCalls);
1676             tstat.packetReclaims = htonl(rx_packetReclaims);
1677             tstat.usedFDs = CountFDs(64);
1678             tstat.nWaiting = htonl(rx_nWaiting);
1679             tstat.nWaited = htonl(rx_nWaited);
1680             queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
1681                         tstat.idleThreads);
1682             MUTEX_EXIT(&rx_serverPool_lock);
1683             tstat.idleThreads = htonl(tstat.idleThreads);
1684             tl = sizeof(struct rx_debugStats) - ap->length;
1685             if (tl > 0)
1686                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1687
1688             if (tl <= 0) {
1689                 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1690                                (char *)&tstat);
1691                 ap->length = sizeof(struct rx_debugStats);
1692                 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1693                 rx_computelen(ap, ap->length);
1694             }
1695             break;
1696         }
1697
1698     case RX_DEBUGI_GETALLCONN:
1699     case RX_DEBUGI_GETCONN:{
1700             int i, j;
1701             register struct rx_connection *tc;
1702             struct rx_call *tcall;
1703             struct rx_debugConn tconn;
1704             int all = (tin.type == RX_DEBUGI_GETALLCONN);
1705
1706
1707             tl = sizeof(struct rx_debugConn) - ap->length;
1708             if (tl > 0)
1709                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1710             if (tl > 0)
1711                 return ap;
1712
1713             memset((char *)&tconn, 0, sizeof(tconn));   /* make sure spares are zero */
1714             /* get N'th (maybe) "interesting" connection info */
1715             for (i = 0; i < rx_hashTableSize; i++) {
1716 #if !defined(KERNEL)
1717                 /* the time complexity of the algorithm used here
1718                  * exponentially increses with the number of connections.
1719                  */
1720 #ifdef AFS_PTHREAD_ENV
1721                 pthread_yield();
1722 #else
1723                 (void)IOMGR_Poll();
1724 #endif
1725 #endif
1726                 MUTEX_ENTER(&rx_connHashTable_lock);
1727                 /* We might be slightly out of step since we are not
1728                  * locking each call, but this is only debugging output.
1729                  */
1730                 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
1731                     if ((all || rxi_IsConnInteresting(tc))
1732                         && tin.index-- <= 0) {
1733                         tconn.host = tc->peer->host;
1734                         tconn.port = tc->peer->port;
1735                         tconn.cid = htonl(tc->cid);
1736                         tconn.epoch = htonl(tc->epoch);
1737                         tconn.serial = htonl(tc->serial);
1738                         for (j = 0; j < RX_MAXCALLS; j++) {
1739                             tconn.callNumber[j] = htonl(tc->callNumber[j]);
1740                             if ((tcall = tc->call[j])) {
1741                                 tconn.callState[j] = tcall->state;
1742                                 tconn.callMode[j] = tcall->mode;
1743                                 tconn.callFlags[j] = tcall->flags;
1744                                 if (queue_IsNotEmpty(&tcall->rq))
1745                                     tconn.callOther[j] |= RX_OTHER_IN;
1746                                 if (queue_IsNotEmpty(&tcall->tq))
1747                                     tconn.callOther[j] |= RX_OTHER_OUT;
1748                             } else
1749                                 tconn.callState[j] = RX_STATE_NOTINIT;
1750                         }
1751
1752                         tconn.natMTU = htonl(tc->peer->natMTU);
1753                         tconn.error = htonl(tc->error);
1754                         tconn.flags = tc->flags;
1755                         tconn.type = tc->type;
1756                         tconn.securityIndex = tc->securityIndex;
1757                         if (tc->securityObject) {
1758                             RXS_GetStats(tc->securityObject, tc,
1759                                          &tconn.secStats);
1760 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1761 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1762                             DOHTONL(flags);
1763                             DOHTONL(expires);
1764                             DOHTONL(packetsReceived);
1765                             DOHTONL(packetsSent);
1766                             DOHTONL(bytesReceived);
1767                             DOHTONL(bytesSent);
1768                             for (i = 0;
1769                                  i <
1770                                  sizeof(tconn.secStats.spares) /
1771                                  sizeof(short); i++)
1772                                 DOHTONS(spares[i]);
1773                             for (i = 0;
1774                                  i <
1775                                  sizeof(tconn.secStats.sparel) /
1776                                  sizeof(afs_int32); i++)
1777                                 DOHTONL(sparel[i]);
1778                         }
1779
1780                         MUTEX_EXIT(&rx_connHashTable_lock);
1781                         rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1782                                        (char *)&tconn);
1783                         tl = ap->length;
1784                         ap->length = sizeof(struct rx_debugConn);
1785                         rxi_SendDebugPacket(ap, asocket, ahost, aport,
1786                                             istack);
1787                         ap->length = tl;
1788                         return ap;
1789                     }
1790                 }
1791                 MUTEX_EXIT(&rx_connHashTable_lock);
1792             }
1793             /* if we make it here, there are no interesting packets */
1794             tconn.cid = htonl(0xffffffff);      /* means end */
1795             rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1796                            (char *)&tconn);
1797             tl = ap->length;
1798             ap->length = sizeof(struct rx_debugConn);
1799             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1800             ap->length = tl;
1801             break;
1802         }
1803
1804         /*
1805          * Pass back all the peer structures we have available
1806          */
1807
1808     case RX_DEBUGI_GETPEER:{
1809             int i;
1810             register struct rx_peer *tp;
1811             struct rx_debugPeer tpeer;
1812
1813
1814             tl = sizeof(struct rx_debugPeer) - ap->length;
1815             if (tl > 0)
1816                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1817             if (tl > 0)
1818                 return ap;
1819
1820             memset((char *)&tpeer, 0, sizeof(tpeer));
1821             for (i = 0; i < rx_hashTableSize; i++) {
1822 #if !defined(KERNEL)
1823                 /* the time complexity of the algorithm used here
1824                  * exponentially increses with the number of peers.
1825                  *
1826                  * Yielding after processing each hash table entry
1827                  * and dropping rx_peerHashTable_lock.
1828                  * also increases the risk that we will miss a new
1829                  * entry - but we are willing to live with this
1830                  * limitation since this is meant for debugging only
1831                  */
1832 #ifdef AFS_PTHREAD_ENV
1833                 pthread_yield();
1834 #else
1835                 (void)IOMGR_Poll();
1836 #endif
1837 #endif
1838                 MUTEX_ENTER(&rx_peerHashTable_lock);
1839                 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1840                     if (tin.index-- <= 0) {
1841                         tpeer.host = tp->host;
1842                         tpeer.port = tp->port;
1843                         tpeer.ifMTU = htons(tp->ifMTU);
1844                         tpeer.idleWhen = htonl(tp->idleWhen);
1845                         tpeer.refCount = htons(tp->refCount);
1846                         tpeer.burstSize = tp->burstSize;
1847                         tpeer.burst = tp->burst;
1848                         tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1849                         tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1850                         tpeer.rtt = htonl(tp->rtt);
1851                         tpeer.rtt_dev = htonl(tp->rtt_dev);
1852                         tpeer.timeout.sec = htonl(tp->timeout.sec);
1853                         tpeer.timeout.usec = htonl(tp->timeout.usec);
1854                         tpeer.nSent = htonl(tp->nSent);
1855                         tpeer.reSends = htonl(tp->reSends);
1856                         tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1857                         tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1858                         tpeer.rateFlag = htonl(tp->rateFlag);
1859                         tpeer.natMTU = htons(tp->natMTU);
1860                         tpeer.maxMTU = htons(tp->maxMTU);
1861                         tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1862                         tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1863                         tpeer.MTU = htons(tp->MTU);
1864                         tpeer.cwind = htons(tp->cwind);
1865                         tpeer.nDgramPackets = htons(tp->nDgramPackets);
1866                         tpeer.congestSeq = htons(tp->congestSeq);
1867                         tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1868                         tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1869                         tpeer.bytesReceived.high =
1870                             htonl(tp->bytesReceived.high);
1871                         tpeer.bytesReceived.low =
1872                             htonl(tp->bytesReceived.low);
1873
1874                         MUTEX_EXIT(&rx_peerHashTable_lock);
1875                         rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1876                                        (char *)&tpeer);
1877                         tl = ap->length;
1878                         ap->length = sizeof(struct rx_debugPeer);
1879                         rxi_SendDebugPacket(ap, asocket, ahost, aport,
1880                                             istack);
1881                         ap->length = tl;
1882                         return ap;
1883                     }
1884                 }
1885                 MUTEX_EXIT(&rx_peerHashTable_lock);
1886             }
1887             /* if we make it here, there are no interesting packets */
1888             tpeer.host = htonl(0xffffffff);     /* means end */
1889             rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1890                            (char *)&tpeer);
1891             tl = ap->length;
1892             ap->length = sizeof(struct rx_debugPeer);
1893             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1894             ap->length = tl;
1895             break;
1896         }
1897
1898     case RX_DEBUGI_RXSTATS:{
1899             int i;
1900             afs_int32 *s;
1901
1902             tl = sizeof(rx_stats) - ap->length;
1903             if (tl > 0)
1904                 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1905             if (tl > 0)
1906                 return ap;
1907
1908             /* Since its all int32s convert to network order with a loop. */
1909             MUTEX_ENTER(&rx_stats_mutex);
1910             s = (afs_int32 *) & rx_stats;
1911             for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
1912                 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
1913
1914             tl = ap->length;
1915             ap->length = sizeof(rx_stats);
1916             MUTEX_EXIT(&rx_stats_mutex);
1917             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1918             ap->length = tl;
1919             break;
1920         }
1921
1922     default:
1923         /* error response packet */
1924         tin.type = htonl(RX_DEBUGI_BADTYPE);
1925         tin.index = tin.type;
1926         rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1927         tl = ap->length;
1928         ap->length = sizeof(struct rx_debugIn);
1929         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1930         ap->length = tl;
1931         break;
1932     }
1933     return ap;
1934 }
1935
1936 struct rx_packet *
1937 rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
1938                          afs_int32 ahost, short aport, int istack)
1939 {
1940     afs_int32 tl;
1941
1942     /*
1943      * Only respond to client-initiated version requests, and
1944      * clear that flag in the response.
1945      */
1946     if (ap->header.flags & RX_CLIENT_INITIATED) {
1947         char buf[66];
1948
1949         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1950         rxi_EncodePacketHeader(ap);
1951         memset(buf, 0, sizeof(buf));
1952         strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
1953         rx_packetwrite(ap, 0, 65, buf);
1954         tl = ap->length;
1955         ap->length = 65;
1956         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1957         ap->length = tl;
1958     }
1959
1960     return ap;
1961 }
1962
1963
1964 /* send a debug packet back to the sender */
1965 static void
1966 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1967                     afs_int32 ahost, short aport, afs_int32 istack)
1968 {
1969     struct sockaddr_in taddr;
1970     int i;
1971     int nbytes;
1972     int saven = 0;
1973     size_t savelen = 0;
1974 #ifdef KERNEL
1975     int waslocked = ISAFS_GLOCK();
1976 #endif
1977
1978     taddr.sin_family = AF_INET;
1979     taddr.sin_port = aport;
1980     taddr.sin_addr.s_addr = ahost;
1981 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
1982     taddr.sin_len = sizeof(struct sockaddr_in);
1983 #endif
1984
1985     /* We need to trim the niovecs. */
1986     nbytes = apacket->length;
1987     for (i = 1; i < apacket->niovecs; i++) {
1988         if (nbytes <= apacket->wirevec[i].iov_len) {
1989             savelen = apacket->wirevec[i].iov_len;
1990             saven = apacket->niovecs;
1991             apacket->wirevec[i].iov_len = nbytes;
1992             apacket->niovecs = i + 1;   /* so condition fails because i == niovecs */
1993         } else
1994             nbytes -= apacket->wirevec[i].iov_len;
1995     }
1996 #ifdef KERNEL
1997 #ifdef RX_KERNEL_TRACE
1998     if (ICL_SETACTIVE(afs_iclSetp)) {
1999         if (!waslocked)
2000             AFS_GLOCK();
2001         afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2002                    "before osi_NetSend()");
2003         AFS_GUNLOCK();
2004     } else
2005 #else
2006     if (waslocked)
2007         AFS_GUNLOCK();
2008 #endif
2009 #endif
2010     /* debug packets are not reliably delivered, hence the cast below. */
2011     (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2012                       apacket->length + RX_HEADER_SIZE, istack);
2013 #ifdef KERNEL
2014 #ifdef RX_KERNEL_TRACE
2015     if (ICL_SETACTIVE(afs_iclSetp)) {
2016         AFS_GLOCK();
2017         afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2018                    "after osi_NetSend()");
2019         if (!waslocked)
2020             AFS_GUNLOCK();
2021     } else
2022 #else
2023     if (waslocked)
2024         AFS_GLOCK();
2025 #endif
2026 #endif
2027     if (saven) {                /* means we truncated the packet above. */
2028         apacket->wirevec[i - 1].iov_len = savelen;
2029         apacket->niovecs = saven;
2030     }
2031
2032 }
2033
2034 /* Send the packet to appropriate destination for the specified
2035  * call.  The header is first encoded and placed in the packet.
2036  */
2037 void
2038 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2039                struct rx_packet *p, int istack)
2040 {
2041 #if defined(KERNEL)
2042     int waslocked;
2043 #endif
2044     int code;
2045     struct sockaddr_in addr;
2046     register struct rx_peer *peer = conn->peer;
2047     osi_socket socket;
2048 #ifdef RXDEBUG
2049     char deliveryType = 'S';
2050 #endif
2051     /* The address we're sending the packet to */
2052     memset(&addr, 0, sizeof(addr));
2053     addr.sin_family = AF_INET;
2054     addr.sin_port = peer->port;
2055     addr.sin_addr.s_addr = peer->host;
2056
2057     /* This stuff should be revamped, I think, so that most, if not
2058      * all, of the header stuff is always added here.  We could
2059      * probably do away with the encode/decode routines. XXXXX */
2060
2061     /* Stamp each packet with a unique serial number.  The serial
2062      * number is maintained on a connection basis because some types
2063      * of security may be based on the serial number of the packet,
2064      * and security is handled on a per authenticated-connection
2065      * basis. */
2066     /* Pre-increment, to guarantee no zero serial number; a zero
2067      * serial number means the packet was never sent. */
2068     MUTEX_ENTER(&conn->conn_data_lock);
2069     p->header.serial = ++conn->serial;
2070     MUTEX_EXIT(&conn->conn_data_lock);
2071     /* This is so we can adjust retransmit time-outs better in the face of
2072      * rapidly changing round-trip times.  RTO estimation is not a la Karn.
2073      */
2074     if (p->firstSerial == 0) {
2075         p->firstSerial = p->header.serial;
2076     }
2077 #ifdef RXDEBUG
2078     /* If an output tracer function is defined, call it with the packet and
2079      * network address.  Note this function may modify its arguments. */
2080     if (rx_almostSent) {
2081         int drop = (*rx_almostSent) (p, &addr);
2082         /* drop packet if return value is non-zero? */
2083         if (drop)
2084             deliveryType = 'D'; /* Drop the packet */
2085     }
2086 #endif
2087
2088     /* Get network byte order header */
2089     rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
2090                                  * touch ALL the fields */
2091
2092     /* Send the packet out on the same socket that related packets are being
2093      * received on */
2094     socket =
2095         (conn->type ==
2096          RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2097
2098 #ifdef RXDEBUG
2099     /* Possibly drop this packet,  for testing purposes */
2100     if ((deliveryType == 'D')
2101         || ((rx_intentionallyDroppedPacketsPer100 > 0)
2102             && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2103         deliveryType = 'D';     /* Drop the packet */
2104     } else {
2105         deliveryType = 'S';     /* Send the packet */
2106 #endif /* RXDEBUG */
2107
2108         /* Loop until the packet is sent.  We'd prefer just to use a
2109          * blocking socket, but unfortunately the interface doesn't
2110          * allow us to have the socket block in send mode, and not
2111          * block in receive mode */
2112 #ifdef KERNEL
2113         waslocked = ISAFS_GLOCK();
2114 #ifdef RX_KERNEL_TRACE
2115         if (ICL_SETACTIVE(afs_iclSetp)) {
2116             if (!waslocked)
2117                 AFS_GLOCK();
2118             afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2119                        "before osi_NetSend()");
2120             AFS_GUNLOCK();
2121         } else
2122 #else
2123         if (waslocked)
2124             AFS_GUNLOCK();
2125 #endif
2126 #endif
2127         if ((code =
2128              osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2129                          p->length + RX_HEADER_SIZE, istack)) != 0) {
2130             /* send failed, so let's hurry up the resend, eh? */
2131             MUTEX_ENTER(&rx_stats_mutex);
2132             rx_stats.netSendFailures++;
2133             MUTEX_EXIT(&rx_stats_mutex);
2134             p->retryTime = p->timeSent; /* resend it very soon */
2135             clock_Addmsec(&(p->retryTime),
2136                           10 + (((afs_uint32) p->backoff) << 8));
2137
2138 #ifdef AFS_NT40_ENV
2139             /* Windows is nice -- it can tell us right away that we cannot
2140              * reach this recipient by returning an WSAEHOSTUNREACH error
2141              * code.  So, when this happens let's "down" the host NOW so
2142              * we don't sit around waiting for this host to timeout later.
2143              */
2144                 if (call && code == -1 && errno == WSAEHOSTUNREACH)
2145                         call->lastReceiveTime = 0;
2146 #endif
2147 #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
2148             /* Linux is nice -- it can tell us right away that we cannot
2149              * reach this recipient by returning an ENETUNREACH error
2150              * code.  So, when this happens let's "down" the host NOW so
2151              * we don't sit around waiting for this host to timeout later.
2152              */
2153             if (call && code == -ENETUNREACH)
2154                 call->lastReceiveTime = 0;
2155 #endif
2156         }
2157 #ifdef KERNEL
2158 #ifdef RX_KERNEL_TRACE
2159         if (ICL_SETACTIVE(afs_iclSetp)) {
2160             AFS_GLOCK();
2161             afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2162                        "after osi_NetSend()");
2163             if (!waslocked)
2164                 AFS_GUNLOCK();
2165         } else
2166 #else
2167         if (waslocked)
2168             AFS_GLOCK();
2169 #endif
2170 #endif
2171 #ifdef RXDEBUG
2172     }
2173     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2174 #endif
2175     MUTEX_ENTER(&rx_stats_mutex);
2176     rx_stats.packetsSent[p->header.type - 1]++;
2177     MUTEX_EXIT(&rx_stats_mutex);
2178     MUTEX_ENTER(&peer->peer_lock);
2179     hadd32(peer->bytesSent, p->length);
2180     MUTEX_EXIT(&peer->peer_lock);
2181 }
2182
2183 /* Send a list of packets to appropriate destination for the specified
2184  * connection.  The headers are first encoded and placed in the packets.
2185  */
2186 void
2187 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2188                    struct rx_packet **list, int len, int istack)
2189 {
2190 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2191     int waslocked;
2192 #endif
2193     struct sockaddr_in addr;
2194     register struct rx_peer *peer = conn->peer;
2195     osi_socket socket;
2196     struct rx_packet *p = NULL;
2197     struct iovec wirevec[RX_MAXIOVECS];
2198     int i, length, code;
2199     afs_uint32 serial;
2200     afs_uint32 temp;
2201     struct rx_jumboHeader *jp;
2202 #ifdef RXDEBUG
2203     char deliveryType = 'S';
2204 #endif
2205     /* The address we're sending the packet to */
2206     addr.sin_family = AF_INET;
2207     addr.sin_port = peer->port;
2208     addr.sin_addr.s_addr = peer->host;
2209
2210     if (len + 1 > RX_MAXIOVECS) {
2211         osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2212     }
2213
2214     /*
2215      * Stamp the packets in this jumbogram with consecutive serial numbers
2216      */
2217     MUTEX_ENTER(&conn->conn_data_lock);
2218     serial = conn->serial;
2219     conn->serial += len;
2220     MUTEX_EXIT(&conn->conn_data_lock);
2221
2222
2223     /* This stuff should be revamped, I think, so that most, if not
2224      * all, of the header stuff is always added here.  We could
2225      * probably do away with the encode/decode routines. XXXXX */
2226
2227     jp = NULL;
2228     length = RX_HEADER_SIZE;
2229     wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2230     wirevec[0].iov_len = RX_HEADER_SIZE;
2231     for (i = 0; i < len; i++) {
2232         p = list[i];
2233
2234         /* The whole 3.5 jumbogram scheme relies on packets fitting
2235          * in a single packet buffer. */
2236         if (p->niovecs > 2) {
2237             osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2238         }
2239
2240         /* Set the RX_JUMBO_PACKET flags in all but the last packets
2241          * in this chunk.  */
2242         if (i < len - 1) {
2243             if (p->length != RX_JUMBOBUFFERSIZE) {
2244                 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2245             }
2246             p->header.flags |= RX_JUMBO_PACKET;
2247             length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2248             wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2249         } else {
2250             wirevec[i + 1].iov_len = p->length;
2251             length += p->length;
2252         }
2253         wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2254         if (jp != NULL) {
2255             /* Convert jumbo packet header to network byte order */
2256             temp = (afs_uint32) (p->header.flags) << 24;
2257             temp |= (afs_uint32) (p->header.spare);
2258             *(afs_uint32 *) jp = htonl(temp);
2259         }
2260         jp = (struct rx_jumboHeader *)
2261             ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2262
2263         /* Stamp each packet with a unique serial number.  The serial
2264          * number is maintained on a connection basis because some types
2265          * of security may be based on the serial number of the packet,
2266          * and security is handled on a per authenticated-connection
2267          * basis. */
2268         /* Pre-increment, to guarantee no zero serial number; a zero
2269          * serial number means the packet was never sent. */
2270         p->header.serial = ++serial;
2271         /* This is so we can adjust retransmit time-outs better in the face of
2272          * rapidly changing round-trip times.  RTO estimation is not a la Karn.
2273          */
2274         if (p->firstSerial == 0) {
2275             p->firstSerial = p->header.serial;
2276         }
2277 #ifdef RXDEBUG
2278         /* If an output tracer function is defined, call it with the packet and
2279          * network address.  Note this function may modify its arguments. */
2280         if (rx_almostSent) {
2281             int drop = (*rx_almostSent) (p, &addr);
2282             /* drop packet if return value is non-zero? */
2283             if (drop)
2284                 deliveryType = 'D';     /* Drop the packet */
2285         }
2286 #endif
2287
2288         /* Get network byte order header */
2289         rxi_EncodePacketHeader(p);      /* XXX in the event of rexmit, etc, don't need to
2290                                          * touch ALL the fields */
2291     }
2292
2293     /* Send the packet out on the same socket that related packets are being
2294      * received on */
2295     socket =
2296         (conn->type ==
2297          RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2298
2299 #ifdef RXDEBUG
2300     /* Possibly drop this packet,  for testing purposes */
2301     if ((deliveryType == 'D')
2302         || ((rx_intentionallyDroppedPacketsPer100 > 0)
2303             && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2304         deliveryType = 'D';     /* Drop the packet */
2305     } else {
2306         deliveryType = 'S';     /* Send the packet */
2307 #endif /* RXDEBUG */
2308
2309         /* Loop until the packet is sent.  We'd prefer just to use a
2310          * blocking socket, but unfortunately the interface doesn't
2311          * allow us to have the socket block in send mode, and not
2312          * block in receive mode */
2313 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2314         waslocked = ISAFS_GLOCK();
2315         if (!istack && waslocked)
2316             AFS_GUNLOCK();
2317 #endif
2318         if ((code =
2319              osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2320                          istack)) != 0) {
2321             /* send failed, so let's hurry up the resend, eh? */
2322             MUTEX_ENTER(&rx_stats_mutex);
2323             rx_stats.netSendFailures++;
2324             MUTEX_EXIT(&rx_stats_mutex);
2325             for (i = 0; i < len; i++) {
2326                 p = list[i];
2327                 p->retryTime = p->timeSent;     /* resend it very soon */
2328                 clock_Addmsec(&(p->retryTime),
2329                               10 + (((afs_uint32) p->backoff) << 8));
2330             }
2331 #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
2332             /* Linux is nice -- it can tell us right away that we cannot
2333              * reach this recipient by returning an ENETUNREACH error
2334              * code.  So, when this happens let's "down" the host NOW so
2335              * we don't sit around waiting for this host to timeout later.
2336              */
2337             if (call && code == -ENETUNREACH)
2338                 call->lastReceiveTime = 0;
2339 #endif
2340         }
2341 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
2342         if (!istack && waslocked)
2343             AFS_GLOCK();
2344 #endif
2345 #ifdef RXDEBUG
2346     }
2347
2348     assert(p != NULL);
2349
2350     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2351
2352 #endif
2353     MUTEX_ENTER(&rx_stats_mutex);
2354     rx_stats.packetsSent[p->header.type - 1]++;
2355     MUTEX_EXIT(&rx_stats_mutex);
2356     MUTEX_ENTER(&peer->peer_lock);
2357
2358     hadd32(peer->bytesSent, p->length);
2359     MUTEX_EXIT(&peer->peer_lock);
2360 }
2361
2362
2363 /* Send a "special" packet to the peer connection.  If call is
2364  * specified, then the packet is directed to a specific call channel
2365  * associated with the connection, otherwise it is directed to the
2366  * connection only. Uses optionalPacket if it is supplied, rather than
2367  * allocating a new packet buffer.  Nbytes is the length of the data
2368  * portion of the packet.  If data is non-null, nbytes of data are
2369  * copied into the packet.  Type is the type of the packet, as defined
2370  * in rx.h.  Bug: there's a lot of duplication between this and other
2371  * routines.  This needs to be cleaned up. */
2372 struct rx_packet *
2373 rxi_SendSpecial(register struct rx_call *call,
2374                 register struct rx_connection *conn,
2375                 struct rx_packet *optionalPacket, int type, char *data,
2376                 int nbytes, int istack)
2377 {
2378     /* Some of the following stuff should be common code for all
2379      * packet sends (it's repeated elsewhere) */
2380     register struct rx_packet *p;
2381     unsigned int i = 0;
2382     int savelen = 0, saven = 0;
2383     int channel, callNumber;
2384     if (call) {
2385         channel = call->channel;
2386         callNumber = *call->callNumber;
2387         /* BUSY packets refer to the next call on this connection */
2388         if (type == RX_PACKET_TYPE_BUSY) {
2389             callNumber++;
2390         }
2391     } else {
2392         channel = 0;
2393         callNumber = 0;
2394     }
2395     p = optionalPacket;
2396     if (!p) {
2397         p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2398         if (!p)
2399             osi_Panic("rxi_SendSpecial failure");
2400     }
2401
2402     if (nbytes != -1)
2403         p->length = nbytes;
2404     else
2405         nbytes = p->length;
2406     p->header.serviceId = conn->serviceId;
2407     p->header.securityIndex = conn->securityIndex;
2408     p->header.cid = (conn->cid | channel);
2409     p->header.callNumber = callNumber;
2410     p->header.seq = 0;
2411     p->header.epoch = conn->epoch;
2412     p->header.type = type;
2413     p->header.flags = 0;
2414     if (conn->type == RX_CLIENT_CONNECTION)
2415         p->header.flags |= RX_CLIENT_INITIATED;
2416     if (data)
2417         rx_packetwrite(p, 0, nbytes, data);
2418
2419     for (i = 1; i < p->niovecs; i++) {
2420         if (nbytes <= p->wirevec[i].iov_len) {
2421             savelen = p->wirevec[i].iov_len;
2422             saven = p->niovecs;
2423             p->wirevec[i].iov_len = nbytes;
2424             p->niovecs = i + 1; /* so condition fails because i == niovecs */
2425         } else
2426             nbytes -= p->wirevec[i].iov_len;
2427     }
2428
2429     if (call)
2430         rxi_Send(call, p, istack);
2431     else
2432         rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2433     if (saven) {                /* means we truncated the packet above.  We probably don't  */
2434         /* really need to do this, but it seems safer this way, given that  */
2435         /* sneaky optionalPacket... */
2436         p->wirevec[i - 1].iov_len = savelen;
2437         p->niovecs = saven;
2438     }
2439     if (!optionalPacket)
2440         rxi_FreePacket(p);
2441     return optionalPacket;
2442 }
2443
2444
2445 /* Encode the packet's header (from the struct header in the packet to
2446  * the net byte order representation in the wire representation of the
2447  * packet, which is what is actually sent out on the wire) */
2448 void
2449 rxi_EncodePacketHeader(register struct rx_packet *p)
2450 {
2451     register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2452
2453     memset((char *)buf, 0, RX_HEADER_SIZE);
2454     *buf++ = htonl(p->header.epoch);
2455     *buf++ = htonl(p->header.cid);
2456     *buf++ = htonl(p->header.callNumber);
2457     *buf++ = htonl(p->header.seq);
2458     *buf++ = htonl(p->header.serial);
2459     *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2460                    | (((afs_uint32) p->header.flags) << 16)
2461                    | (p->header.userStatus << 8) | p->header.securityIndex);
2462     /* Note: top 16 bits of this next word were reserved */
2463     *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
2464 }
2465
2466 /* Decode the packet's header (from net byte order to a struct header) */
2467 void
2468 rxi_DecodePacketHeader(register struct rx_packet *p)
2469 {
2470     register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2471     afs_uint32 temp;
2472
2473     p->header.epoch = ntohl(*buf);
2474     buf++;
2475     p->header.cid = ntohl(*buf);
2476     buf++;
2477     p->header.callNumber = ntohl(*buf);
2478     buf++;
2479     p->header.seq = ntohl(*buf);
2480     buf++;
2481     p->header.serial = ntohl(*buf);
2482     buf++;
2483
2484     temp = ntohl(*buf);
2485     buf++;
2486
2487     /* C will truncate byte fields to bytes for me */
2488     p->header.type = temp >> 24;
2489     p->header.flags = temp >> 16;
2490     p->header.userStatus = temp >> 8;
2491     p->header.securityIndex = temp >> 0;
2492
2493     temp = ntohl(*buf);
2494     buf++;
2495
2496     p->header.serviceId = (temp & 0xffff);
2497     p->header.spare = temp >> 16;
2498     /* Note: top 16 bits of this last word are the security checksum */
2499 }
2500
2501 void
2502 rxi_PrepareSendPacket(register struct rx_call *call,
2503                       register struct rx_packet *p, register int last)
2504 {
2505     register struct rx_connection *conn = call->conn;
2506     int i, j;
2507     ssize_t len;                /* len must be a signed type; it can go negative */
2508
2509     p->flags &= ~RX_PKTFLAG_ACKED;
2510     p->header.cid = (conn->cid | call->channel);
2511     p->header.serviceId = conn->serviceId;
2512     p->header.securityIndex = conn->securityIndex;
2513
2514     /* No data packets on call 0. Where do these come from? */
2515     if (*call->callNumber == 0)
2516         *call->callNumber = 1;
2517
2518     p->header.callNumber = *call->callNumber;
2519     p->header.seq = call->tnext++;
2520     p->header.epoch = conn->epoch;
2521     p->header.type = RX_PACKET_TYPE_DATA;
2522     p->header.flags = 0;
2523     p->header.spare = 0;
2524     if (conn->type == RX_CLIENT_CONNECTION)
2525         p->header.flags |= RX_CLIENT_INITIATED;
2526
2527     if (last)
2528         p->header.flags |= RX_LAST_PACKET;
2529
2530     clock_Zero(&p->retryTime);  /* Never yet transmitted */
2531     clock_Zero(&p->firstSent);  /* Never yet transmitted */
2532     p->header.serial = 0;       /* Another way of saying never transmitted... */
2533     p->backoff = 0;
2534
2535     /* Now that we're sure this is the last data on the call, make sure
2536      * that the "length" and the sum of the iov_lens matches. */
2537     len = p->length + call->conn->securityHeaderSize;
2538
2539     for (i = 1; i < p->niovecs && len > 0; i++) {
2540         len -= p->wirevec[i].iov_len;
2541     }
2542     if (len > 0) {
2543         osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
2544     } else {
2545         struct rx_queue q;
2546        int nb;
2547
2548         queue_Init(&q);
2549
2550         /* Free any extra elements in the wirevec */
2551         for (j = MAX(2, i), nb = p->niovecs - j; j < p->niovecs; j++) {
2552             queue_Append(&q,RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
2553         }
2554         if (nb)
2555             rxi_FreePackets(nb, &q);
2556
2557         p->niovecs = i;
2558         p->wirevec[i - 1].iov_len += len;
2559     }
2560     RXS_PreparePacket(conn->securityObject, call, p);
2561 }
2562
2563 /* Given an interface MTU size, calculate an adjusted MTU size that
2564  * will make efficient use of the RX buffers when the peer is sending
2565  * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
2566 int
2567 rxi_AdjustIfMTU(int mtu)
2568 {
2569     int adjMTU;
2570     int frags;
2571
2572     adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2573     if (mtu <= adjMTU) {
2574         return mtu;
2575     }
2576     mtu -= adjMTU;
2577     if (mtu <= 0) {
2578         return adjMTU;
2579     }
2580     frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2581     return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2582 }
2583
2584 /* Given an interface MTU size, and the peer's advertised max receive
2585  * size, calculate an adjisted maxMTU size that makes efficient use
2586  * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2587 int
2588 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2589 {
2590     int maxMTU = mtu * rxi_nSendFrags;
2591     maxMTU = MIN(maxMTU, peerMaxMTU);
2592     return rxi_AdjustIfMTU(maxMTU);
2593 }
2594
2595 /* Given a packet size, figure out how many datagram packet will fit.
2596  * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2597  * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2598  * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2599 int
2600 rxi_AdjustDgramPackets(int frags, int mtu)
2601 {
2602     int maxMTU;
2603     if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2604         return 1;
2605     }
2606     maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2607     maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2608     /* subtract the size of the first and last packets */
2609     maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2610     if (maxMTU < 0) {
2611         return 1;
2612     }
2613     return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2614 }