src/rx/rx_packet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #ifdef KERNEL
  12 #include "../afs/param.h"
  13 #else
  14 #include <afs/param.h>
  15 #endif
  16
  17 RCSID("$Header$");
  18
  19 #ifdef KERNEL
  20 #if defined(UKERNEL)
  21 #include "../afs/sysincludes.h"
  22 #include "../afs/afsincludes.h"
  23 #include "../rx/rx_kcommon.h"
  24 #include "../rx/rx_clock.h"
  25 #include "../rx/rx_queue.h"
  26 #include "../rx/rx_packet.h"
  27 #else /* defined(UKERNEL) */
  28 #include "../h/types.h"
  29 #ifndef AFS_LINUX20_ENV
  30 #include "../h/systm.h"
  31 #endif
  32 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
  33 #include "../afs/sysincludes.h"
  34 #endif
  35 #include "../h/socket.h"
  36 #if !defined(AFS_SUN5_ENV) &&  !defined(AFS_LINUX20_ENV)
  37 #if     !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
  38 #include "../sys/mount.h"   /* it gets pulled in by something later anyway */
  39 #endif
  40 #include "../h/mbuf.h"
  41 #endif
  42 #include "../netinet/in.h"
  43 #include "../afs/afs_osi.h"
  44 #include "../rx/rx_kmutex.h"
  45 #include "../rx/rx_clock.h"
  46 #include "../rx/rx_queue.h"
  47 #ifdef  AFS_SUN5_ENV
  48 #include <sys/sysmacros.h>
  49 #endif
  50 #include "../rx/rx_packet.h"
  51 #endif /* defined(UKERNEL) */
  52 #include "../rx/rx_globals.h"
  53 #else /* KERNEL */
  54 #include "sys/types.h"
  55 #include <sys/stat.h>
  56 #include <errno.h>
  57 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
  58 #ifdef AFS_NT40_ENV
  59 #include <winsock2.h>
  60 #else
  61 #include <sys/socket.h>
  62 #include <netinet/in.h>
  63 #endif /* AFS_NT40_ENV */
  64 #include "rx_xmit_nt.h"
  65 #include <stdlib.h>
  66 #else
  67 #include <sys/socket.h>
  68 #include <netinet/in.h>
  69 #endif
  70 #include "rx_clock.h"
  71 #include "rx.h"
  72 #include "rx_queue.h"
  73 #ifdef  AFS_SUN5_ENV
  74 #include <sys/sysmacros.h>
  75 #endif
  76 #include "rx_packet.h"
  77 #include "rx_globals.h"
  78 #include <lwp.h>
  79 #include "rx_internal.h"
  80 #ifdef HAVE_STRING_H
  81 #include <string.h>
  82 #else
  83 #ifdef HAVE_STRINGS_H
  84 #include <strings.h>
  85 #endif
  86 #endif
  87 #ifdef HAVE_UNISTD_H
  88 #include <unistd.h>
  89 #endif
  90 #endif /* KERNEL */
  91
  92 #ifdef RX_LOCKS_DB
  93 /* rxdb_fileID is used to identify the lock location, along with line#. */
  94 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
  95 #endif /* RX_LOCKS_DB */
  96 struct rx_packet *rx_mallocedP = 0;
  97
  98 extern char cml_version_number[];
  99 extern int (*rx_almostSent)();
 100
 101 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
 102                                afs_int32 ahost, short aport, afs_int32 istack);
 103
/* Some rules about packets:
 * 1.  When a packet is allocated, the buffer of its final iovec (iov_base)
 * contains room for a security trailer, but iov_len masks that fact.  If
 * the security package wants to add the trailer, it may do so, and then
 * extend iov_len appropriately.  For this reason, a packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */
 111
 112 /* Preconditions:
 113  *        all packet buffers (iov_base) are integral multiples of
 114  *        the word size.
 115  *        offset is an integral multiple of the word size.
 116  */
 117 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
 118 {
 119   unsigned int i;
 120   size_t l;
 121   for (l=0, i=1; i< packet->niovecs ; i++ ) {
 122     if (l + packet->wirevec[i].iov_len > offset) {
 123       return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
 124     }
 125     l += packet->wirevec[i].iov_len;
 126   }
 127
 128   return 0;
 129 }
 130
 131 /* Preconditions:
 132  *        all packet buffers (iov_base) are integral multiples of the word size.
 133  *        offset is an integral multiple of the word size.
 134  */
 135 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
 136 {
 137   unsigned int i;
 138   size_t l;
 139   for (l=0, i=1; i< packet->niovecs ; i++ ) {
 140     if (l + packet->wirevec[i].iov_len > offset) {
 141       *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
 142           data;
 143       return 0;
 144     }
 145     l += packet->wirevec[i].iov_len;
 146   }
 147
 148   return 0;
 149 }
 150
 151 /* Preconditions:
 152  *        all packet buffers (iov_base) are integral multiples of the
 153  *        word size.
 154  *        offset is an integral multiple of the word size.
 155  * Packet Invariants:
 156  *         all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 157  */
 158 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
 159                         int resid, char *out)
 160 {
 161   unsigned int i, j, l, r;
 162   for (l=0, i=1; i< packet->niovecs ; i++ ) {
 163     if (l + packet->wirevec[i].iov_len > offset) {
 164       break;
 165     }
 166     l += packet->wirevec[i].iov_len;
 167   }
 168
 169   /* i is the iovec which contains the first little bit of data in which we
 170    * are interested.  l is the total length of everything prior to this iovec.
 171    * j is the number of bytes we can safely copy out of this iovec.
 172    */
 173   r = resid;
 174   while ((resid > 0) && (i < packet->niovecs)) {
 175     j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
 176     memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
 177     resid -= j;
 178     l += packet->wirevec[i].iov_len;
 179     i++;
 180   }
 181
 182   return (resid ? (r - resid) : r);
 183 }
 184
 185
 186 /* Preconditions:
 187  *        all packet buffers (iov_base) are integral multiples of the
 188  *        word size.
 189  *        offset is an integral multiple of the word size.
 190  */
 191 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
 192                          char *in)
 193 {
 194   int i, j, l, r;
 195   char * b;
 196
 197   for (l=0, i=1; i < packet->niovecs; i++ ) {
 198     if (l + packet->wirevec[i].iov_len > offset) {
 199       break;
 200     }
 201     l += packet->wirevec[i].iov_len;
 202   }
 203
 204   /* i is the iovec which contains the first little bit of data in which we
 205    * are interested.  l is the total length of everything prior to this iovec.
 206    * j is the number of bytes we can safely copy out of this iovec.
 207    */
 208   r = resid;
 209   while ((resid > 0) && (i < RX_MAXWVECS)) {
 210     if (i >= packet->niovecs)
 211       if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
 212         break;
 213
 214     b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
 215     j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
 216     memcpy(b, in, j);
 217     resid -= j;
 218     l += packet->wirevec[i].iov_len;
 219     i++;
 220   }
 221
 222   return (resid ? (r - resid) : r);
 223 }
 224
 225 static struct rx_packet * allocCBuf(int class)
 226 {
 227   struct rx_packet *c;
 228   SPLVAR;
 229
 230   NETPRI;
 231   MUTEX_ENTER(&rx_freePktQ_lock);
 232
 233 #ifdef KERNEL
 234   if (rxi_OverQuota(class)) {
 235     c = NULL;
 236     rxi_NeedMorePackets = TRUE;
 237     MUTEX_ENTER(&rx_stats_mutex);
 238     switch(class) {
 239         case RX_PACKET_CLASS_RECEIVE:
 240             rx_stats.receivePktAllocFailures++;
 241             break;
 242         case RX_PACKET_CLASS_SEND:
 243             rx_stats.sendPktAllocFailures++;
 244             break;
 245         case RX_PACKET_CLASS_SPECIAL:
 246             rx_stats.specialPktAllocFailures++;
 247             break;
 248         case RX_PACKET_CLASS_RECV_CBUF:
 249             rx_stats.receiveCbufPktAllocFailures++;
 250             break;
 251         case RX_PACKET_CLASS_SEND_CBUF:
 252             rx_stats.sendCbufPktAllocFailures++;
 253             break;
 254     }
 255     MUTEX_EXIT(&rx_stats_mutex);
 256     goto done;
 257   }
 258
 259   if (queue_IsEmpty(&rx_freePacketQueue)) {
 260     c = NULL;
 261     rxi_NeedMorePackets = TRUE;
 262     goto done;
 263   }
 264 #else /* KERNEL */
 265   if (queue_IsEmpty(&rx_freePacketQueue)) {
 266     rxi_MorePacketsNoLock(rx_initSendWindow);
 267   }
 268 #endif /* KERNEL */
 269
 270   rx_nFreePackets--;
 271   c = queue_First(&rx_freePacketQueue, rx_packet);
 272   queue_Remove(c);
 273   if (!(c->flags & RX_PKTFLAG_FREE))
 274     osi_Panic("rxi_AllocPacket: packet not free\n");
 275   c->flags = 0;         /* clear RX_PKTFLAG_FREE, initialize the rest */
 276   c->header.flags = 0;
 277
 278 #ifdef KERNEL
 279  done:
 280 #endif
 281   MUTEX_EXIT(&rx_freePktQ_lock);
 282
 283   USERPRI;
 284   return c;
 285 }
 286
 287 /*
 288  * Free a packet currently used as a continuation buffer
 289  */
 290 void rxi_freeCBuf(struct rx_packet *c)
 291 {
 292   SPLVAR;
 293
 294   NETPRI;
 295   MUTEX_ENTER(&rx_freePktQ_lock);
 296
 297   rxi_FreePacketNoLock(c);
 298   /* Wakeup anyone waiting for packets */
 299   rxi_PacketsUnWait();
 300
 301   MUTEX_EXIT(&rx_freePktQ_lock);
 302   USERPRI;
 303 }
 304
 305 /* this one is kind of awful.
 306  * In rxkad, the packet has been all shortened, and everything, ready for
 307  * sending.  All of a sudden, we discover we need some of that space back.
 308  * This isn't terribly general, because it knows that the packets are only
 309  * rounded up to the EBS (userdata + security header).
 310  */
 311 int rxi_RoundUpPacket(p, nb)
 312      struct rx_packet * p;
 313      unsigned int nb;
 314 {
 315   int i;
 316   i = p->niovecs - 1;
 317   if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
 318     if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
 319       p->wirevec[i].iov_len += nb;
 320       return 0;
 321     }
 322   }
 323   else {
 324     if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
 325       p->wirevec[i].iov_len += nb;
 326       return 0;
 327     }
 328   }
 329
 330 return 0;
 331 }
 332 /* get sufficient space to store nb bytes of data (or more), and hook
 333  * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 334  * returns the number of bytes >0 which it failed to come up with.
 335  * Don't need to worry about locking on packet, since only
 336  * one thread can manipulate one at a time. Locking on continution
 337  * packets is handled by allocCBuf */
 338 /* MTUXXX don't need to go throught the for loop if we can trust niovecs */
 339 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
 340 {
 341   int i;
 342
 343   for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
 344       register struct rx_packet *cb;
 345       if ((cb = allocCBuf(class))) {
 346           p->wirevec[i].iov_base = (caddr_t) cb->localdata;
 347           p->wirevec[i].iov_len = RX_CBUFFERSIZE;
 348           nb -= RX_CBUFFERSIZE;
 349           p->length += RX_CBUFFERSIZE;
 350           p->niovecs++;
 351       }
 352       else break;
 353   }
 354
 355   return nb;
 356 }
 357
 358 /* Add more packet buffers */
 359 void rxi_MorePackets(int apackets)
 360 {
 361   struct rx_packet *p, *e;
 362   int getme;
 363   SPLVAR;
 364
 365   getme = apackets * sizeof(struct rx_packet);
 366   p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
 367
 368   PIN(p, getme);        /* XXXXX */
 369   memset((char *)p, 0, getme);
 370   NETPRI;
 371   AFS_RXGLOCK();
 372   MUTEX_ENTER(&rx_freePktQ_lock);
 373
 374   for (e = p + apackets; p<e; p++) {
 375     p->wirevec[0].iov_base = (char *) (p->wirehead);
 376     p->wirevec[0].iov_len  = RX_HEADER_SIZE;
 377     p->wirevec[1].iov_base = (char *) (p->localdata);
 378     p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
 379     p->flags |= RX_PKTFLAG_FREE;
 380     p->niovecs = 2;
 381
 382     queue_Append(&rx_freePacketQueue, p);
 383   }
 384   rx_nFreePackets += apackets;
 385   rxi_NeedMorePackets = FALSE;
 386   rxi_PacketsUnWait();
 387
 388   AFS_RXGUNLOCK();
 389   MUTEX_EXIT(&rx_freePktQ_lock);
 390   USERPRI;
 391 }
 392
 393 #ifndef KERNEL
 394 /* Add more packet buffers */
 395 void rxi_MorePacketsNoLock(int apackets)
 396 {
 397   struct rx_packet *p, *e;
 398   int getme;
 399
 400   /* allocate enough packets that 1/4 of the packets will be able
 401    * to hold maximal amounts of data */
 402   apackets += (apackets/4)
 403               * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
 404   getme = apackets * sizeof(struct rx_packet);
 405   p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
 406
 407   memset((char *)p, 0, getme);
 408
 409   for (e = p + apackets; p<e; p++) {
 410     p->wirevec[0].iov_base = (char *) (p->wirehead);
 411     p->wirevec[0].iov_len  = RX_HEADER_SIZE;
 412     p->wirevec[1].iov_base = (char *) (p->localdata);
 413     p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
 414     p->flags |= RX_PKTFLAG_FREE;
 415     p->niovecs = 2;
 416
 417     queue_Append(&rx_freePacketQueue, p);
 418   }
 419   rx_nFreePackets += apackets;
 420   rxi_NeedMorePackets = FALSE;
 421   rxi_PacketsUnWait();
 422 }
 423 #endif /* !KERNEL */
 424
 425 void rxi_FreeAllPackets(void)
 426 {
 427   /* must be called at proper interrupt level, etcetera */
 428   /* MTUXXX need to free all Packets */
 429   osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
 430   UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
 431 }
 432
 433 /* Allocate more packets iff we need more continuation buffers */
 434 /* In kernel, can't page in memory with interrupts disabled, so we
 435  * don't use the event mechanism. */
 436 void rx_CheckPackets()
 437 {
 438   if (rxi_NeedMorePackets) {
 439     rxi_MorePackets(rx_initSendWindow);
 440   }
 441 }
 442
 443 /* In the packet freeing routine below, the assumption is that
 444    we want all of the packets to be used equally frequently, so that we
 445    don't get packet buffers paging out.  It would be just as valid to
 446    assume that we DO want them to page out if not many are being used.
 447    In any event, we assume the former, and append the packets to the end
 448    of the free list.  */
 449 /* This explanation is bogus.  The free list doesn't remain in any kind of
 450    useful order for afs_int32: the packets in use get pretty much randomly scattered
 451    across all the pages.  In order to permit unused {packets,bufs} to page out, they
 452    must be stored so that packets which are adjacent in memory are adjacent in the
 453    free list.  An array springs rapidly to mind.
 454    */
 455
 456 /* Actually free the packet p. */
 457 void rxi_FreePacketNoLock(struct rx_packet *p)
 458 {
 459   dpf(("Free %x\n", p));
 460
 461   if (p->flags & RX_PKTFLAG_FREE)
 462     osi_Panic("rxi_FreePacketNoLock: packet already free\n");
 463   rx_nFreePackets++;
 464   p->flags |= RX_PKTFLAG_FREE;
 465   queue_Append(&rx_freePacketQueue, p);
 466 }
 467
 468 int rxi_FreeDataBufsNoLock(p, first)
 469      struct rx_packet * p;
 470      int first;
 471 {
 472   struct iovec *iov, *end;
 473
 474   if (first != 1)          /* MTUXXX */
 475       osi_Panic("FreeDataBufs 1: first must be 1");
 476   iov = &p->wirevec[1];
 477   end = iov + (p->niovecs-1);
 478   if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
 479         osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
 480   for (iov++ ; iov < end ; iov++) {
 481     if (!iov->iov_base)
 482         osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
 483     rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 484   }
 485   p->length = 0;
 486   p->niovecs = 0;
 487
 488   return 0;
 489 }
 490
 491 int rxi_nBadIovecs = 0;
 492
 493 /* rxi_RestoreDataBufs
 494  *
 495  * Restore the correct sizes to the iovecs. Called when reusing a packet
 496  * for reading off the wire.
 497  */
 498 void rxi_RestoreDataBufs(struct rx_packet *p)
 499 {
 500     int i;
 501     struct iovec *iov = &p->wirevec[2];
 502
 503     p->wirevec[0].iov_base = (char *) (p->wirehead);
 504     p->wirevec[0].iov_len  = RX_HEADER_SIZE;
 505     p->wirevec[1].iov_base = (char *) (p->localdata);
 506     p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
 507
 508     for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
 509         if (!iov->iov_base) {
 510             rxi_nBadIovecs ++;
 511             p->niovecs = i;
 512             break;
 513         }
 514         iov->iov_len = RX_CBUFFERSIZE;
 515     }
 516 }
 517
 518 int rxi_TrimDataBufs(p, first)
 519      struct rx_packet * p;
 520      int first;
 521 {
 522   int length;
 523   struct iovec *iov, *end;
 524   SPLVAR;
 525
 526   if (first != 1)
 527       osi_Panic("TrimDataBufs 1: first must be 1");
 528
 529   /* Skip over continuation buffers containing message data */
 530   iov = &p->wirevec[2];
 531   end = iov + (p->niovecs-2);
 532   length = p->length - p->wirevec[1].iov_len;
 533   for (; iov < end && length > 0 ; iov++) {
 534     if (!iov->iov_base)
 535         osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
 536     length -= iov->iov_len;
 537   }
 538
 539   /* iov now points to the first empty data buffer. */
 540   if (iov >= end)
 541     return 0;
 542
 543   NETPRI;
 544   MUTEX_ENTER(&rx_freePktQ_lock);
 545
 546   for (; iov < end ; iov++) {
 547     if (!iov->iov_base)
 548         osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
 549     rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 550     p->niovecs--;
 551   }
 552   rxi_PacketsUnWait();
 553
 554   MUTEX_EXIT(&rx_freePktQ_lock);
 555   USERPRI;
 556
 557   return 0;
 558 }
 559
 560 /* Free the packet p.  P is assumed not to be on any queue, i.e.
 561  * remove it yourself first if you call this routine. */
 562 void rxi_FreePacket(struct rx_packet *p)
 563 {
 564   SPLVAR;
 565
 566   NETPRI;
 567   MUTEX_ENTER(&rx_freePktQ_lock);
 568
 569   rxi_FreeDataBufsNoLock(p,1);
 570   rxi_FreePacketNoLock(p);
 571   /* Wakeup anyone waiting for packets */
 572   rxi_PacketsUnWait();
 573
 574   MUTEX_EXIT(&rx_freePktQ_lock);
 575   USERPRI;
 576 }
 577
 578
 579 /* rxi_AllocPacket sets up p->length so it reflects the number of
 580  * bytes in the packet at this point, **not including** the header.
 581  * The header is absolutely necessary, besides, this is the way the
 582  * length field is usually used */
 583 struct rx_packet *rxi_AllocPacketNoLock(class)
 584      int class;
 585 {
 586   register struct rx_packet *p;
 587
 588 #ifdef KERNEL
 589   if (rxi_OverQuota(class)) {
 590     rxi_NeedMorePackets = TRUE;
 591     MUTEX_ENTER(&rx_stats_mutex);
 592     switch(class) {
 593         case RX_PACKET_CLASS_RECEIVE:
 594             rx_stats.receivePktAllocFailures++;
 595             break;
 596         case RX_PACKET_CLASS_SEND:
 597             rx_stats.sendPktAllocFailures++;
 598             break;
 599         case RX_PACKET_CLASS_SPECIAL:
 600             rx_stats.specialPktAllocFailures++;
 601             break;
 602         case RX_PACKET_CLASS_RECV_CBUF:
 603             rx_stats.receiveCbufPktAllocFailures++;
 604             break;
 605         case RX_PACKET_CLASS_SEND_CBUF:
 606             rx_stats.sendCbufPktAllocFailures++;
 607             break;
 608     }
 609     MUTEX_EXIT(&rx_stats_mutex);
 610     return (struct rx_packet *) 0;
 611   }
 612 #endif /* KERNEL */
 613
 614   MUTEX_ENTER(&rx_stats_mutex);
 615   rx_stats.packetRequests++;
 616   MUTEX_EXIT(&rx_stats_mutex);
 617
 618 #ifdef KERNEL
 619   if (queue_IsEmpty(&rx_freePacketQueue))
 620     osi_Panic("rxi_AllocPacket error");
 621 #else /* KERNEL */
 622   if (queue_IsEmpty(&rx_freePacketQueue))
 623     rxi_MorePacketsNoLock(rx_initSendWindow);
 624 #endif /* KERNEL */
 625
 626   rx_nFreePackets--;
 627   p = queue_First(&rx_freePacketQueue, rx_packet);
 628   if (!(p->flags & RX_PKTFLAG_FREE))
 629     osi_Panic("rxi_AllocPacket: packet not free\n");
 630
 631   dpf(("Alloc %x, class %d\n", p, class));
 632
 633   queue_Remove(p);
 634   p->flags = 0;         /* clear RX_PKTFLAG_FREE, initialize the rest */
 635   p->header.flags = 0;
 636
 637   /* have to do this here because rx_FlushWrite fiddles with the iovs in
 638    * order to truncate outbound packets.  In the near future, may need
 639    * to allocate bufs from a static pool here, and/or in AllocSendPacket
 640    */
 641   p->wirevec[0].iov_base = (char *) (p->wirehead);
 642   p->wirevec[0].iov_len  = RX_HEADER_SIZE;
 643   p->wirevec[1].iov_base = (char *) (p->localdata);
 644   p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
 645   p->niovecs = 2;
 646   p->length = RX_FIRSTBUFFERSIZE;
 647   return p;
 648 }
 649
 650 struct rx_packet *rxi_AllocPacket(class)
 651      int class;
 652 {
 653     register struct rx_packet *p;
 654
 655     MUTEX_ENTER(&rx_freePktQ_lock);
 656     p = rxi_AllocPacketNoLock(class);
 657     MUTEX_EXIT(&rx_freePktQ_lock);
 658     return p;
 659 }
 660
 661 /* This guy comes up with as many buffers as it {takes,can get} given
 662  * the MTU for this call. It also sets the packet length before
 663  * returning.  caution: this is often called at NETPRI
 664  * Called with call locked.
 665  */
 666 struct rx_packet *rxi_AllocSendPacket(call, want)
 667 register struct rx_call *call;
 668 int want;
 669 {
 670     register struct rx_packet *p = (struct rx_packet *) 0;
 671     register int mud;
 672     register unsigned delta;
 673
 674     SPLVAR;
 675     mud = call->MTU - RX_HEADER_SIZE;
 676     delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
 677         rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
 678
 679     while (!(call->error)) {
 680       MUTEX_ENTER(&rx_freePktQ_lock);
 681       /* if an error occurred, or we get the packet we want, we're done */
 682       if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
 683         MUTEX_EXIT(&rx_freePktQ_lock);
 684
 685         want += delta;
 686         want = MIN(want, mud);
 687
 688         if ((unsigned) want > p->length)
 689           (void) rxi_AllocDataBuf(p, (want - p->length),
 690                                   RX_PACKET_CLASS_SEND_CBUF);
 691
 692         if ((unsigned) p->length > mud)
 693             p->length = mud;
 694
 695         if (delta >= p->length) {
 696           rxi_FreePacket(p);
 697           p = NULL;
 698         } else {
 699             p->length -= delta;
 700         }
 701         break;
 702       }
 703
 704       /* no error occurred, and we didn't get a packet, so we sleep.
 705        * At this point, we assume that packets will be returned
 706        * sooner or later, as packets are acknowledged, and so we
 707        * just wait.  */
 708       NETPRI;
 709       call->flags |= RX_CALL_WAIT_PACKETS;
 710       CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
 711       MUTEX_EXIT(&call->lock);
 712       rx_waitingForPackets = 1;
 713
 714 #ifdef  RX_ENABLE_LOCKS
 715       CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
 716 #else
 717       osi_rxSleep(&rx_waitingForPackets);
 718 #endif
 719       MUTEX_EXIT(&rx_freePktQ_lock);
 720       MUTEX_ENTER(&call->lock);
 721       CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
 722       call->flags &= ~RX_CALL_WAIT_PACKETS;
 723       USERPRI;
 724     }
 725
 726     return p;
 727 }
 728
 729 #ifndef KERNEL
 730
 731 /* count the number of used FDs */
 732 static int CountFDs(amax)
 733 register int amax; {
 734     struct stat tstat;
 735     register int i, code;
 736     register int count;
 737
 738     count = 0;
 739     for(i=0;i<amax;i++) {
 740         code = fstat(i, &tstat);
 741         if (code == 0) count++;
 742     }
 743     return count;
 744 }
 745
 746 #else /* KERNEL */
 747
 748 #define CountFDs(amax) amax
 749
 750 #endif /* KERNEL */
 751
 752 #if !defined(KERNEL) || defined(UKERNEL)
 753
 754 /* This function reads a single packet from the interface into the
 755  * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 756  * (host,port) of the sender are stored in the supplied variables, and
 757  * the data length of the packet is stored in the packet structure.
 758  * The header is decoded. */
 759 int rxi_ReadPacket(socket, p, host, port)
 760      int socket;
 761      register struct rx_packet *p;
 762      afs_uint32 *host;
 763      u_short *port;
 764 {
 765     struct sockaddr_in from;
 766     int nbytes;
 767     afs_int32 rlen;
 768     register afs_int32 tlen, savelen;
 769     struct msghdr msg;
 770     rx_computelen(p, tlen);
 771     rx_SetDataSize(p, tlen);  /* this is the size of the user data area */
 772
 773     tlen += RX_HEADER_SIZE;   /* now this is the size of the entire packet */
 774     rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
 775                                  * it once in order to avoid races.  */
 776     tlen = rlen - tlen;
 777     if (tlen > 0) {
 778       tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
 779       if (tlen >0) {
 780         tlen = rlen - tlen;
 781       }
 782       else tlen = rlen;
 783     }
 784     else tlen = rlen;
 785
 786    /* Extend the last iovec for padding, it's just to make sure that the
 787     * read doesn't return more data than we expect, and is done to get around
 788     * our problems caused by the lack of a length field in the rx header.
 789     * Use the extra buffer that follows the localdata in each packet
 790     * structure. */
 791     savelen = p->wirevec[p->niovecs-1].iov_len;
 792     p->wirevec[p->niovecs-1].iov_len += RX_EXTRABUFFERSIZE;
 793
 794     memset((char *)&msg, 0, sizeof(msg));
 795     msg.msg_name = (char *) &from;
 796     msg.msg_namelen = sizeof(struct sockaddr_in);
 797     msg.msg_iov = p->wirevec;
 798     msg.msg_iovlen = p->niovecs;
 799     nbytes = rxi_Recvmsg(socket, &msg, 0);
 800
 801    /* restore the vec to its correct state */
 802     p->wirevec[p->niovecs-1].iov_len = savelen;
 803
 804     p->length = (nbytes - RX_HEADER_SIZE);
 805     if ((nbytes > tlen) || (p->length  & 0x8000)) {  /* Bogus packet */
 806       if (nbytes > 0)
 807         rxi_MorePackets(rx_initSendWindow);
 808 #ifndef AFS_NT40_ENV
 809       else if (nbytes < 0 && errno == EWOULDBLOCK) {
 810         MUTEX_ENTER(&rx_stats_mutex);
 811         rx_stats.noPacketOnRead++;
 812         MUTEX_EXIT(&rx_stats_mutex);
 813       }
 814 #endif
 815       else {
 816         MUTEX_ENTER(&rx_stats_mutex);
 817         rx_stats.bogusPacketOnRead++;
 818         rx_stats.bogusHost = from.sin_addr.s_addr;
 819         MUTEX_EXIT(&rx_stats_mutex);
 820         dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
 821              from.sin_port,nbytes));
 822       }
 823       return  0;
 824     }
 825     else {
 826       /* Extract packet header. */
 827       rxi_DecodePacketHeader(p);
 828
 829       *host = from.sin_addr.s_addr;
 830       *port = from.sin_port;
 831       if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
 832         struct rx_peer *peer;
 833         MUTEX_ENTER(&rx_stats_mutex);
 834         rx_stats.packetsRead[p->header.type-1]++;
 835         MUTEX_EXIT(&rx_stats_mutex);
 836         /*
 837          * Try to look up this peer structure.  If it doesn't exist,
 838          * don't create a new one -
 839          * we don't keep count of the bytes sent/received if a peer
 840          * structure doesn't already exist.
 841          *
 842          * The peer/connection cleanup code assumes that there is 1 peer
 843          * per connection.  If we actually created a peer structure here
 844          * and this packet was an rxdebug packet, the peer structure would
 845          * never be cleaned up.
 846          */
 847         peer = rxi_FindPeer(*host, *port, 0, 0);
 848         if (peer) {
 849             MUTEX_ENTER(&peer->peer_lock);
 850             hadd32(peer->bytesReceived, p->length);
 851             MUTEX_EXIT(&peer->peer_lock);
 852         }
 853       }
 854
 855       /* Free any empty packet buffers at the end of this packet */
 856       rxi_TrimDataBufs(p, 1);
 857
 858       return  1;
 859     }
 860 }
 861
 862 #endif /* !KERNEL || UKERNEL */
 863
 864 /* This function splits off the first packet in a jumbo packet.
 865  * As of AFS 3.5, jumbograms contain more than one fixed size
 866  * packet, and the RX_JUMBO_PACKET flag is set in all but the
 867  * last packet header. All packets (except the last) are padded to
 868  * fall on RX_CBUFFERSIZE boundaries.
 869  * HACK: We store the length of the first n-1 packets in the
 870  * last two pad bytes. */
 871
 872 struct rx_packet *rxi_SplitJumboPacket(p, host, port, first)
 873      register struct rx_packet *p;
 874      afs_int32 host;
 875      short port;
 876      int first;
 877 {
 878     struct rx_packet *np;
 879     struct rx_jumboHeader *jp;
 880     int niov, i;
 881     struct iovec *iov;
 882     int length;
 883     afs_uint32 temp;
 884
 885     /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
 886      * bytes in length. All but the first packet are preceded by
 887      * an abbreviated four byte header. The length of the last packet
 888      * is calculated from the size of the jumbogram. */
 889     length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
 890
 891     if ((int)p->length < length) {
 892         dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
 893         return NULL;
 894     }
 895     niov = p->niovecs - 2;
 896     if (niov < 1) {
 897         dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
 898         return NULL;
 899     }
 900     iov = &p->wirevec[2];
 901     np = RX_CBUF_TO_PACKET(iov->iov_base, p);
 902
 903     /* Get a pointer to the abbreviated packet header */
 904     jp = (struct rx_jumboHeader *)
 905          ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
 906
 907     /* Set up the iovecs for the next packet */
 908     np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
 909     np->wirevec[0].iov_len = sizeof(struct rx_header);
 910     np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
 911     np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
 912     np->niovecs = niov+1;
 913     for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
 914         np->wirevec[i] = *iov;
 915     }
 916     np->length = p->length - length;
 917     p->length = RX_JUMBOBUFFERSIZE;
 918     p->niovecs = 2;
 919
 920     /* Convert the jumbo packet header to host byte order */
 921     temp = ntohl(*(afs_uint32 *)jp);
 922     jp->flags = (u_char)(temp >> 24);
 923     jp->cksum = (u_short)(temp);
 924
 925     /* Fill in the packet header */
 926     np->header = p->header;
 927     np->header.serial = p->header.serial + 1;
 928     np->header.seq = p->header.seq + 1;
 929     np->header.flags = jp->flags;
 930     np->header.spare = jp->cksum;
 931
 932     return np;
 933 }
 934
 935 #ifndef KERNEL
 936 /* Send a udp datagram */
 937 int osi_NetSend(socket, addr, dvec, nvecs, length, istack)
 938     osi_socket socket;
 939     char * addr;
 940     struct iovec *dvec;
 941     int nvecs;
 942     int length;
 943     int istack;
 944 {
 945     struct msghdr msg;
 946
 947     memset(&msg, 0, sizeof(msg));
 948     msg.msg_iov = dvec;
 949     msg.msg_iovlen = nvecs;
 950     msg.msg_name = addr;
 951     msg.msg_namelen = sizeof(struct sockaddr_in);
 952
 953     rxi_Sendmsg(socket, &msg, 0);
 954
 955     return 0;
 956 }
 957 #elif !defined(UKERNEL)
 958 /* osi_NetSend is defined in afs/afs_osinet.c
 959  * message receipt is done in rxk_input or rx_put.
 960  */
 961
 962 #ifdef AFS_SUN5_ENV
 963 /*
 964  * Copy an mblock to the contiguous area pointed to by cp.
 965  * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 966  * but it doesn't really.
 967  * Returns the number of bytes not transferred.
 968  * The message is NOT changed.
 969  */
 970 static int cpytoc(mp, off, len, cp)
 971     mblk_t *mp;
 972     register int off, len;
 973     register char * cp;
 974 {
 975     register int n;
 976
 977     for (;mp && len > 0; mp = mp->b_cont) {
 978         if (mp->b_datap->db_type != M_DATA) {
 979             return -1;
 980         }
 981         n = MIN(len, (mp->b_wptr - mp->b_rptr));
 982         memcpy(cp, (char *)mp->b_rptr, n);
 983         cp += n;
 984         len -= n;
 985         mp->b_rptr += n;
 986     }
 987     return (len);
 988 }
 989
 990 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 991  * but it doesn't really.
 992  * This sucks, anyway, do it like m_cpy.... below
 993  */
 994 static int cpytoiovec(mp, off, len, iovs, niovs)
 995     mblk_t *mp;
 996     int off, len, niovs;
 997     register struct iovec *iovs;
 998 {
 999     register int m,n,o,t,i;
1000
1001     for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1002         if (mp->b_datap->db_type != M_DATA) {
1003             return -1;
1004         }
1005         n = MIN(len, (mp->b_wptr - mp->b_rptr));
1006         len -= n;
1007         while (n) {
1008           if (!t) {
1009             o=0;
1010             i++;
1011             t = iovs[i].iov_len;
1012           }
1013           m = MIN(n,t);
1014           memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1015           mp->b_rptr += m;
1016           o += m;
1017           t -= m;
1018           n -= m;
1019         }
1020     }
1021     return (len);
1022 }
1023 #define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
1024 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1025 #else
1026 #if !defined(AFS_LINUX20_ENV)
1027 static int m_cpytoiovec(m, off, len, iovs, niovs)
1028      struct mbuf *m;
1029      int off, len, niovs;
1030      struct iovec iovs[];
1031 {
1032   caddr_t p1, p2;
1033   unsigned int l1, l2, i, t;
1034
1035   if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1036     osi_Panic("m_cpytoiovec");  /* MTUXXX probably don't need this check */
1037
1038   while (off && m)
1039     if (m->m_len <= off) {
1040       off -= m->m_len;
1041       m = m->m_next;
1042       continue;
1043     } else
1044       break;
1045
1046   if (m == NULL)
1047     return len;
1048
1049   p1 = mtod(m, caddr_t)+off;
1050   l1 = m->m_len - off;
1051   i = 0;
1052   p2 = iovs[0].iov_base;
1053   l2 = iovs[0].iov_len;
1054
1055   while (len) {
1056     t = MIN(l1, MIN(l2, (unsigned int)len));
1057     memcpy(p2, p1, t);
1058     p1 += t;    p2 += t;
1059     l1 -= t;    l2 -= t;
1060     len -= t;
1061     if (!l1) {
1062       m = m->m_next;
1063       if (!m)
1064         break;
1065       p1 = mtod(m, caddr_t);
1066       l1 = m->m_len;
1067     }
1068     if (!l2) {
1069       if (++i >= niovs)
1070         break;
1071       p2 = iovs[i].iov_base;
1072       l2 = iovs[i].iov_len;
1073     }
1074
1075   }
1076
1077 return len;
1078 }
1079 #endif /* LINUX */
1080 #endif /* AFS_SUN5_ENV */
1081
1082 #if !defined(AFS_LINUX20_ENV)
1083 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1084 #ifdef  AFS_SUN5_ENV
1085 mblk_t *amb;
1086 #else
1087 struct mbuf *amb;
1088 #endif
1089 void (*free)();
1090 struct rx_packet *phandle;
1091 int hdr_len, data_len;
1092 {
1093   register int code;
1094
1095   code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1096   (*free)(amb);
1097
1098   return code;
1099 }
1100 #endif /* LINUX */
1101 #endif /*KERNEL && !UKERNEL*/
1102
1103
1104 /* send a response to a debug packet */
1105
1106 struct rx_packet *rxi_ReceiveDebugPacket(ap, asocket, ahost, aport, istack)
1107   osi_socket asocket;
1108   afs_int32 ahost;
1109   short aport;
1110   register struct rx_packet *ap;
1111   int istack;
1112 {
1113     struct rx_debugIn tin;
1114     afs_int32 tl;
1115     struct rx_serverQueueEntry *np, *nqe;
1116
1117     /*
1118      * Only respond to client-initiated Rx debug packets,
1119      * and clear the client flag in the response.
1120      */
1121     if (ap->header.flags & RX_CLIENT_INITIATED) {
1122         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1123         rxi_EncodePacketHeader(ap);
1124     } else {
1125         return ap;
1126     }
1127
1128     rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1129     /* all done with packet, now set length to the truth, so we can
1130      * reuse this packet */
1131     rx_computelen(ap, ap->length);
1132
1133     tin.type = ntohl(tin.type);
1134     tin.index = ntohl(tin.index);
1135     switch(tin.type) {
1136         case RX_DEBUGI_GETSTATS: {
1137             struct rx_debugStats tstat;
1138
1139             /* get basic stats */
1140             memset((char *)&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1141             tstat.version = RX_DEBUGI_VERSION;
1142 #ifndef RX_ENABLE_LOCKS
1143             tstat.waitingForPackets = rx_waitingForPackets;
1144 #endif
1145             tstat.nFreePackets = htonl(rx_nFreePackets);
1146             tstat.callsExecuted = htonl(rxi_nCalls);
1147             tstat.packetReclaims = htonl(rx_packetReclaims);
1148             tstat.usedFDs = CountFDs(64);
1149             tstat.nWaiting = htonl(rx_nWaiting);
1150             queue_Count( &rx_idleServerQueue, np, nqe,
1151                                 rx_serverQueueEntry, tstat.idleThreads);
1152             tstat.idleThreads = htonl(tstat.idleThreads);
1153             tl = sizeof(struct rx_debugStats) - ap->length;
1154             if (tl > 0)
1155               tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1156
1157             if (tl <= 0) {
1158               rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1159               ap->length = sizeof(struct rx_debugStats);
1160               rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1161               rx_computelen(ap, ap->length);
1162             }
1163             break;
1164         }
1165
1166         case RX_DEBUGI_GETALLCONN:
1167         case RX_DEBUGI_GETCONN: {
1168             int i, j;
1169             register struct rx_connection *tc;
1170             struct rx_call *tcall;
1171             struct rx_debugConn tconn;
1172             int all = (tin.type == RX_DEBUGI_GETALLCONN);
1173
1174
1175             tl = sizeof(struct rx_debugConn) - ap->length;
1176             if (tl > 0)
1177               tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1178             if (tl > 0)
1179               return ap;
1180
1181             memset((char *)&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1182             /* get N'th (maybe) "interesting" connection info */
1183             for(i=0;i<rx_hashTableSize;i++) {
1184 #if !defined(KERNEL)
1185                 /* the time complexity of the algorithm used here
1186                  * exponentially increses with the number of connections.
1187                  */
1188 #ifdef AFS_PTHREAD_ENV
1189                 pthread_yield();
1190 #else
1191                 (void) IOMGR_Poll();
1192 #endif
1193 #endif
1194                 MUTEX_ENTER(&rx_connHashTable_lock);
1195                 /* We might be slightly out of step since we are not
1196                  * locking each call, but this is only debugging output.
1197                  */
1198                 for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
1199                     if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1200                         tconn.host = tc->peer->host;
1201                         tconn.port = tc->peer->port;
1202                         tconn.cid = htonl(tc->cid);
1203                         tconn.epoch = htonl(tc->epoch);
1204                         tconn.serial = htonl(tc->serial);
1205                         for(j=0;j<RX_MAXCALLS;j++) {
1206                             tconn.callNumber[j] = htonl(tc->callNumber[j]);
1207                             if ((tcall=tc->call[j])) {
1208                                 tconn.callState[j] = tcall->state;
1209                                 tconn.callMode[j] = tcall->mode;
1210                                 tconn.callFlags[j] = tcall->flags;
1211                                 if (queue_IsNotEmpty(&tcall->rq))
1212                                     tconn.callOther[j] |= RX_OTHER_IN;
1213                                 if (queue_IsNotEmpty(&tcall->tq))
1214                                     tconn.callOther[j] |= RX_OTHER_OUT;
1215                             }
1216                             else tconn.callState[j] = RX_STATE_NOTINIT;
1217                         }
1218
1219                         tconn.natMTU = htonl(tc->peer->natMTU);
1220                         tconn.error = htonl(tc->error);
1221                         tconn.flags = tc->flags;
1222                         tconn.type = tc->type;
1223                         tconn.securityIndex = tc->securityIndex;
1224                         if (tc->securityObject) {
1225                             RXS_GetStats (tc->securityObject, tc,
1226                                           &tconn.secStats);
1227 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1228 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1229                             DOHTONL(flags);
1230                             DOHTONL(expires);
1231                             DOHTONL(packetsReceived);
1232                             DOHTONL(packetsSent);
1233                             DOHTONL(bytesReceived);
1234                             DOHTONL(bytesSent);
1235                             for (i=0;
1236                                  i<sizeof(tconn.secStats.spares)/sizeof(short);
1237                                  i++)
1238                                 DOHTONS(spares[i]);
1239                             for (i=0;
1240                                  i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
1241                                  i++)
1242                                 DOHTONL(sparel[i]);
1243                         }
1244
1245                         MUTEX_EXIT(&rx_connHashTable_lock);
1246                         rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1247                         tl = ap->length;
1248                         ap->length = sizeof(struct rx_debugConn);
1249                         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1250                         ap->length = tl;
1251                         return ap;
1252                     }
1253                 }
1254                 MUTEX_EXIT(&rx_connHashTable_lock);
1255             }
1256             /* if we make it here, there are no interesting packets */
1257             tconn.cid = htonl(0xffffffff); /* means end */
1258             rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1259             tl = ap->length;
1260             ap->length = sizeof(struct rx_debugConn);
1261             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1262             ap->length = tl;
1263             break;
1264         }
1265
1266         /*
1267          * Pass back all the peer structures we have available
1268          */
1269
1270         case RX_DEBUGI_GETPEER: {
1271             int i;
1272             register struct rx_peer *tp;
1273             struct rx_debugPeer tpeer;
1274
1275
1276             tl = sizeof(struct rx_debugPeer) - ap->length;
1277             if (tl > 0)
1278               tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1279             if (tl > 0)
1280               return ap;
1281
1282             memset((char *)&tpeer, 0, sizeof(tpeer));
1283             for(i=0;i<rx_hashTableSize;i++) {
1284 #if !defined(KERNEL)
1285                 /* the time complexity of the algorithm used here
1286                  * exponentially increses with the number of peers.
1287                  *
1288                  * Yielding after processing each hash table entry
1289                  * and dropping rx_peerHashTable_lock.
1290                  * also increases the risk that we will miss a new
1291                  * entry - but we are willing to live with this
1292                  * limitation since this is meant for debugging only
1293                  */
1294 #ifdef AFS_PTHREAD_ENV
1295                 pthread_yield();
1296 #else
1297                 (void) IOMGR_Poll();
1298 #endif
1299 #endif
1300                 MUTEX_ENTER(&rx_peerHashTable_lock);
1301                 for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
1302                     if (tin.index-- <= 0) {
1303                         tpeer.host = tp->host;
1304                         tpeer.port = tp->port;
1305                         tpeer.ifMTU = htons(tp->ifMTU);
1306                         tpeer.idleWhen = htonl(tp->idleWhen);
1307                         tpeer.refCount = htons(tp->refCount);
1308                         tpeer.burstSize = tp->burstSize;
1309                         tpeer.burst = tp->burst;
1310                         tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1311                         tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1312                         tpeer.rtt = htonl(tp->rtt);
1313                         tpeer.rtt_dev = htonl(tp->rtt_dev);
1314                         tpeer.timeout.sec = htonl(tp->timeout.sec);
1315                         tpeer.timeout.usec = htonl(tp->timeout.usec);
1316                         tpeer.nSent = htonl(tp->nSent);
1317                         tpeer.reSends = htonl(tp->reSends);
1318                         tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1319                         tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1320                         tpeer.rateFlag = htonl(tp->rateFlag);
1321                         tpeer.natMTU = htons(tp->natMTU);
1322                         tpeer.maxMTU = htons(tp->maxMTU);
1323                         tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1324                         tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1325                         tpeer.MTU = htons(tp->MTU);
1326                         tpeer.cwind = htons(tp->cwind);
1327                         tpeer.nDgramPackets = htons(tp->nDgramPackets);
1328                         tpeer.congestSeq = htons(tp->congestSeq);
1329                         tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1330                         tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1331                         tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1332                         tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
1333
1334                         MUTEX_EXIT(&rx_peerHashTable_lock);
1335                         rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1336                         tl = ap->length;
1337                         ap->length = sizeof(struct rx_debugPeer);
1338                         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1339                         ap->length = tl;
1340                         return ap;
1341                     }
1342                 }
1343                 MUTEX_EXIT(&rx_peerHashTable_lock);
1344             }
1345             /* if we make it here, there are no interesting packets */
1346             tpeer.host = htonl(0xffffffff); /* means end */
1347             rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1348             tl = ap->length;
1349             ap->length = sizeof(struct rx_debugPeer);
1350             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1351             ap->length = tl;
1352             break;
1353         }
1354
1355         case RX_DEBUGI_RXSTATS: {
1356             int i;
1357             afs_int32 *s;
1358
1359             tl = sizeof(rx_stats) - ap->length;
1360             if (tl > 0)
1361               tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1362             if (tl > 0)
1363               return ap;
1364
1365             /* Since its all int32s convert to network order with a loop. */
1366             MUTEX_ENTER(&rx_stats_mutex);
1367             s = (afs_int32 *)&rx_stats;
1368             for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1369                 rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1370
1371             tl = ap->length;
1372             ap->length = sizeof(rx_stats);
1373             MUTEX_EXIT(&rx_stats_mutex);
1374             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1375             ap->length = tl;
1376             break;
1377         }
1378
1379         default:
1380             /* error response packet */
1381             tin.type = htonl(RX_DEBUGI_BADTYPE);
1382             tin.index = tin.type;
1383             rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1384             tl = ap->length;
1385             ap->length = sizeof(struct rx_debugIn);
1386             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1387             ap->length = tl;
1388             break;
1389     }
1390     return ap;
1391 }
1392
1393 struct rx_packet *rxi_ReceiveVersionPacket(ap, asocket, ahost, aport, istack)
1394   osi_socket asocket;
1395   afs_int32 ahost;
1396   short aport;
1397   register struct rx_packet *ap;
1398   int istack;
1399 {
1400     afs_int32 tl;
1401
1402     /*
1403      * Only respond to client-initiated version requests, and
1404      * clear that flag in the response.
1405      */
1406     if (ap->header.flags & RX_CLIENT_INITIATED) {
1407         char buf[66];
1408
1409         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1410         rxi_EncodePacketHeader(ap);
1411         memset(buf, 0, sizeof(buf));
1412         strncpy(buf, cml_version_number+4, sizeof(buf)-1);
1413         rx_packetwrite(ap, 0, 65, buf);
1414         tl = ap->length;
1415         ap->length = 65;
1416         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1417         ap->length = tl;
1418     }
1419
1420     return ap;
1421 }
1422
1423
1424 /* send a debug packet back to the sender */
1425 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1426                                afs_int32 ahost, short aport, afs_int32 istack)
1427 {
1428     struct sockaddr_in taddr;
1429     int i;
1430     int nbytes;
1431     int saven = 0;
1432     size_t savelen = 0;
1433 #ifdef KERNEL
1434     int waslocked = ISAFS_GLOCK();
1435 #endif
1436
1437     taddr.sin_family = AF_INET;
1438     taddr.sin_port = aport;
1439     taddr.sin_addr.s_addr = ahost;
1440 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
1441     taddr.sin_len = sizeof(struct sockaddr_in);
1442 #endif
1443
1444     /* We need to trim the niovecs. */
1445     nbytes = apacket->length;
1446     for (i=1; i < apacket->niovecs; i++) {
1447       if (nbytes <= apacket->wirevec[i].iov_len) {
1448         savelen = apacket->wirevec[i].iov_len;
1449         saven = apacket->niovecs;
1450         apacket->wirevec[i].iov_len = nbytes;
1451         apacket->niovecs = i+1;   /* so condition fails because i == niovecs */
1452       }
1453       else nbytes -= apacket->wirevec[i].iov_len;
1454     }
1455     AFS_RXGUNLOCK();
1456 #ifdef KERNEL
1457     if (waslocked) AFS_GUNLOCK();
1458 #endif
1459     /* debug packets are not reliably delivered, hence the cast below. */
1460     (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1461                        apacket->length+RX_HEADER_SIZE, istack);
1462 #ifdef KERNEL
1463     if (waslocked) AFS_GLOCK();
1464 #endif
1465     AFS_RXGLOCK();
1466     if (saven) {  /* means we truncated the packet above. */
1467       apacket->wirevec[i-1].iov_len = savelen;
1468       apacket->niovecs = saven;
1469     }
1470
1471 }
1472
1473 /* Send the packet to appropriate destination for the specified
1474  * connection.  The header is first encoded and placed in the packet.
1475  */
1476 void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
1477                     int istack)
1478 {
1479 #if defined(KERNEL)
1480     int waslocked;
1481 #endif
1482     struct sockaddr_in addr;
1483     register struct rx_peer *peer = conn->peer;
1484     osi_socket socket;
1485 #ifdef RXDEBUG
1486     char deliveryType = 'S';
1487 #endif
1488     /* The address we're sending the packet to */
1489     addr.sin_family = AF_INET;
1490     addr.sin_port = peer->port;
1491     addr.sin_addr.s_addr = peer->host;
1492
1493     /* This stuff should be revamped, I think, so that most, if not
1494      * all, of the header stuff is always added here.  We could
1495      * probably do away with the encode/decode routines. XXXXX */
1496
1497     /* Stamp each packet with a unique serial number.  The serial
1498      * number is maintained on a connection basis because some types
1499      * of security may be based on the serial number of the packet,
1500      * and security is handled on a per authenticated-connection
1501      * basis. */
1502     /* Pre-increment, to guarantee no zero serial number; a zero
1503      * serial number means the packet was never sent. */
1504     MUTEX_ENTER(&conn->conn_data_lock);
1505     p->header.serial = ++conn->serial;
1506     MUTEX_EXIT(&conn->conn_data_lock);
1507     /* This is so we can adjust retransmit time-outs better in the face of
1508      * rapidly changing round-trip times.  RTO estimation is not a la Karn.
1509      */
1510     if (p->firstSerial == 0) {
1511        p->firstSerial = p->header.serial;
1512      }
1513
1514 #ifdef RXDEBUG
1515     /* If an output tracer function is defined, call it with the packet and
1516      * network address.  Note this function may modify its arguments. */
1517     if (rx_almostSent) {
1518         int drop = (*rx_almostSent) (p, &addr);
1519         /* drop packet if return value is non-zero? */
1520         if (drop) deliveryType = 'D';   /* Drop the packet */
1521     }
1522 #endif
1523
1524     /* Get network byte order header */
1525     rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
1526                                  * touch ALL the fields */
1527
1528     /* Send the packet out on the same socket that related packets are being
1529      * received on */
1530     socket = (conn->type == RX_CLIENT_CONNECTION
1531               ? rx_socket : conn->service->socket);
1532
1533 #ifdef RXDEBUG
1534     /* Possibly drop this packet,  for testing purposes */
1535     if ((deliveryType == 'D') ||
1536         ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1537          (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1538         deliveryType = 'D';             /* Drop the packet */
1539     }
1540     else {
1541         deliveryType = 'S';             /* Send the packet */
1542 #endif /* RXDEBUG */
1543
1544         /* Loop until the packet is sent.  We'd prefer just to use a
1545          * blocking socket, but unfortunately the interface doesn't
1546          * allow us to have the socket block in send mode, and not
1547          * block in receive mode */
1548         AFS_RXGUNLOCK();
1549 #ifdef KERNEL
1550         waslocked = ISAFS_GLOCK();
1551         if (waslocked) AFS_GUNLOCK();
1552 #endif
1553         if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1554                         p->length+RX_HEADER_SIZE, istack)){
1555           /* send failed, so let's hurry up the resend, eh? */
1556           MUTEX_ENTER(&rx_stats_mutex);
1557           rx_stats.netSendFailures++;
1558           MUTEX_EXIT(&rx_stats_mutex);
1559           p->retryTime = p->timeSent;  /* resend it very soon */
1560           clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1561         }
1562 #ifdef KERNEL
1563         if (waslocked) AFS_GLOCK();
1564 #endif
1565         AFS_RXGLOCK();
1566 #ifdef RXDEBUG
1567     }
1568     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1569          deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1570          peer->host, peer->port, p->header.serial, p->header.epoch,
1571          p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1572          p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1573 #endif
1574     MUTEX_ENTER(&rx_stats_mutex);
1575     rx_stats.packetsSent[p->header.type-1]++;
1576     MUTEX_EXIT(&rx_stats_mutex);
1577     MUTEX_ENTER(&peer->peer_lock);
1578     hadd32(peer->bytesSent, p->length);
1579     MUTEX_EXIT(&peer->peer_lock);
1580 }
1581
1582 /* Send a list of packets to appropriate destination for the specified
1583  * connection.  The headers are first encoded and placed in the packets.
1584  */
1585 void rxi_SendPacketList(struct rx_connection * conn,
1586                         struct rx_packet **list,
1587                         int len,
1588                         int istack)
1589 {
1590 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
1591     int waslocked;
1592 #endif
1593     struct sockaddr_in addr;
1594     register struct rx_peer *peer = conn->peer;
1595     osi_socket socket;
1596     struct rx_packet *p = NULL;
1597     struct iovec wirevec[RX_MAXIOVECS];
1598     int i, length;
1599     afs_uint32 serial;
1600     afs_uint32 temp;
1601     struct rx_jumboHeader *jp;
1602 #ifdef RXDEBUG
1603     char deliveryType = 'S';
1604 #endif
1605     /* The address we're sending the packet to */
1606     addr.sin_family = AF_INET;
1607     addr.sin_port = peer->port;
1608     addr.sin_addr.s_addr = peer->host;
1609
1610     if (len+1 > RX_MAXIOVECS) {
1611         osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1612     }
1613
1614     /*
1615      * Stamp the packets in this jumbogram with consecutive serial numbers
1616      */
1617     MUTEX_ENTER(&conn->conn_data_lock);
1618     serial = conn->serial;
1619     conn->serial += len;
1620     MUTEX_EXIT(&conn->conn_data_lock);
1621
1622
1623     /* This stuff should be revamped, I think, so that most, if not
1624      * all, of the header stuff is always added here.  We could
1625      * probably do away with the encode/decode routines. XXXXX */
1626
1627     jp = NULL;
1628     length = RX_HEADER_SIZE;
1629     wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1630     wirevec[0].iov_len = RX_HEADER_SIZE;
1631     for (i = 0 ; i < len ; i++) {
1632         p = list[i];
1633
1634         /* The whole 3.5 jumbogram scheme relies on packets fitting
1635          * in a single packet buffer. */
1636         if (p->niovecs > 2) {
1637             osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1638         }
1639
1640         /* Set the RX_JUMBO_PACKET flags in all but the last packets
1641          * in this chunk.  */
1642         if (i < len-1) {
1643             if (p->length != RX_JUMBOBUFFERSIZE) {
1644                 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1645             }
1646             p->header.flags |= RX_JUMBO_PACKET;
1647             length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1648             wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1649         } else {
1650             wirevec[i+1].iov_len = p->length;
1651             length += p->length;
1652         }
1653         wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
1654         if (jp != NULL) {
1655             /* Convert jumbo packet header to network byte order */
1656             temp = (afs_uint32)(p->header.flags) << 24;
1657             temp |= (afs_uint32)(p->header.spare);
1658             *(afs_uint32 *)jp = htonl(temp);
1659         }
1660         jp = (struct rx_jumboHeader *)
1661              ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1662
1663         /* Stamp each packet with a unique serial number.  The serial
1664          * number is maintained on a connection basis because some types
1665          * of security may be based on the serial number of the packet,
1666          * and security is handled on a per authenticated-connection
1667          * basis. */
1668         /* Pre-increment, to guarantee no zero serial number; a zero
1669          * serial number means the packet was never sent. */
1670         p->header.serial = ++serial;
1671         /* This is so we can adjust retransmit time-outs better in the face of
1672          * rapidly changing round-trip times.  RTO estimation is not a la Karn.
1673          */
1674         if (p->firstSerial == 0) {
1675            p->firstSerial = p->header.serial;
1676         }
1677
1678 #ifdef RXDEBUG
1679         /* If an output tracer function is defined, call it with the packet and
1680          * network address.  Note this function may modify its arguments. */
1681         if (rx_almostSent) {
1682             int drop = (*rx_almostSent) (p, &addr);
1683             /* drop packet if return value is non-zero? */
1684             if (drop) deliveryType = 'D';       /* Drop the packet */
1685         }
1686 #endif
1687
1688         /* Get network byte order header */
1689         rxi_EncodePacketHeader(p);      /* XXX in the event of rexmit, etc, don't need to
1690                                      * touch ALL the fields */
1691     }
1692
1693     /* Send the packet out on the same socket that related packets are being
1694      * received on */
1695     socket = (conn->type == RX_CLIENT_CONNECTION
1696               ? rx_socket : conn->service->socket);
1697
1698 #ifdef RXDEBUG
1699     /* Possibly drop this packet,  for testing purposes */
1700     if ((deliveryType == 'D') ||
1701         ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1702          (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1703         deliveryType = 'D';             /* Drop the packet */
1704     }
1705     else {
1706         deliveryType = 'S';             /* Send the packet */
1707 #endif /* RXDEBUG */
1708
1709         /* Loop until the packet is sent.  We'd prefer just to use a
1710          * blocking socket, but unfortunately the interface doesn't
1711          * allow us to have the socket block in send mode, and not
1712          * block in receive mode */
1713         AFS_RXGUNLOCK();
1714 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
1715         waslocked = ISAFS_GLOCK();
1716         if (!istack && waslocked) AFS_GUNLOCK();
1717 #endif
1718         if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
1719           /* send failed, so let's hurry up the resend, eh? */
1720           MUTEX_ENTER(&rx_stats_mutex);
1721           rx_stats.netSendFailures++;
1722           MUTEX_EXIT(&rx_stats_mutex);
1723           for (i = 0 ; i < len ; i++) {
1724             p = list[i];
1725             p->retryTime = p->timeSent;  /* resend it very soon */
1726             clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1727           }
1728         }
1729 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
1730         if (!istack && waslocked) AFS_GLOCK();
1731 #endif
1732         AFS_RXGLOCK();
1733 #ifdef RXDEBUG
1734     }
1735     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1736          deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1737          peer->host, peer->port, p->header.serial, p->header.epoch,
1738          p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1739          p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1740 #endif
1741     MUTEX_ENTER(&rx_stats_mutex);
1742     rx_stats.packetsSent[p->header.type-1]++;
1743     MUTEX_EXIT(&rx_stats_mutex);
1744     MUTEX_ENTER(&peer->peer_lock);
1745     hadd32(peer->bytesSent, p->length);
1746     MUTEX_EXIT(&peer->peer_lock);
1747 }
1748
1749
1750 /* Send a "special" packet to the peer connection.  If call is
1751  * specified, then the packet is directed to a specific call channel
1752  * associated with the connection, otherwise it is directed to the
1753  * connection only. Uses optionalPacket if it is supplied, rather than
1754  * allocating a new packet buffer.  Nbytes is the length of the data
1755  * portion of the packet.  If data is non-null, nbytes of data are
1756  * copied into the packet.  Type is the type of the packet, as defined
1757  * in rx.h.  Bug: there's a lot of duplication between this and other
1758  * routines.  This needs to be cleaned up. */
1759 struct rx_packet *
1760 rxi_SendSpecial(call, conn, optionalPacket, type, data, nbytes, istack)
1761     register struct rx_call *call;
1762     register struct rx_connection *conn;
1763     struct rx_packet *optionalPacket;
1764     int type;
1765     char *data;
1766     int nbytes, istack;
1767 {
1768     /* Some of the following stuff should be common code for all
1769      * packet sends (it's repeated elsewhere) */
1770     register struct rx_packet *p;
1771     unsigned int i = 0;
1772     int savelen = 0, saven = 0;
1773     int channel, callNumber;
1774     if (call) {
1775         channel = call->channel;
1776         callNumber = *call->callNumber;
1777         /* BUSY packets refer to the next call on this connection */
1778         if (type == RX_PACKET_TYPE_BUSY) {
1779             callNumber++;
1780         }
1781     } else {
1782         channel = 0;
1783         callNumber = 0;
1784     }
1785     p = optionalPacket;
1786     if (!p) {
1787         p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1788         if (!p) osi_Panic("rxi_SendSpecial failure");
1789     }
1790
1791     if (nbytes != -1)
1792       p->length = nbytes;
1793     else
1794       nbytes = p->length;
1795     p->header.serviceId = conn->serviceId;
1796     p->header.securityIndex = conn->securityIndex;
1797     p->header.cid = (conn->cid | channel);
1798     p->header.callNumber = callNumber;
1799     p->header.seq = 0;
1800     p->header.epoch = conn->epoch;
1801     p->header.type = type;
1802     p->header.flags = 0;
1803     if (conn->type == RX_CLIENT_CONNECTION)
1804        p->header.flags |= RX_CLIENT_INITIATED;
1805     if (data)
1806       rx_packetwrite(p, 0, nbytes, data);
1807
1808     for (i=1; i < p->niovecs; i++) {
1809       if (nbytes <= p->wirevec[i].iov_len) {
1810         savelen = p->wirevec[i].iov_len;
1811         saven = p->niovecs;
1812         p->wirevec[i].iov_len = nbytes;
1813         p->niovecs = i+1;   /* so condition fails because i == niovecs */
1814       }
1815       else nbytes -= p->wirevec[i].iov_len;
1816     }
1817
1818     if (call) rxi_Send(call, p, istack);
1819     else rxi_SendPacket(conn, p, istack);
1820     if (saven) {  /* means we truncated the packet above.  We probably don't  */
1821       /* really need to do this, but it seems safer this way, given that  */
1822       /* sneaky optionalPacket... */
1823       p->wirevec[i-1].iov_len = savelen;
1824       p->niovecs = saven;
1825     }
1826     if (!optionalPacket) rxi_FreePacket(p);
1827     return optionalPacket;
1828 }
1829
1830
1831 /* Encode the packet's header (from the struct header in the packet to
1832  * the net byte order representation in the wire representation of the
1833  * packet, which is what is actually sent out on the wire) */
1834 void rxi_EncodePacketHeader(p)
1835 register struct rx_packet *p;
1836 {
1837     register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base);      /* MTUXXX */
1838
1839     memset((char *)buf, 0, RX_HEADER_SIZE);
1840     *buf++ = htonl(p->header.epoch);
1841     *buf++ = htonl(p->header.cid);
1842     *buf++ = htonl(p->header.callNumber);
1843     *buf++ = htonl(p->header.seq);
1844     *buf++ = htonl(p->header.serial);
1845     *buf++ = htonl(  (((afs_uint32)p->header.type)<<24)
1846                    | (((afs_uint32)p->header.flags)<<16)
1847                    | (p->header.userStatus<<8) | p->header.securityIndex);
1848     /* Note: top 16 bits of this next word were reserved */
1849     *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1850 }
1851
1852 /* Decode the packet's header (from net byte order to a struct header) */
1853 void rxi_DecodePacketHeader(p)
1854 register struct rx_packet *p;
1855 {
1856     register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base);      /* MTUXXX */
1857     afs_uint32 temp;
1858
1859     p->header.epoch = ntohl(*buf);
1860     buf++;
1861     p->header.cid = ntohl(*buf);
1862     buf++;
1863     p->header.callNumber = ntohl(*buf);
1864     buf++;
1865     p->header.seq = ntohl(*buf);
1866     buf++;
1867     p->header.serial = ntohl(*buf);
1868     buf++;
1869
1870     temp = ntohl(*buf);
1871     buf++;
1872
1873     /* C will truncate byte fields to bytes for me */
1874     p->header.type = temp>>24;
1875     p->header.flags = temp>>16;
1876     p->header.userStatus = temp>>8;
1877     p->header.securityIndex = temp>>0;
1878
1879     temp = ntohl(*buf);
1880     buf++;
1881
1882     p->header.serviceId = (temp&0xffff);
1883     p->header.spare = temp>>16;
1884     /* Note: top 16 bits of this last word are the security checksum */
1885 }
1886
1887 void rxi_PrepareSendPacket(call, p, last)
1888     register struct rx_call *call;
1889     register struct rx_packet *p;
1890     register int last;
1891 {
1892     register struct rx_connection *conn = call->conn;
1893     int i, j;
1894     ssize_t len;        /* len must be a signed type; it can go negative */
1895
1896     p->flags &= ~RX_PKTFLAG_ACKED;
1897     p->header.cid = (conn->cid | call->channel);
1898     p->header.serviceId = conn->serviceId;
1899     p->header.securityIndex = conn->securityIndex;
1900     p->header.callNumber = *call->callNumber;
1901     p->header.seq = call->tnext++;
1902     p->header.epoch = conn->epoch;
1903     p->header.type = RX_PACKET_TYPE_DATA;
1904     p->header.flags = 0;
1905     p->header.spare = 0;
1906     if (conn->type == RX_CLIENT_CONNECTION)
1907       p->header.flags |= RX_CLIENT_INITIATED;
1908
1909     if (last)
1910       p->header.flags |= RX_LAST_PACKET;
1911
1912     clock_Zero(&p->retryTime); /* Never yet transmitted */
1913     clock_Zero(&p->firstSent); /* Never yet transmitted */
1914     p->header.serial = 0;      /* Another way of saying never transmitted... */
1915     p->backoff = 0;
1916
1917     /* Now that we're sure this is the last data on the call, make sure
1918      * that the "length" and the sum of the iov_lens matches. */
1919     len = p->length + call->conn->securityHeaderSize;
1920
1921     for (i=1; i < p->niovecs && len > 0; i++) {
1922       len -=  p->wirevec[i].iov_len;
1923     }
1924     if (len > 0) {
1925       osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1926     }
1927     else {
1928       /* Free any extra elements in the wirevec */
1929       for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1930         rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
1931       }
1932       p->niovecs = i;
1933       p->wirevec[i-1].iov_len += len;
1934     }
1935     RXS_PreparePacket(conn->securityObject, call, p);
1936 }
1937
1938 /* Given an interface MTU size, calculate an adjusted MTU size that
1939  * will make efficient use of the RX buffers when the peer is sending
1940  * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
1941 int rxi_AdjustIfMTU(int mtu)
1942 {
1943     int adjMTU;
1944     int frags;
1945
1946     adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1947     if (mtu <= adjMTU) {
1948         return mtu;
1949     }
1950     mtu -= adjMTU;
1951     if (mtu <= 0) {
1952         return adjMTU;
1953     }
1954     frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1955     return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1956 }
1957
1958 /* Given an interface MTU size, and the peer's advertised max receive
1959  * size, calculate an adjisted maxMTU size that makes efficient use
1960  * of our packet buffers when we are sending AFS 3.4a jumbograms. */
1961 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1962 {
1963     int maxMTU = mtu * rxi_nSendFrags;
1964     maxMTU = MIN(maxMTU, peerMaxMTU);
1965     return rxi_AdjustIfMTU(maxMTU);
1966 }
1967
1968 /* Given a packet size, figure out how many datagram packet will fit.
1969  * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1970  * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1971  * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
1972 int rxi_AdjustDgramPackets(int frags, int mtu)
1973 {
1974     int maxMTU;
1975     if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
1976         return 1;
1977     }
1978     maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1979     maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1980     /* subtract the size of the first and last packets */
1981     maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
1982     if (maxMTU < 0) {
1983         return 1;
1984     }
1985     return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1986 }