/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */
#include <afsconfig.h>
#ifdef KERNEL
#include "../afs/param.h"
#else
#include <afs/param.h>
#endif

#ifdef KERNEL
#if defined(UKERNEL)
#include "../afs/sysincludes.h"
#include "../afs/afsincludes.h"
#include "../rx/rx_kcommon.h"
#include "../rx/rx_clock.h"
#include "../rx/rx_queue.h"
#include "../rx/rx_packet.h"
#else /* defined(UKERNEL) */
#include "../h/types.h"
#ifndef AFS_LINUX20_ENV
#include "../h/systm.h"
#endif
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "../afs/sysincludes.h"
#endif
#include "../h/socket.h"
#include "../netinet/in.h"
#include "../afs/afs_osi.h"
#include "../rx/rx_kmutex.h"
#include "../rx/rx_clock.h"
#include "../rx/rx_queue.h"
#ifdef AFS_SUN5_ENV
#include <sys/sysmacros.h>
#endif
#include "../rx/rx_packet.h"
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "../sys/mount.h"   /* it gets pulled in by something later anyway */
#endif
#include "../h/mbuf.h"
#endif
#endif /* defined(UKERNEL) */
#include "../rx/rx_globals.h"
#else /* KERNEL */
#include "sys/types.h"
#include <sys/stat.h>
#include <errno.h>
#if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
#ifdef AFS_NT40_ENV
#include <winsock2.h>
#else
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#else
#include <sys/socket.h>
#include <netinet/in.h>
#endif
#ifdef AFS_SUN5_ENV
#include <sys/sysmacros.h>
#endif
#include "rx_packet.h"
#include "rx_globals.h"
#include "rx_internal.h"
#endif /* KERNEL */
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
struct rx_packet *rx_mallocedP = 0;

extern char cml_version_number[];
extern int (*rx_almostSent)();

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport, afs_int32 istack);
/* some rules about packets:
 * 1.  When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */
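
/* Illustrative sketch (mine, not from the original source): how a
 * security package might claim the trailer room described in rule 1.
 * The function name is hypothetical; real packages do this inside
 * their PreparePacket routine.
 */
#if 0 /* example only */
static void example_AddTrailer(struct rx_packet *p, int trailerSize)
{
    struct iovec *last = &p->wirevec[p->niovecs - 1];
    /* the final buffer has slack past iov_len; extend into it */
    last->iov_len += trailerSize;
    p->length += trailerSize;
}
#endif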
/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
  unsigned int i;
  size_t l;

  for (l=0, i=1; i< packet->niovecs ; i++ ) {
    if (l + packet->wirevec[i].iov_len > offset) {
      return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
    } else
      l += packet->wirevec[i].iov_len;
  }

  return 0;
}
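
/* Usage sketch (mine): fetch the second 32-bit word of the user data,
 * wherever it landed in the iovec chain:
 *
 *	afs_int32 w = rx_SlowGetInt32(p, 1 * sizeof(afs_int32));
 *
 * callers normally go through the faster macro in rx_packet.h, which
 * (roughly) only drops into this slow path when the word is not in the
 * first buffer.
 */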
/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
{
  unsigned int i;
  size_t l;

  for (l=0, i=1; i< packet->niovecs ; i++ ) {
    if (l + packet->wirevec[i].iov_len > offset) {
      *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
	  data;
      return 0;
    } else
      l += packet->wirevec[i].iov_len;
  }

  return 0;
}
/*
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
	int resid, char *out)
{
  unsigned int i, j, l, r;

  for (l=0, i=1; i< packet->niovecs ; i++ ) {
    if (l + packet->wirevec[i].iov_len > offset) {
      break;
    } else
      l += packet->wirevec[i].iov_len;
  }

  /* i is the iovec which contains the first little bit of data in which we
   * are interested.  l is the total length of everything prior to this iovec.
   * j is the number of bytes we can safely copy out of this iovec.
   */
  r = resid;
  while ((resid > 0) && (i < packet->niovecs)) {
    j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
    bcopy ((char *)(packet->wirevec[i].iov_base) + (offset - l), out, j);
    resid -= j;
    out += j;
    l += packet->wirevec[i].iov_len;
    offset = l;
    i++;
  }

  return (resid ? (r - resid) : r);
}
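
/* Usage sketch (mine): copy a 16-byte application header out of a
 * received packet, regardless of how many continuation buffers it
 * spans; the return value is the byte count actually copied:
 *
 *	char hdr[16];
 *	afs_int32 n = rx_SlowReadPacket(p, 0, sizeof(hdr), hdr);
 */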
/*
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
 */
afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
	char *in)
{
  int i, j, l, r;
  char * b;

  for (l=0, i=1; i < packet->niovecs; i++ ) {
    if (l + packet->wirevec[i].iov_len > offset) {
      break;
    } else
      l += packet->wirevec[i].iov_len;
  }

  /* i is the iovec which contains the first little bit of data in which we
   * are interested.  l is the total length of everything prior to this iovec.
   * j is the number of bytes we can safely copy out of this iovec.
   */
  r = resid;
  while ((resid > 0) && (i < RX_MAXWVECS)) {
    if (i >= packet->niovecs)
      if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
	break;

    b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
    j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
    bcopy (in, b, j);
    resid -= j;
    in += j;
    l += packet->wirevec[i].iov_len;
    offset = l;
    i++;
  }

  return (resid ? (r - resid) : r);
}
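
/* Usage sketch (mine): the mirror image of the read above; note that
 * unlike the read path this may grow the packet via rxi_AllocDataBuf:
 *
 *	afs_int32 n = rx_SlowWritePacket(p, 0, sizeof(hdr), hdr);
 */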
static struct rx_packet * allocCBuf(int class)
{
  struct rx_packet * c;
  SPLVAR;

  NETPRI;
  MUTEX_ENTER(&rx_freePktQ_lock);

#ifdef KERNEL
  if (rxi_OverQuota(class)) {
    c = NULL;
    rxi_NeedMorePackets = TRUE;
    MUTEX_ENTER(&rx_stats_mutex);
    switch(class) {
    case RX_PACKET_CLASS_RECEIVE:
      rx_stats.receivePktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SEND:
      rx_stats.sendPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SPECIAL:
      rx_stats.specialPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_RECV_CBUF:
      rx_stats.receiveCbufPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SEND_CBUF:
      rx_stats.sendCbufPktAllocFailures++;
      break;
    }
    MUTEX_EXIT(&rx_stats_mutex);
    goto done;
  }

  if (queue_IsEmpty(&rx_freePacketQueue)) {
    c = NULL;
    rxi_NeedMorePackets = TRUE;
    goto done;
  }
#else /* KERNEL */
  if (queue_IsEmpty(&rx_freePacketQueue)) {
    rxi_MorePacketsNoLock(rx_initSendWindow);
  }
#endif /* KERNEL */

  rx_nFreePackets--;
  c = queue_First(&rx_freePacketQueue, rx_packet);
  queue_Remove(c);
  if (c->header.flags != RX_FREE_PACKET)
    osi_Panic("rxi_AllocPacket: packet not free\n");
  c->header.flags = 0;

#ifdef KERNEL
 done:
#endif
  MUTEX_EXIT(&rx_freePktQ_lock);

  USERPRI;
  return c;
}
/*
 * Free a packet currently used as a continuation buffer
 */
void rxi_freeCBuf(struct rx_packet *c)
{
  SPLVAR;

  NETPRI;
  MUTEX_ENTER(&rx_freePktQ_lock);

  rxi_FreePacketNoLock(c);
  /* Wakeup anyone waiting for packets */
  rxi_PacketsUnWait();

  MUTEX_EXIT(&rx_freePktQ_lock);
  USERPRI;
}
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
int rxi_RoundUpPacket(p, nb)
     struct rx_packet * p;
     unsigned int nb;
{
  int i;

  i = p->niovecs - 1;
  if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
    if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
      p->wirevec[i].iov_len += nb;
      return 0;
    }
  }
  else {
    if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
      p->wirevec[i].iov_len += nb;
      return 0;
    }
  }
  return 0;
}
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by allocCBuf */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
  int i;

  for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
    register struct rx_packet *cb;
    if ((cb = allocCBuf(class))) {
      p->wirevec[i].iov_base = (caddr_t) cb->localdata;
      p->wirevec[i].iov_len = RX_CBUFFERSIZE;
      nb -= RX_CBUFFERSIZE;
      p->length += RX_CBUFFERSIZE;
      p->niovecs++;
    }
    else break;
  }

  return nb;
}
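
/* Usage sketch (mine): grow a freshly allocated packet so it can hold
 * 4000 bytes of user data; a positive return is the shortfall:
 *
 *	struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SEND);
 *	int shortfall = rxi_AllocDataBuf(p, 4000 - p->length,
 *					 RX_PACKET_CLASS_SEND_CBUF);
 */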
/* Add more packet buffers */
void rxi_MorePackets(int apackets)
{
  struct rx_packet *p, *e;
  int getme;
  SPLVAR;

  getme = apackets * sizeof(struct rx_packet);
  p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);

  PIN(p, getme);	/* XXXXX */
  bzero((char *)p, getme);

  NETPRI;
  MUTEX_ENTER(&rx_freePktQ_lock);

  for (e = p + apackets; p<e; p++) {
    p->wirevec[0].iov_base = (char *) (p->wirehead);
    p->wirevec[0].iov_len  = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *) (p->localdata);
    p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
    p->header.flags = RX_FREE_PACKET;
    p->niovecs = 2;

    queue_Append(&rx_freePacketQueue, p);
  }
  rx_nFreePackets += apackets;
  rxi_NeedMorePackets = FALSE;
  rxi_PacketsUnWait();

  MUTEX_EXIT(&rx_freePktQ_lock);
  USERPRI;
}
/* Add more packet buffers */
void rxi_MorePacketsNoLock(int apackets)
{
  struct rx_packet *p, *e;
  int getme;

  /* allocate enough packets that 1/4 of the packets will be able
   * to hold maximal amounts of data */
  apackets += (apackets/4)
	      * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
  getme = apackets * sizeof(struct rx_packet);
  p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);

  bzero((char *)p, getme);

  for (e = p + apackets; p<e; p++) {
    p->wirevec[0].iov_base = (char *) (p->wirehead);
    p->wirevec[0].iov_len  = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *) (p->localdata);
    p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
    p->header.flags = RX_FREE_PACKET;
    p->niovecs = 2;

    queue_Append(&rx_freePacketQueue, p);
  }
  rx_nFreePackets += apackets;
  rxi_NeedMorePackets = FALSE;
  rxi_PacketsUnWait();
}
void rxi_FreeAllPackets(void)
{
  /* must be called at proper interrupt level, etcetera */
  /* MTUXXX need to free all Packets */
  osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
  UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
}
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
void rx_CheckPackets()
{
  if (rxi_NeedMorePackets) {
    rxi_MorePackets(rx_initSendWindow);
  }
}
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
void rxi_FreePacketNoLock(struct rx_packet *p)
{
  dpf(("Free %x\n", p));

  if (p->header.flags & RX_FREE_PACKET)
    osi_Panic("rxi_FreePacketNoLock: packet already free\n");
  rx_nFreePackets++;
  p->header.flags = RX_FREE_PACKET;
  queue_Append(&rx_freePacketQueue, p);
}
int rxi_FreeDataBufsNoLock(p, first)
     struct rx_packet * p;
     int first;
{
  struct iovec *iov, *end;

  if (first != 1)          /* MTUXXX */
    osi_Panic("FreeDataBufs 1: first must be 1");
  iov = &p->wirevec[1];
  end = iov + (p->niovecs-1);
  if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
    osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
  for (iov++ ; iov < end ; iov++) {
    if (!iov->iov_base)
      osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
    rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
  }
  p->length = 0;
  p->niovecs = 0;

  return 0;
}
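
/* Aside (mine, for clarity): a continuation buffer is just the localdata
 * area of another rx_packet, so RX_CBUF_TO_PACKET only has to subtract
 * the offset of localdata within struct rx_packet, roughly
 *
 *	(struct rx_packet *)
 *	    ((char *)buf - ((char *)(p)->localdata - (char *)(p)))
 *
 * the real macro lives in rx_packet.h.
 */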
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs. Called when reusing a packet
 * for reading off the wire.
 */
void rxi_RestoreDataBufs(struct rx_packet *p)
{
  int i;
  struct iovec *iov = &p->wirevec[2];

  p->wirevec[0].iov_base = (char *) (p->wirehead);
  p->wirevec[0].iov_len  = RX_HEADER_SIZE;
  p->wirevec[1].iov_base = (char *) (p->localdata);
  p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;

  for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
    if (!iov->iov_base) {
      rxi_nBadIovecs++;
      p->niovecs = i;
      break;
    }
    iov->iov_len = RX_CBUFFERSIZE;
  }
}
int rxi_TrimDataBufs(p, first)
     struct rx_packet * p;
     int first;
{
  int length;
  struct iovec *iov, *end;
  SPLVAR;

  if (first != 1)
    osi_Panic("TrimDataBufs 1: first must be 1");

  /* Skip over continuation buffers containing message data */
  iov = &p->wirevec[2];
  end = iov + (p->niovecs-2);
  length = p->length - p->wirevec[1].iov_len;
  for (; iov < end && length > 0 ; iov++) {
    if (!iov->iov_base)
      osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
    length -= iov->iov_len;
  }

  /* iov now points to the first empty data buffer. */
  if (iov < end) {
    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end ; iov++) {
      if (!iov->iov_base)
	osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
      rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
      p->niovecs--;
    }
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
  }

  return 0;
}
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
void rxi_FreePacket(struct rx_packet *p)
{
  SPLVAR;

  NETPRI;
  MUTEX_ENTER(&rx_freePktQ_lock);

  rxi_FreeDataBufsNoLock(p,1);
  rxi_FreePacketNoLock(p);
  /* Wakeup anyone waiting for packets */
  rxi_PacketsUnWait();

  MUTEX_EXIT(&rx_freePktQ_lock);
  USERPRI;
}
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
struct rx_packet *rxi_AllocPacketNoLock(class)
     int class;
{
  register struct rx_packet *p;

#ifdef KERNEL
  if (rxi_OverQuota(class)) {
    rxi_NeedMorePackets = TRUE;
    MUTEX_ENTER(&rx_stats_mutex);
    switch(class) {
    case RX_PACKET_CLASS_RECEIVE:
      rx_stats.receivePktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SEND:
      rx_stats.sendPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SPECIAL:
      rx_stats.specialPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_RECV_CBUF:
      rx_stats.receiveCbufPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SEND_CBUF:
      rx_stats.sendCbufPktAllocFailures++;
      break;
    }
    MUTEX_EXIT(&rx_stats_mutex);
    return (struct rx_packet *) 0;
  }
#endif /* KERNEL */

  MUTEX_ENTER(&rx_stats_mutex);
  rx_stats.packetRequests++;
  MUTEX_EXIT(&rx_stats_mutex);

#ifdef KERNEL
  if (queue_IsEmpty(&rx_freePacketQueue))
    osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
  if (queue_IsEmpty(&rx_freePacketQueue))
    rxi_MorePacketsNoLock(rx_initSendWindow);
#endif /* KERNEL */

  rx_nFreePackets--;
  p = queue_First(&rx_freePacketQueue, rx_packet);
  if (p->header.flags != RX_FREE_PACKET)
    osi_Panic("rxi_AllocPacket: packet not free\n");

  dpf(("Alloc %x, class %d\n", p, class));

  queue_Remove(p);
  p->header.flags = 0;

  /* have to do this here because rx_FlushWrite fiddles with the iovs in
   * order to truncate outbound packets.  In the near future, may need
   * to allocate bufs from a static pool here, and/or in AllocSendPacket
   */
  p->wirevec[0].iov_base = (char *) (p->wirehead);
  p->wirevec[0].iov_len  = RX_HEADER_SIZE;
  p->wirevec[1].iov_base = (char *) (p->localdata);
  p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
  p->niovecs = 2;
  p->length = RX_FIRSTBUFFERSIZE;
  return p;
}
struct rx_packet *rxi_AllocPacket(class)
     int class;
{
  register struct rx_packet *p;

  MUTEX_ENTER(&rx_freePktQ_lock);
  p = rxi_AllocPacketNoLock(class);
  MUTEX_EXIT(&rx_freePktQ_lock);
  return p;
}
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call. It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
struct rx_packet *rxi_AllocSendPacket(call, want)
register struct rx_call *call;
int want;
{
    register struct rx_packet *p = (struct rx_packet *) 0;
    register int mud;
    register unsigned delta;

    SPLVAR;
    mud = call->MTU - RX_HEADER_SIZE;
    delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	    rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

    while (!(call->error)) {
      MUTEX_ENTER(&rx_freePktQ_lock);
      /* if an error occurred, or we get the packet we want, we're done */
      if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	MUTEX_EXIT(&rx_freePktQ_lock);

	want += delta;
	want = MIN(want, mud);

	if ((unsigned) want > p->length)
	  (void) rxi_AllocDataBuf(p, (want - p->length),
				  RX_PACKET_CLASS_SEND_CBUF);

	if ((unsigned) p->length > mud)
	  p->length = mud;

	if (delta >= p->length) {
	  rxi_FreePacket(p);
	  p = NULL;
	} else {
	  p->length -= delta;
	}
	break;
      }

      /* no error occurred, and we didn't get a packet, so we sleep.
       * At this point, we assume that packets will be returned
       * sooner or later, as packets are acknowledged, and so we
       * just wait.  */
      NETPRI;
      call->flags |= RX_CALL_WAIT_PACKETS;
      CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
      MUTEX_EXIT(&call->lock);
      rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
      CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
      osi_rxSleep(&rx_waitingForPackets);
#endif
      MUTEX_EXIT(&rx_freePktQ_lock);
      MUTEX_ENTER(&call->lock);
      CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
      call->flags &= ~RX_CALL_WAIT_PACKETS;
      USERPRI;
    }

    return p;
}
#ifdef RXDEBUG
/* count the number of used FDs */
static int CountFDs(amax)
register int amax;
{
  struct stat tstat;
  register int i, code;
  register int count;

  count = 0;
  for(i=0;i<amax;i++) {
    code = fstat(i, &tstat);
    if (code == 0) count++;
  }
  return count;
}
#else /* RXDEBUG */
#define CountFDs(amax) amax
#endif /* RXDEBUG */
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
int rxi_ReadPacket(socket, p, host, port)
     osi_socket socket;
     register struct rx_packet *p;
     afs_uint32 *host;
     u_short *port;
{
  struct sockaddr_in from;
  struct msghdr msg;
  int nbytes;
  afs_int32 rlen;
  register afs_int32 tlen, savelen;

  rx_computelen(p, tlen);
  rx_SetDataSize(p, tlen);  /* this is the size of the user data area */

  tlen += RX_HEADER_SIZE;   /* now this is the size of the entire packet */
  rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
			       * it once in order to avoid races.  */
  tlen = rlen - tlen;
  if (tlen > 0) {
    tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    if (tlen > 0) {
      tlen = rlen - tlen;
    }
    else tlen = rlen;
  }
  else tlen = rlen;

  /* Extend the last iovec for padding, it's just to make sure that the
   * read doesn't return more data than we expect, and is done to get around
   * our problems caused by the lack of a length field in the rx header.
   * Use the extra buffer that follows the localdata in each packet
   * structure. */
  savelen = p->wirevec[p->niovecs].iov_len;
  p->wirevec[p->niovecs].iov_len += RX_EXTRABUFFERSIZE;

  bzero((char *)&msg, sizeof(msg));
  msg.msg_name = (char *) &from;
  msg.msg_namelen = sizeof(struct sockaddr_in);
  msg.msg_iov = p->wirevec;
  msg.msg_iovlen = p->niovecs;
  nbytes = rxi_Recvmsg(socket, &msg, 0);

  /* restore the vec to its correct state */
  p->wirevec[p->niovecs].iov_len = savelen;

  p->length = (nbytes - RX_HEADER_SIZE);
  if ((nbytes > tlen) || (p->length & 0x8000)) {  /* Bogus packet */
    if (nbytes > 0)
      rxi_MorePackets(rx_initSendWindow);
    else if (nbytes < 0 && errno == EWOULDBLOCK) {
      MUTEX_ENTER(&rx_stats_mutex);
      rx_stats.noPacketOnRead++;
      MUTEX_EXIT(&rx_stats_mutex);
    }
    else {
      MUTEX_ENTER(&rx_stats_mutex);
      rx_stats.bogusPacketOnRead++;
      rx_stats.bogusHost = from.sin_addr.s_addr;
      MUTEX_EXIT(&rx_stats_mutex);
      dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
	   from.sin_port,nbytes));
    }
    return 0;
  }
  else {
    /* Extract packet header. */
    rxi_DecodePacketHeader(p);

    *host = from.sin_addr.s_addr;
    *port = from.sin_port;
    if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
      struct rx_peer *peer;
      MUTEX_ENTER(&rx_stats_mutex);
      rx_stats.packetsRead[p->header.type-1]++;
      MUTEX_EXIT(&rx_stats_mutex);
      /*
       * Try to look up this peer structure.  If it doesn't exist,
       * don't create a new one -
       * we don't keep count of the bytes sent/received if a peer
       * structure doesn't already exist.
       *
       * The peer/connection cleanup code assumes that there is 1 peer
       * per connection.  If we actually created a peer structure here
       * and this packet was an rxdebug packet, the peer structure would
       * never be cleaned up.
       */
      peer = rxi_FindPeer(*host, *port, 0, 0);
      if (peer) {
	MUTEX_ENTER(&peer->peer_lock);
	hadd32(peer->bytesReceived, p->length);
	MUTEX_EXIT(&peer->peer_lock);
      }
    }

    /* Free any empty packet buffers at the end of this packet */
    rxi_TrimDataBufs(p, 1);

    return 1;
  }
}
#endif /* !KERNEL || UKERNEL */

/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header. All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
struct rx_packet *rxi_SplitJumboPacket(p, host, port, first)
     register struct rx_packet *p;
     afs_int32 host;
     u_short port;
     int first;
{
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    int niov, i;
    struct iovec *iov;
    int length;
    afs_uint32 temp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length. All but the first packet are preceded by
     * an abbreviated four byte header. The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
	return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
	return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov+1;
    for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
	np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    p->niovecs = 2;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *)jp);
    jp->flags = (u_char)(temp >> 24);
    jp->cksum = (u_short)(temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

    return np;
}
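
/* Aside (mine): on the wire a two-packet AFS 3.5 jumbogram looks like
 *
 *	+-----------+-------------------------+--------+---------------+
 *	| rx header | RX_JUMBOBUFFERSIZE data | 4-byte | remaining data|
 *	| (28 bytes)| for packet 1            | jumbo  | for packet 2  |
 *	|           |                         | header |               |
 *	+-----------+-------------------------+--------+---------------+
 *
 * rxi_SplitJumboPacket() above peels packets off the front one at a time.
 */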
#if !defined(KERNEL) || defined(UKERNEL)
/* Send a udp datagram */
int osi_NetSend(socket, addr, dvec, nvecs, length, istack)
     osi_socket socket;
     struct sockaddr_in * addr;
     struct iovec *dvec;
     int nvecs;
     afs_int32 length;
     int istack;
{
    struct msghdr msg;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = (char *) addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    rxi_Sendmsg(socket, &msg, 0);

    return 0;
}
#elif !defined(UKERNEL)
/* osi_NetSend is defined in afs/afs_osinet.c
 * message receipt is done in rxk_input or rx_put.
 */

#ifdef AFS_SUN5_ENV
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
static int cpytoc(mp, off, len, cp)
     mblk_t *mp;
     register int off, len;
     register char * cp;
{
    register int n;

    for (;mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return len;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	bcopy((char *)mp->b_rptr, cp, n);
	cp += n;
	len -= n;
    }
    return len;
}
/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
static int cpytoiovec(mp, off, len, iovs, niovs)
     mblk_t *mp;
     int off, len, niovs;
     register struct iovec *iovs;
{
    register int m,n,o,t,i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return len;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	len -= n;
	while (n) {
	    if (!t) {
		o = 0;
		i++;
		t = iovs[i].iov_len;
	    }
	    m = MIN(n, t);
	    bcopy((char *)mp->b_rptr, iovs[i].iov_base + o, m);
	    mp->b_rptr += m;
	    o += m;
	    t -= m;
	    n -= m;
	}
    }
    return len;
}

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#else /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV)
static int m_cpytoiovec(m, off, len, iovs, niovs)
     struct mbuf *m;
     int off, len, niovs;
     struct iovec iovs[];
{
    caddr_t p1, p2;
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");  /* MTUXXX probably don't need this check */

    while (off > 0) {
	if (m->m_len <= off) {
	    off -= m->m_len;
	    m = m->m_next;
	    if (!m)
		osi_Panic("m_cpytoiovec 2");
	} else
	    break;
    }

    p1 = mtod(m, caddr_t)+off;
    l1 = m->m_len - off;
    i = 0;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    while (len) {
	t = MIN(l1, MIN(l2, (unsigned int)len));
	bcopy(p1, p2, t);
	p1 += t;
	p2 += t;
	l1 -= t;
	l2 -= t;
	len -= t;
	if (!l1) {
	    m = m->m_next;
	    if (!m)
		break;
	    p1 = mtod(m, caddr_t);
	    l1 = m->m_len;
	}
	if (!l2) {
	    if (++i >= niovs)
		break;
	    p2 = iovs[i].iov_base;
	    l2 = iovs[i].iov_len;
	}
    }

    return len;
}
#endif /* AFS_LINUX20_ENV */
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV)
int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#ifdef AFS_SUN5_ENV
     mblk_t *amb;
#else
     struct mbuf *amb;
#endif
     void (*free)();
     struct rx_packet *phandle;
     int hdr_len, data_len;
{
    register int code;

    code = m_cpytoiovec(amb, hdr_len, data_len,
			phandle->wirevec, phandle->niovecs);
    (*free)(amb);

    return code;
}
#endif /* AFS_LINUX20_ENV */
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */

struct rx_packet *rxi_ReceiveDebugPacket(ap, asocket, ahost, aport, istack)
     osi_socket asocket;
     afs_int32 ahost;
     short aport;
     register struct rx_packet *ap;
     afs_int32 istack;
{
    struct rx_debugIn tin;
    afs_int32 tl;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
    } else {
	return ap;
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch(tin.type) {
    case RX_DEBUGI_GETSTATS: {
	struct rx_debugStats tstat;

	/* get basic stats */
	bzero ((char *)&tstat, sizeof(tstat)); /* make sure spares are zero */
	tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	tstat.waitingForPackets = rx_waitingForPackets;
#endif
	tstat.nFreePackets = htonl(rx_nFreePackets);
	tstat.callsExecuted = htonl(rxi_nCalls);
	tstat.packetReclaims = htonl(rx_packetReclaims);
	tstat.usedFDs = CountFDs(64);
	tstat.nWaiting = htonl(rx_nWaiting);
	queue_Count( &rx_idleServerQueue, np, nqe,
		     rx_serverQueueEntry, tstat.idleThreads);
	tstat.idleThreads = htonl(tstat.idleThreads);
	tl = sizeof(struct rx_debugStats) - ap->length;
	if (tl > 0)
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	if (tl <= 0) {
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
	    ap->length = sizeof(struct rx_debugStats);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    rx_computelen(ap, ap->length);
	}
	break;
    }
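
    /* Illustrative sketch (mine, not from this file): the matching
     * client side of the exchange above is just a struct rx_debugIn in
     * network byte order,
     *
     *	struct rx_debugIn tin;
     *	tin.type  = htonl(RX_DEBUGI_GETSTATS);
     *	tin.index = htonl(0);
     *
     * carried in an rx packet whose header has RX_CLIENT_INITIATED set;
     * the rxdebug utility works this way.
     */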
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN: {
	int i, j;
	register struct rx_connection *tc;
	struct rx_call *tcall;
	struct rx_debugConn tconn;
	int all = (tin.type == RX_DEBUGI_GETALLCONN);

	tl = sizeof(struct rx_debugConn) - ap->length;
	if (tl > 0)
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	if (tl > 0)
	    return ap;

	bzero ((char *)&tconn, sizeof(tconn)); /* make sure spares are zero */
	/* get N'th (maybe) "interesting" connection info */
	for(i=0;i<rx_hashTableSize;i++) {
#if !defined(KERNEL)
	    /* the time complexity of the algorithm used here
	     * exponentially increases with the number of connections.
	     */
#ifdef AFS_PTHREAD_ENV
	    pthread_yield();
#else
	    (void) IOMGR_Poll();
#endif
#endif
	    MUTEX_ENTER(&rx_connHashTable_lock);
	    /* We might be slightly out of step since we are not
	     * locking each call, but this is only debugging output.
	     */
	    for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
		if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
		    tconn.host = tc->peer->host;
		    tconn.port = tc->peer->port;
		    tconn.cid = htonl(tc->cid);
		    tconn.epoch = htonl(tc->epoch);
		    tconn.serial = htonl(tc->serial);
		    for(j=0;j<RX_MAXCALLS;j++) {
			tconn.callNumber[j] = htonl(tc->callNumber[j]);
			if ((tcall=tc->call[j])) {
			    tconn.callState[j] = tcall->state;
			    tconn.callMode[j] = tcall->mode;
			    tconn.callFlags[j] = tcall->flags;
			    if (queue_IsNotEmpty(&tcall->rq))
				tconn.callOther[j] |= RX_OTHER_IN;
			    if (queue_IsNotEmpty(&tcall->tq))
				tconn.callOther[j] |= RX_OTHER_OUT;
			}
			else tconn.callState[j] = RX_STATE_NOTINIT;
		    }

		    tconn.natMTU = htonl(tc->peer->natMTU);
		    tconn.error = htonl(tc->error);
		    tconn.flags = tc->flags;
		    tconn.type = tc->type;
		    tconn.securityIndex = tc->securityIndex;
		    if (tc->securityObject) {
			RXS_GetStats (tc->securityObject, tc,
				      &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			DOHTONL(flags);
			DOHTONL(expires);
			DOHTONL(packetsReceived);
			DOHTONL(packetsSent);
			DOHTONL(bytesReceived);
			DOHTONL(bytesSent);
			for (i=0;
			     i<sizeof(tconn.secStats.spares)/sizeof(short);
			     i++)
			    DOHTONS(spares[i]);
			for (i=0;
			     i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
			     i++)
			    DOHTONL(sparel[i]);
		    }

		    MUTEX_EXIT(&rx_connHashTable_lock);
		    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
		    tl = ap->length;
		    ap->length = sizeof(struct rx_debugConn);
		    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
		    ap->length = tl;
		    return ap;
		}
	    }
	    MUTEX_EXIT(&rx_connHashTable_lock);
	}
	/* if we make it here, there are no interesting packets */
	tconn.cid = htonl(0xffffffff); /* means end */
	rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
	tl = ap->length;
	ap->length = sizeof(struct rx_debugConn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
	break;
    }
	/*
	 * Pass back all the peer structures we have available
	 */
    case RX_DEBUGI_GETPEER: {
	int i;
	register struct rx_peer *tp;
	struct rx_debugPeer tpeer;

	tl = sizeof(struct rx_debugPeer) - ap->length;
	if (tl > 0)
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	if (tl > 0)
	    return ap;

	bzero ((char *)&tpeer, sizeof(tpeer));
	for(i=0;i<rx_hashTableSize;i++) {
#if !defined(KERNEL)
	    /* the time complexity of the algorithm used here
	     * exponentially increases with the number of peers.
	     *
	     * Yielding after processing each hash table entry
	     * and dropping rx_peerHashTable_lock
	     * also increases the risk that we will miss a new
	     * entry - but we are willing to live with this
	     * limitation since this is meant for debugging only
	     */
#ifdef AFS_PTHREAD_ENV
	    pthread_yield();
#else
	    (void) IOMGR_Poll();
#endif
#endif
	    MUTEX_ENTER(&rx_peerHashTable_lock);
	    for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
		if (tin.index-- <= 0) {
		    tpeer.host = tp->host;
		    tpeer.port = tp->port;
		    tpeer.ifMTU = htons(tp->ifMTU);
		    tpeer.idleWhen = htonl(tp->idleWhen);
		    tpeer.refCount = htons(tp->refCount);
		    tpeer.burstSize = tp->burstSize;
		    tpeer.burst = tp->burst;
		    tpeer.burstWait.sec = htonl(tp->burstWait.sec);
		    tpeer.burstWait.usec = htonl(tp->burstWait.usec);
		    tpeer.rtt = htonl(tp->rtt);
		    tpeer.rtt_dev = htonl(tp->rtt_dev);
		    tpeer.timeout.sec = htonl(tp->timeout.sec);
		    tpeer.timeout.usec = htonl(tp->timeout.usec);
		    tpeer.nSent = htonl(tp->nSent);
		    tpeer.reSends = htonl(tp->reSends);
		    tpeer.inPacketSkew = htonl(tp->inPacketSkew);
		    tpeer.outPacketSkew = htonl(tp->outPacketSkew);
		    tpeer.rateFlag = htonl(tp->rateFlag);
		    tpeer.natMTU = htons(tp->natMTU);
		    tpeer.maxMTU = htons(tp->maxMTU);
		    tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
		    tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
		    tpeer.MTU = htons(tp->MTU);
		    tpeer.cwind = htons(tp->cwind);
		    tpeer.nDgramPackets = htons(tp->nDgramPackets);
		    tpeer.congestSeq = htons(tp->congestSeq);
		    tpeer.bytesSent.high = htonl(tp->bytesSent.high);
		    tpeer.bytesSent.low = htonl(tp->bytesSent.low);
		    tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
		    tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);

		    MUTEX_EXIT(&rx_peerHashTable_lock);
		    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
		    tl = ap->length;
		    ap->length = sizeof(struct rx_debugPeer);
		    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
		    ap->length = tl;
		    return ap;
		}
	    }
	    MUTEX_EXIT(&rx_peerHashTable_lock);
	}
	/* if we make it here, there are no interesting packets */
	tpeer.host = htonl(0xffffffff); /* means end */
	rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
	tl = ap->length;
	ap->length = sizeof(struct rx_debugPeer);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
	break;
    }
    case RX_DEBUGI_RXSTATS: {
	int i;
	afs_int32 *s;

	tl = sizeof(rx_stats) - ap->length;
	if (tl > 0)
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	if (tl > 0)
	    return ap;

	/* Since its all int32s convert to network order with a loop. */
	MUTEX_ENTER(&rx_stats_mutex);
	s = (afs_int32 *)&rx_stats;
	for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
	    rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));

	tl = ap->length;
	ap->length = sizeof(rx_stats);
	MUTEX_EXIT(&rx_stats_mutex);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
	break;
    }

    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
	tl = ap->length;
	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
	break;
    }
    return ap;
}
struct rx_packet *rxi_ReceiveVersionPacket(ap, asocket, ahost, aport, istack)
     osi_socket asocket;
     afs_int32 ahost;
     short aport;
     register struct rx_packet *ap;
     afs_int32 istack;
{
    char buf[65];

    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	bzero(buf, sizeof(buf));
	snprintf(buf, sizeof(buf), "%s", cml_version_number+4);
	rx_packetwrite(ap, 0, 65, buf);
	ap->length = 65;
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    }

    return ap;
}
/* send a debug packet back to the sender */
static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport, afs_int32 istack)
{
    struct sockaddr_in taddr;
    int i;
    int nbytes;
    int saven = 0;
    int savelen = 0;
#ifdef KERNEL
    int waslocked = ISAFS_GLOCK();
#endif

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i=1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i+1;   /* so condition fails because i == niovecs */
	}
	else nbytes -= apacket->wirevec[i].iov_len;
    }
#ifdef KERNEL
    if (waslocked) AFS_GUNLOCK();
#endif

    /* debug packets are not reliably delivered, hence the cast below. */
    (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		       apacket->length+RX_HEADER_SIZE, istack);

#ifdef KERNEL
    if (waslocked) AFS_GLOCK();
#endif

    if (saven) {  /* means we truncated the packet above. */
	apacket->wirevec[i-1].iov_len = savelen;
	apacket->niovecs = saven;
    }
}
/* Send the packet to appropriate destination for the specified
 * connection.  The header is first encoded and placed in the packet.
 */
void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
		    int istack)
{
#ifdef KERNEL
    int waslocked;
#endif
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    osi_socket socket;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;
    }

#ifdef RXDEBUG
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	if (drop) deliveryType = 'D';	/* Drop the packet */
    }
#endif

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket = (conn->type == RX_CLIENT_CONNECTION
	      ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D') ||
	((rx_intentionallyDroppedPacketsPer100 > 0) &&
	 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';		/* Drop the packet */
    }
    else {
	deliveryType = 'S';		/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#ifdef KERNEL
	waslocked = ISAFS_GLOCK();
	if (waslocked) AFS_GUNLOCK();
#endif
	if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
			p->length+RX_HEADER_SIZE, istack)){
	    /* send failed, so let's hurry up the resend, eh? */
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.netSendFailures++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    p->retryTime = p->timeSent;	/* resend it very soon */
	    clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
	}
#ifdef KERNEL
	if (waslocked) AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
	 peer->host, peer->port, p->header.serial, p->header.epoch,
	 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type-1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
/* Send a list of packets to appropriate destination for the specified
 * connection.  The headers are first encoded and placed in the packets.
 */
void rxi_SendPacketList(struct rx_connection * conn,
			struct rx_packet **list,
			int len,
			int istack)
{
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    int waslocked;
#endif
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    osi_socket socket;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length;
    afs_uint32 serial;
    afs_uint32 temp;
    struct rx_jumboHeader *jp;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    if (len+1 > RX_MAXIOVECS) {
	osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
    }

    /*
     * Stamp the packets in this jumbogram with consecutive serial numbers
     */
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    MUTEX_EXIT(&conn->conn_data_lock);

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    jp = NULL;
    length = RX_HEADER_SIZE;
    wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0 ; i < len ; i++) {
	p = list[i];

	/* The whole 3.5 jumbogram scheme relies on packets fitting
	 * in a single packet buffer. */
	if (p->niovecs > 2) {
	    osi_Panic("rxi_SendPacketList, niovecs > 2\n");
	}

	/* Set the RX_JUMBO_PACKET flags in all but the last packets
	 * in this packet list. */
	if (i < len-1) {
	    if (p->length != RX_JUMBOBUFFERSIZE) {
		osi_Panic("rxi_SendPacketList, length != jumbo size\n");
	    }
	    p->header.flags |= RX_JUMBO_PACKET;
	    length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	    wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	} else {
	    wirevec[i+1].iov_len = p->length;
	    length += p->length;
	}
	wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
	if (jp != NULL) {
	    /* Convert jumbo packet header to network byte order */
	    temp = (afs_uint32)(p->header.flags) << 24;
	    temp |= (afs_uint32)(p->header.spare);
	    *(afs_uint32 *)jp = htonl(temp);
	}
	jp = (struct rx_jumboHeader *)
	     ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);

	/* Stamp each packet with a unique serial number.  The serial
	 * number is maintained on a connection basis because some types
	 * of security may be based on the serial number of the packet,
	 * and security is handled on a per authenticated-connection
	 * basis. */
	/* Pre-increment, to guarantee no zero serial number; a zero
	 * serial number means the packet was never sent. */
	p->header.serial = ++serial;
	/* This is so we can adjust retransmit time-outs better in the face of
	 * rapidly changing round-trip times.  RTO estimation is not a la Karn.
	 */
	if (p->firstSerial == 0) {
	    p->firstSerial = p->header.serial;
	}

#ifdef RXDEBUG
	/* If an output tracer function is defined, call it with the packet and
	 * network address.  Note this function may modify its arguments. */
	if (rx_almostSent) {
	    int drop = (*rx_almostSent) (p, &addr);
	    /* drop packet if return value is non-zero? */
	    if (drop) deliveryType = 'D';	/* Drop the packet */
	}
#endif

	/* Get network byte order header */
	rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
					 * touch ALL the fields */
    }

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket = (conn->type == RX_CLIENT_CONNECTION
	      ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D') ||
	((rx_intentionallyDroppedPacketsPer100 > 0) &&
	 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';		/* Drop the packet */
    }
    else {
	deliveryType = 'S';		/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	waslocked = ISAFS_GLOCK();
	if (!istack && waslocked) AFS_GUNLOCK();
#endif
	if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
	    /* send failed, so let's hurry up the resend, eh? */
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.netSendFailures++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    for (i = 0 ; i < len ; i++) {
		p = list[i];
		p->retryTime = p->timeSent;	/* resend it very soon */
		clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
	    }
	}
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	if (!istack && waslocked) AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
	 peer->host, peer->port, p->header.serial, p->header.epoch,
	 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type-1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only. Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(call, conn, optionalPacket, type, data, nbytes, istack)
     register struct rx_call *call;
     register struct rx_connection *conn;
     struct rx_packet *optionalPacket;
     int type;
     char *data;
     int nbytes, istack;
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
	channel = call->channel;
	callNumber = *call->callNumber;
	/* BUSY packets refer to the next call on this connection */
	if (type == RX_PACKET_TYPE_BUSY) {
	    callNumber++;
	}
    } else {
	channel = 0;
	callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	if (!p) osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
	p->length = nbytes;
    else
	nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
	rx_packetwrite(p, 0, nbytes, data);

    for (i=1; i < p->niovecs; i++) {
	if (nbytes <= p->wirevec[i].iov_len) {
	    savelen = p->wirevec[i].iov_len;
	    saven = p->niovecs;
	    p->wirevec[i].iov_len = nbytes;
	    p->niovecs = i+1;   /* so condition fails because i == niovecs */
	}
	else nbytes -= p->wirevec[i].iov_len;
    }

    if (call) rxi_Send(call, p, istack);
    else rxi_SendPacket(conn, p, istack);
    if (saven) {  /* means we truncated the packet above.  We probably don't  */
	/* really need to do this, but it seems safer this way, given that  */
	/* sneaky optionalPacket... */
	p->wirevec[i-1].iov_len = savelen;
	p->niovecs = saven;
    }
    if (!optionalPacket) rxi_FreePacket(p);
    return optionalPacket;
}
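
/* Usage sketch (mine, not from this file): rx uses this for one-shot
 * control packets; e.g. a call abort looks roughly like
 *
 *	afs_int32 error = htonl(call->error);
 *	rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
 *			(char *)&error, sizeof(error), istack);
 *
 * see the abort-sending routines in rx.c for the real thing.
 */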
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void rxi_EncodePacketHeader(p)
register struct rx_packet *p;
{
    register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */

    bzero((char *)buf, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
		   | (((afs_uint32)p->header.flags)<<16)
		   | (p->header.userStatus<<8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
}
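
/* Aside (mine): the resulting 28-byte wire header is seven 32-bit
 * words, all in network byte order:
 *
 *	word 0: epoch
 *	word 1: cid (connection id | channel)
 *	word 2: callNumber
 *	word 3: seq
 *	word 4: serial
 *	word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *	word 6: spare<<16 | serviceId  (the top 16 bits double as the
 *	                                security package's header checksum)
 *
 * rxi_DecodePacketHeader() below is the exact inverse.
 */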
/* Decode the packet's header (from net byte order to a struct header) */
void rxi_DecodePacketHeader(p)
register struct rx_packet *p;
{
    register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base);      /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf++);
    p->header.cid = ntohl(*buf++);
    p->header.callNumber = ntohl(*buf++);
    p->header.seq = ntohl(*buf++);
    p->header.serial = ntohl(*buf++);
    temp = ntohl(*buf++);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp>>24;
    p->header.flags = temp>>16;
    p->header.userStatus = temp>>8;
    p->header.securityIndex = temp>>0;
    temp = ntohl(*buf++);
    p->header.serviceId = (temp&0xffff);
    p->header.spare = temp>>16;
    /* Note: top 16 bits of this last word are the security checksum */
}
void rxi_PrepareSendPacket(call, p, last)
     register struct rx_call *call;
     register struct rx_packet *p;
     int last;
{
    register struct rx_connection *conn = call->conn;
    int i, j;
    ssize_t len;	/* len must be a signed type; it can go negative */

    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
	p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime); /* Never yet transmitted */
    clock_Zero(&p->firstSent); /* Never yet transmitted */
    p->header.serial = 0;      /* Another way of saying never transmitted... */
    p->backoff = 0;

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i=1; i < p->niovecs && len > 0; i++) {
	len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
	osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
    }
    else {
	/* Free any extra elements in the wirevec */
	for (j = MAX(2,i) ; j < p->niovecs ; j++) {
	    rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
	}
	p->niovecs = i;
	p->wirevec[i-1].iov_len += len;
    }
    RXS_PreparePacket(conn->securityObject, call, p);
}
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
int rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
	return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
	return adjMTU;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
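
/* Worked example (mine): with the stock constants RX_HEADER_SIZE = 28,
 * RX_JUMBOBUFFERSIZE = 1412 and RX_JUMBOHEADERSIZE = 4, an Ethernet MTU
 * of 1500 gives adjMTU = 28 + 1412 + 4 = 1444; the 56 bytes left over
 * are less than one more 1416-byte jumbo buffer, so frags = 0 and the
 * adjusted MTU is 1444.
 */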
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
	return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
	return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
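
/* Worked example (mine): for frags = 4 and an adjusted mtu of 1444,
 * maxMTU = 4*(1444+28) - 28 = 5860 (UDP_HDR_SIZE being 28 for the
 * IP + UDP headers); subtracting 28 + 2*1412 + 4 leaves 3004, which
 * holds 3004/1416 = 2 more middle packets, so (assuming the value
 * clears the RX_MAX_PACKET_SIZE cap) the jumbogram carries 2 + 2 = 4
 * packets.
 */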