2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
11 #include "../afs/param.h"
12 #include <afsconfig.h>
14 #include "../afs/sysincludes.h"
15 #include "../afs/afsincludes.h"
16 #include "../rx/rx_kcommon.h"
17 #include "../rx/rx_clock.h"
18 #include "../rx/rx_queue.h"
19 #include "../rx/rx_packet.h"
20 #else /* defined(UKERNEL) */
21 #include "../h/types.h"
22 #ifndef AFS_LINUX20_ENV
23 #include "../h/systm.h"
25 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
26 #include "../afs/sysincludes.h"
28 #include "../h/socket.h"
29 #include "../netinet/in.h"
30 #include "../afs/afs_osi.h"
31 #include "../rx/rx_kmutex.h"
32 #include "../rx/rx_clock.h"
33 #include "../rx/rx_queue.h"
35 #include <sys/sysmacros.h>
37 #include "../rx/rx_packet.h"
38 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
39 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
40 #include "../sys/mount.h" /* it gets pulled in by something later anyway */
42 #include "../h/mbuf.h"
44 #endif /* defined(UKERNEL) */
45 #include "../rx/rx_globals.h"
47 #include <afs/param.h>
48 #include <afsconfig.h>
49 #include "sys/types.h"
52 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
56 #include <sys/socket.h>
57 #include <netinet/in.h>
58 #endif /* AFS_NT40_ENV */
59 #include "rx_xmit_nt.h"
62 #include <sys/socket.h>
63 #include <netinet/in.h>
69 #include <sys/sysmacros.h>
71 #include "rx_packet.h"
72 #include "rx_globals.h"
74 #include "rx_internal.h"
87 /* rxdb_fileID is used to identify the lock location, along with line#. */
88 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
89 #endif /* RX_LOCKS_DB */
/* Head of the most recent slab of packets handed out by osi_Alloc in
 * rxi_MorePackets*() — see those routines below. */
90 struct rx_packet *rx_mallocedP = 0;
92 extern char cml_version_number[];
93 extern int (*rx_almostSent)();
95 void rxi_FreePacketNoLock(struct rx_packet *p);
96 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
97 afs_int32 ahost, short aport, afs_int32 istack);
/* NOTE(review): the two extern declarations below duplicate lines 92-93 above;
 * presumably one pair is inside a different #ifdef arm — confirm in full source. */
99 extern char cml_version_number[];
100 extern int (*rx_almostSent)();
101 /* some rules about packets:
102 * 1. When a packet is allocated, the final iov_buf contains room for
103 * a security trailer, but iov_len masks that fact. If the security
104 * package wants to add the trailer, it may do so, and then extend
105 * iov_len appropriately. For this reason, packet's niovecs and
106 * iov_len fields should be accurate before calling PreparePacket.
110 * all packet buffers (iov_base) are integral multiples of
112 * offset is an integral multiple of the word size.
/* Read a 32-bit word from a packet at a word-aligned byte offset, walking the
 * iovec chain (slow path used when the offset falls past the first buffer).
 * Scanning starts at wirevec[1]; wirevec[0] is the wire header.
 * NOTE(review): this listing is missing interior lines (declarations of l/i,
 * loop close, and the out-of-range return) — do not edit without full source. */
114 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
118 for (l=0, i=1; i< packet->niovecs ; i++ ) {
119 if (l + packet->wirevec[i].iov_len > offset) {
120 return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
122 l += packet->wirevec[i].iov_len;
129 * all packet buffers (iov_base) are integral multiples of the word size.
130 * offset is an integral multiple of the word size.
/* Store a 32-bit word into a packet at a word-aligned byte offset, walking the
 * iovec chain (slow-path counterpart of rx_SlowGetInt32 above).
 * NOTE(review): interior lines are missing from this listing (the assignment's
 * continuation and the return value) — consult the full source before editing. */
132 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
136 for (l=0, i=1; i< packet->niovecs ; i++ ) {
137 if (l + packet->wirevec[i].iov_len > offset) {
138 *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
142 l += packet->wirevec[i].iov_len;
149 * all packet buffers (iov_base) are integral multiples of the
151 * offset is an integral multiple of the word size.
153 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
/* Copy up to `resid` bytes out of the packet, starting at byte `offset`, into
 * `out`.  First locates the iovec containing `offset`, then bcopy's piecewise
 * across subsequent iovecs.  Returns the number of bytes actually copied
 * (r - resid when the packet ran out early, else r).
 * NOTE(review): several interior lines (the `break`, `r = resid`, and the
 * per-chunk offset/resid updates) are missing from this listing. */
155 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
156 int resid, char *out)
158 unsigned int i, j, l, r;
159 for (l=0, i=1; i< packet->niovecs ; i++ ) {
160 if (l + packet->wirevec[i].iov_len > offset) {
163 l += packet->wirevec[i].iov_len;
166 /* i is the iovec which contains the first little bit of data in which we
167 * are interested. l is the total length of everything prior to this iovec.
168 * j is the number of bytes we can safely copy out of this iovec.
171 while ((resid > 0) && (i < packet->niovecs)) {
172 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
173 bcopy ((char *)(packet->wirevec[i].iov_base) + (offset - l), out, j);
175 l += packet->wirevec[i].iov_len;
179 return (resid ? (r - resid) : r);
184 * all packet buffers (iov_base) are integral multiples of the
186 * offset is an integral multiple of the word size.
/* Copy `resid` bytes from `in` into the packet starting at byte `offset`.
 * Unlike the read path, this may grow the packet: when the write runs off the
 * end of the existing iovecs it calls rxi_AllocDataBuf to attach continuation
 * buffers (which bumps niovecs as a side-effect).  Returns bytes written.
 * NOTE(review): interior lines (the `break`s, the bcopy of `b`, and the
 * offset/resid bookkeeping) are missing from this listing. */
188 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
194 for (l=0, i=1; i < packet->niovecs; i++ ) {
195 if (l + packet->wirevec[i].iov_len > offset) {
198 l += packet->wirevec[i].iov_len;
201 /* i is the iovec which contains the first little bit of data in which we
202 * are interested. l is the total length of everything prior to this iovec.
203 * j is the number of bytes we can safely copy out of this iovec.
206 while ((resid > 0) && (i < RX_MAXWVECS)) {
207 if (i >= packet->niovecs)
208 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
211 b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
212 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
215 l += packet->wirevec[i].iov_len;
219 return (resid ? (r - resid) : r);
/* Allocate a packet to be used as a continuation buffer for the given
 * allocation class.  Takes rx_freePktQ_lock itself.  On over-quota, bumps the
 * per-class failure counter (under rx_stats_mutex) and fails; otherwise pops
 * the first packet off rx_freePacketQueue, panicking if its flags say it was
 * not actually free.
 * NOTE(review): the switch header, case `break`s, queue_Remove, and the
 * return statements are among the lines missing from this listing. */
222 static struct rx_packet * allocCBuf(int class)
226 extern void rxi_MorePacketsNoLock();
231 MUTEX_ENTER(&rx_freePktQ_lock);
234 if (rxi_OverQuota(class)) {
236 rxi_NeedMorePackets = TRUE;
237 MUTEX_ENTER(&rx_stats_mutex);
239 case RX_PACKET_CLASS_RECEIVE:
240 rx_stats.receivePktAllocFailures++;
242 case RX_PACKET_CLASS_SEND:
243 rx_stats.sendPktAllocFailures++;
245 case RX_PACKET_CLASS_SPECIAL:
246 rx_stats.specialPktAllocFailures++;
248 case RX_PACKET_CLASS_RECV_CBUF:
249 rx_stats.receiveCbufPktAllocFailures++;
251 case RX_PACKET_CLASS_SEND_CBUF:
252 rx_stats.sendCbufPktAllocFailures++;
255 MUTEX_EXIT(&rx_stats_mutex);
259 if (queue_IsEmpty(&rx_freePacketQueue)) {
261 rxi_NeedMorePackets = TRUE;
265 if (queue_IsEmpty(&rx_freePacketQueue)) {
266 rxi_MorePacketsNoLock(rx_initSendWindow);
271 c = queue_First(&rx_freePacketQueue, rx_packet);
273 if (c->header.flags != RX_FREE_PACKET)
274 osi_Panic("rxi_AllocPacket: packet not free\n");
280 MUTEX_EXIT(&rx_freePktQ_lock);
287 * Free a packet currently used as a continuation buffer
/* Return a packet that was being used as a continuation buffer to the free
 * queue.  Takes rx_freePktQ_lock around the unlocked free, then wakes any
 * threads blocked waiting for packets (wakeup call is among the lines missing
 * from this listing). */
289 void rxi_freeCBuf(struct rx_packet *c)
291 extern void rxi_PacketsUnWait();
295 MUTEX_ENTER(&rx_freePktQ_lock);
297 rxi_FreePacketNoLock(c);
298 /* Wakeup anyone waiting for packets */
301 MUTEX_EXIT(&rx_freePktQ_lock);
305 /* this one is kind of awful.
306 * In rxkad, the packet has been all shortened, and everything, ready for
307 * sending. All of a sudden, we discover we need some of that space back.
308 * This isn't terribly general, because it knows that the packets are only
309 * rounded up to the EBS (userdata + security header).
/* Reclaim `nb` bytes of slack in a packet's iovecs (used by rxkad after the
 * packet was shortened).  Grows iov_len by nb only when the buffer has room:
 * the first data buffer (localdata) is RX_FIRSTBUFFERSIZE, continuation
 * buffers are RX_CBUFFERSIZE.
 * NOTE(review): loop header, declarations of i/nb, and returns are missing
 * from this listing — see full source. */
311 int rxi_RoundUpPacket(p, nb)
312 struct rx_packet * p;
317 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
318 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
319 p->wirevec[i].iov_len += nb;
324 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
325 p->wirevec[i].iov_len += nb;
332 /* get sufficient space to store nb bytes of data (or more), and hook
333 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
334 * returns the number of bytes >0 which it failed to come up with.
335 * Don't need to worry about locking on packet, since only
336 * one thread can manipulate one at a time. Locking on continuation
337 * packets is handled by allocCBuf */
338 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
/* Attach enough continuation buffers to packet p to hold at least `nb` more
 * bytes.  Each allocCBuf() success contributes RX_CBUFFERSIZE bytes and one
 * iovec entry.  Returns <=0 on full success, else the positive number of
 * bytes it could not obtain (allocation failure path is among the missing
 * lines in this listing, as is the niovecs update/return). */
339 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
343 for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
344 register struct rx_packet *cb;
345 if ((cb = allocCBuf(class))) {
346 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
347 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
348 nb -= RX_CBUFFERSIZE;
349 p->length += RX_CBUFFERSIZE;
358 /* Add more packet buffers */
/* Grow the global packet pool by `apackets` packets: one osi_Alloc slab,
 * zeroed, each packet's first two iovecs wired to its built-in header and
 * localdata buffers, then appended to rx_freePacketQueue under
 * rx_freePktQ_lock.
 * NOTE(review): rx_mallocedP is overwritten with the newest slab each call —
 * presumably the packets are chained via a field set in the missing lines;
 * confirm against full source before assuming earlier slabs leak. */
359 void rxi_MorePackets(int apackets)
361 extern void rxi_PacketsUnWait();
362 struct rx_packet *p, *e;
366 getme = apackets * sizeof(struct rx_packet);
367 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
369 PIN(p, getme); /* XXXXX */
370 bzero((char *)p, getme);
373 MUTEX_ENTER(&rx_freePktQ_lock);
375 for (e = p + apackets; p<e; p++) {
376 p->wirevec[0].iov_base = (char *) (p->wirehead);
377 p->wirevec[0].iov_len = RX_HEADER_SIZE;
378 p->wirevec[1].iov_base = (char *) (p->localdata);
379 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
380 p->header.flags = RX_FREE_PACKET;
383 queue_Append(&rx_freePacketQueue, p);
385 rx_nFreePackets += apackets;
386 rxi_NeedMorePackets = FALSE;
390 MUTEX_EXIT(&rx_freePktQ_lock);
395 /* Add more packet buffers */
/* Same as rxi_MorePackets but assumes the caller already holds
 * rx_freePktQ_lock.  Additionally over-allocates so that roughly 1/4 of the
 * new packets could be filled out to jumbogram size with continuation
 * buffers. */
396 void rxi_MorePacketsNoLock(int apackets)
398 extern void rxi_PacketsUnWait();
399 struct rx_packet *p, *e;
402 /* allocate enough packets that 1/4 of the packets will be able
403 * to hold maximal amounts of data */
404 apackets += (apackets/4)
405 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
406 getme = apackets * sizeof(struct rx_packet);
407 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
409 bzero((char *)p, getme);
411 for (e = p + apackets; p<e; p++) {
412 p->wirevec[0].iov_base = (char *) (p->wirehead);
413 p->wirevec[0].iov_len = RX_HEADER_SIZE;
414 p->wirevec[1].iov_base = (char *) (p->localdata);
415 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
416 p->header.flags = RX_FREE_PACKET;
419 queue_Append(&rx_freePacketQueue, p);
421 rx_nFreePackets += apackets;
422 rxi_NeedMorePackets = FALSE;
/* Tear down the packet pool at shutdown.  Frees only rx_mallocedP, i.e. the
 * most recent slab — the MTUXXX comment below acknowledges this does not yet
 * free all slabs. */
427 void rxi_FreeAllPackets(void)
429 /* must be called at proper interrupt level, etcetera */
430 /* MTUXXX need to free all Packets */
431 osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
432 UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
435 /* Allocate more packets iff we need more continuation buffers */
436 /* In kernel, can't page in memory with interrupts disabled, so we
437 * don't use the event mechanism. */
/* Poll-style top-up of the packet pool: if an allocation failure set
 * rxi_NeedMorePackets, grow the pool now.  Used instead of the event
 * mechanism in kernel, per the comment above. */
438 void rx_CheckPackets()
440 if (rxi_NeedMorePackets) {
441 rxi_MorePackets(rx_initSendWindow);
445 /* In the packet freeing routine below, the assumption is that
446 we want all of the packets to be used equally frequently, so that we
447 don't get packet buffers paging out. It would be just as valid to
448 assume that we DO want them to page out if not many are being used.
449 In any event, we assume the former, and append the packets to the end
451 /* This explanation is bogus. The free list doesn't remain in any kind of
452 useful order for afs_int32: the packets in use get pretty much randomly scattered
453 across all the pages. In order to permit unused {packets,bufs} to page out, they
454 must be stored so that packets which are adjacent in memory are adjacent in the
455 free list. An array springs rapidly to mind.
458 /* Actually free the packet p. */
/* Put packet p back on the free queue; caller must hold rx_freePktQ_lock.
 * Panics on double-free (RX_FREE_PACKET flag already set). */
459 void rxi_FreePacketNoLock(struct rx_packet *p)
461 dpf(("Free %x\n", p));
463 if (p->header.flags & RX_FREE_PACKET)
464 osi_Panic("rxi_FreePacketNoLock: packet already free\n");
466 p->header.flags = RX_FREE_PACKET;
467 queue_Append(&rx_freePacketQueue, p);
/* Release the continuation buffers attached to p starting at iovec `first`
 * (currently must be 1; vec 1 must be p's own localdata, so freeing starts at
 * vec 2).  Each continuation iovec is mapped back to its owning packet via
 * RX_CBUF_TO_PACKET and freed.  Caller holds rx_freePktQ_lock.
 * NOTE(review): the NULL check guarding the panic at line 484, the niovecs
 * reset, and the return are among the lines missing from this listing. */
470 int rxi_FreeDataBufsNoLock(p, first)
471 struct rx_packet * p;
474 struct iovec *iov, *end;
476 if (first != 1) /* MTUXXX */
477 osi_Panic("FreeDataBufs 1: first must be 1");
478 iov = &p->wirevec[1];
479 end = iov + (p->niovecs-1);
480 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
481 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
482 for (iov++ ; iov < end ; iov++) {
484 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
485 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
493 int rxi_nBadIovecs = 0;
495 /* rxi_RestoreDataBufs
497 * Restore the correct sizes to the iovecs. Called when reusing a packet
498 * for reading off the wire.
/* Restore the correct sizes to the iovecs. Called when reusing a packet
 * for reading off the wire: resets vec 0/1 to the built-in header and
 * localdata buffers and every continuation vec back to RX_CBUFFERSIZE.
 * A NULL continuation base (checked at line 511) is handled in lines missing
 * from this listing — presumably counted via rxi_nBadIovecs; confirm. */
500 void rxi_RestoreDataBufs(struct rx_packet *p)
503 struct iovec *iov = &p->wirevec[2];
505 p->wirevec[0].iov_base = (char *) (p->wirehead);
506 p->wirevec[0].iov_len = RX_HEADER_SIZE;
507 p->wirevec[1].iov_base = (char *) (p->localdata);
508 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
510 for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
511 if (!iov->iov_base) {
516 iov->iov_len = RX_CBUFFERSIZE;
/* Free the continuation buffers of p that hold no message data.  Walks past
 * the vecs still covered by p->length, then frees the remainder under
 * rx_freePktQ_lock, remapping each cbuf to its owning packet with
 * RX_CBUF_TO_PACKET.
 * NOTE(review): the `first != 1` check's if-line, NULL-base guards, the
 * niovecs adjustment, and the return are among the lines missing here. */
520 int rxi_TrimDataBufs(p, first)
521 struct rx_packet * p;
524 extern void rxi_PacketsUnWait();
526 struct iovec *iov, *end;
530 osi_Panic("TrimDataBufs 1: first must be 1");
532 /* Skip over continuation buffers containing message data */
533 iov = &p->wirevec[2];
534 end = iov + (p->niovecs-2);
535 length = p->length - p->wirevec[1].iov_len;
536 for (; iov < end && length > 0 ; iov++) {
538 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
539 length -= iov->iov_len;
542 /* iov now points to the first empty data buffer. */
547 MUTEX_ENTER(&rx_freePktQ_lock);
549 for (; iov < end ; iov++) {
551 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
552 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
557 MUTEX_EXIT(&rx_freePktQ_lock);
563 /* Free the packet p. P is assumed not to be on any queue, i.e.
564 * remove it yourself first if you call this routine. */
/* Free packet p (must already be off any queue): releases its continuation
 * buffers and the packet itself under rx_freePktQ_lock, then wakes packet
 * waiters (wakeup line missing from this listing). */
565 void rxi_FreePacket(struct rx_packet *p)
567 extern void rxi_PacketsUnWait();
571 MUTEX_ENTER(&rx_freePktQ_lock);
573 rxi_FreeDataBufsNoLock(p,1);
574 rxi_FreePacketNoLock(p);
575 /* Wakeup anyone waiting for packets */
578 MUTEX_EXIT(&rx_freePktQ_lock);
583 /* rxi_AllocPacket sets up p->length so it reflects the number of
584 * bytes in the packet at this point, **not including** the header.
585 * The header is absolutely necessary, besides, this is the way the
586 * length field is usually used */
/* Allocate a packet of the given class; caller holds rx_freePktQ_lock.
 * Over-quota requests fail with a per-class stats bump (mirrors allocCBuf
 * above).  Otherwise pops the head of rx_freePacketQueue, re-points iovecs
 * 0/1 at the packet's own header/localdata buffers (rx_FlushWrite may have
 * fiddled them), and sets p->length to the data capacity, excluding the wire
 * header.
 * NOTE(review): the switch header, case `break`s, queue_Remove, niovecs
 * setup, and the final return are among the lines missing from this listing. */
587 struct rx_packet *rxi_AllocPacketNoLock(class)
590 register struct rx_packet *p;
593 if (rxi_OverQuota(class)) {
594 rxi_NeedMorePackets = TRUE;
595 MUTEX_ENTER(&rx_stats_mutex);
597 case RX_PACKET_CLASS_RECEIVE:
598 rx_stats.receivePktAllocFailures++;
600 case RX_PACKET_CLASS_SEND:
601 rx_stats.sendPktAllocFailures++;
603 case RX_PACKET_CLASS_SPECIAL:
604 rx_stats.specialPktAllocFailures++;
606 case RX_PACKET_CLASS_RECV_CBUF:
607 rx_stats.receiveCbufPktAllocFailures++;
609 case RX_PACKET_CLASS_SEND_CBUF:
610 rx_stats.sendCbufPktAllocFailures++;
613 MUTEX_EXIT(&rx_stats_mutex);
614 return (struct rx_packet *) 0;
618 MUTEX_ENTER(&rx_stats_mutex);
619 rx_stats.packetRequests++;
620 MUTEX_EXIT(&rx_stats_mutex);
623 if (queue_IsEmpty(&rx_freePacketQueue))
624 osi_Panic("rxi_AllocPacket error");
626 if (queue_IsEmpty(&rx_freePacketQueue))
627 rxi_MorePacketsNoLock(rx_initSendWindow);
631 p = queue_First(&rx_freePacketQueue, rx_packet);
632 if (p->header.flags != RX_FREE_PACKET)
633 osi_Panic("rxi_AllocPacket: packet not free\n");
635 dpf(("Alloc %x, class %d\n", p, class));
640 /* have to do this here because rx_FlushWrite fiddles with the iovs in
641 * order to truncate outbound packets. In the near future, may need
642 * to allocate bufs from a static pool here, and/or in AllocSendPacket
644 p->wirevec[0].iov_base = (char *) (p->wirehead);
645 p->wirevec[0].iov_len = RX_HEADER_SIZE;
646 p->wirevec[1].iov_base = (char *) (p->localdata);
647 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
649 p->length = RX_FIRSTBUFFERSIZE;
/* Locked wrapper around rxi_AllocPacketNoLock: takes rx_freePktQ_lock for
 * the duration of the allocation (return of p is in a line missing from this
 * listing). */
653 struct rx_packet *rxi_AllocPacket(class)
656 register struct rx_packet *p;
658 MUTEX_ENTER(&rx_freePktQ_lock);
659 p = rxi_AllocPacketNoLock(class);
660 MUTEX_EXIT(&rx_freePktQ_lock);
664 /* This guy comes up with as many buffers as it {takes,can get} given
665 * the MTU for this call. It also sets the packet length before
666 * returning. caution: this is often called at NETPRI
667 * Called with call locked.
/* Allocate a send packet sized for this call: capacity is capped by the
 * call's MTU minus wire header (`mud`), with `delta` reserved for the
 * security header + max trailer.  Grows the packet with continuation bufs if
 * `want` exceeds the base size.  If no packet is available and the call has
 * no error, blocks (releasing call->lock, setting RX_CALL_WAIT_PACKETS and
 * rx_waitingForPackets) until packets are returned, then retries.
 * NOTE(review): the trimming branch for p->length > mud, the delta
 * adjustment, and the loop close/return are among the missing lines. */
669 struct rx_packet *rxi_AllocSendPacket(call, want)
670 register struct rx_call *call;
673 register struct rx_packet *p = (struct rx_packet *) 0;
675 register unsigned delta;
678 mud = call->MTU - RX_HEADER_SIZE;
679 delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
680 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
682 while (!(call->error)) {
683 MUTEX_ENTER(&rx_freePktQ_lock);
684 /* if an error occurred, or we get the packet we want, we're done */
685 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
686 MUTEX_EXIT(&rx_freePktQ_lock);
689 want = MIN(want, mud);
691 if ((unsigned) want > p->length)
692 (void) rxi_AllocDataBuf(p, (want - p->length),
693 RX_PACKET_CLASS_SEND_CBUF);
695 if ((unsigned) p->length > mud)
698 if (delta >= p->length) {
707 /* no error occurred, and we didn't get a packet, so we sleep.
708 * At this point, we assume that packets will be returned
709 * sooner or later, as packets are acknowledged, and so we
712 call->flags |= RX_CALL_WAIT_PACKETS;
713 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
714 MUTEX_EXIT(&call->lock);
715 rx_waitingForPackets = 1;
717 #ifdef RX_ENABLE_LOCKS
718 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
720 osi_rxSleep(&rx_waitingForPackets);
722 MUTEX_EXIT(&rx_freePktQ_lock);
723 MUTEX_ENTER(&call->lock);
724 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
725 call->flags &= ~RX_CALL_WAIT_PACKETS;
734 /* count the number of used FDs */
/* Count file descriptors in [0, amax) that are open, by probing each with
 * fstat (return of `count` is in a line missing from this listing).
 * A macro fallback (line 751 below) substitutes `amax` on platforms where
 * this can't be done. */
735 static int CountFDs(amax)
738 register int i, code;
742 for(i=0;i<amax;i++) {
743 code = fstat(i, &tstat);
744 if (code == 0) count++;
751 #define CountFDs(amax) amax
755 #if !defined(KERNEL) || defined(UKERNEL)
757 /* This function reads a single packet from the interface into the
758 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
759 * (host,port) of the sender are stored in the supplied variables, and
760 * the data length of the packet is stored in the packet structure.
761 * The header is decoded. */
/* Read one packet from `socket` into *p via recvmsg, using the packet's
 * iovec chain directly as the scatter list.  Grows p with continuation bufs
 * up to the advertised rx_maxJumboRecvSize before reading, and temporarily
 * extends the final iovec by RX_EXTRABUFFERSIZE so an oversized datagram
 * can't overrun (the rx header has no length field).  On success, decodes
 * the header, reports (host,port), charges bytesReceived to an existing peer
 * (never creates one — see comment below), and trims unused cbufs.
 * Returns 0 for bogus/would-block packets; the success return and several
 * error-path lines are missing from this listing. */
762 int rxi_ReadPacket(socket, p, host, port)
764 register struct rx_packet *p;
768 struct sockaddr_in from;
771 register afs_int32 tlen, savelen;
773 rx_computelen(p, tlen);
774 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
776 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
777 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
778 * it once in order to avoid races. */
781 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
789 /* Extend the last iovec for padding, it's just to make sure that the
790 * read doesn't return more data than we expect, and is done to get around
791 * our problems caused by the lack of a length field in the rx header.
792 * Use the extra buffer that follows the localdata in each packet
794 savelen = p->wirevec[p->niovecs].iov_len;
795 p->wirevec[p->niovecs].iov_len += RX_EXTRABUFFERSIZE;
797 bzero((char *)&msg, sizeof(msg));
798 msg.msg_name = (char *) &from;
799 msg.msg_namelen = sizeof(struct sockaddr_in);
800 msg.msg_iov = p->wirevec;
801 msg.msg_iovlen = p->niovecs;
802 nbytes = rxi_Recvmsg(socket, &msg, 0);
804 /* restore the vec to its correct state */
805 p->wirevec[p->niovecs].iov_len = savelen;
807 p->length = (nbytes - RX_HEADER_SIZE);
808 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
810 rxi_MorePackets(rx_initSendWindow);
812 else if (nbytes < 0 && errno == EWOULDBLOCK) {
813 MUTEX_ENTER(&rx_stats_mutex);
814 rx_stats.noPacketOnRead++;
815 MUTEX_EXIT(&rx_stats_mutex);
819 MUTEX_ENTER(&rx_stats_mutex);
820 rx_stats.bogusPacketOnRead++;
821 rx_stats.bogusHost = from.sin_addr.s_addr;
822 MUTEX_EXIT(&rx_stats_mutex);
823 dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
824 from.sin_port,nbytes));
829 /* Extract packet header. */
830 rxi_DecodePacketHeader(p);
832 *host = from.sin_addr.s_addr;
833 *port = from.sin_port;
834 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
835 struct rx_peer *peer;
836 MUTEX_ENTER(&rx_stats_mutex);
837 rx_stats.packetsRead[p->header.type-1]++;
838 MUTEX_EXIT(&rx_stats_mutex);
840 * Try to look up this peer structure. If it doesn't exist,
841 * don't create a new one -
842 * we don't keep count of the bytes sent/received if a peer
843 * structure doesn't already exist.
845 * The peer/connection cleanup code assumes that there is 1 peer
846 * per connection. If we actually created a peer structure here
847 * and this packet was an rxdebug packet, the peer structure would
848 * never be cleaned up.
850 peer = rxi_FindPeer(*host, *port, 0, 0);
852 MUTEX_ENTER(&peer->peer_lock);
853 hadd32(peer->bytesReceived, p->length);
854 MUTEX_EXIT(&peer->peer_lock);
858 /* Free any empty packet buffers at the end of this packet */
859 rxi_TrimDataBufs(p, 1);
865 #endif /* !KERNEL || UKERNEL */
867 /* This function splits off the first packet in a jumbo packet.
868 * As of AFS 3.5, jumbograms contain more than one fixed size
869 * packet, and the RX_JUMBO_PACKET flag is set in all but the
870 * last packet header. All packets (except the last) are padded to
871 * fall on RX_CBUFFERSIZE boundaries.
872 * HACK: We store the length of the first n-1 packets in the
873 * last two pad bytes. */
/* Split the first fixed-size packet off a jumbogram.  Each packet except the
 * last is RX_JUMBOBUFFERSIZE bytes, followed by an abbreviated 4-byte jumbo
 * header (flags + cksum) that seeds the next packet's header; serial and seq
 * each advance by 1.  The continuation buffer at wirevec[2] doubles as the
 * next packet's storage (via RX_CBUF_TO_PACKET), so iovecs shift down by
 * one.  Returns the split-off packet; the bogus-length/niovecs early
 * returns and the final return are among the lines missing here. */
875 struct rx_packet *rxi_SplitJumboPacket(p, host, port, first)
876 register struct rx_packet *p;
881 struct rx_packet *np;
882 struct rx_jumboHeader *jp;
888 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
889 * bytes in length. All but the first packet are preceded by
890 * an abbreviated four byte header. The length of the last packet
891 * is calculated from the size of the jumbogram. */
892 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
894 if ((int)p->length < length) {
895 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
898 niov = p->niovecs - 2;
900 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
903 iov = &p->wirevec[2];
904 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
906 /* Get a pointer to the abbreviated packet header */
907 jp = (struct rx_jumboHeader *)
908 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
910 /* Set up the iovecs for the next packet */
911 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
912 np->wirevec[0].iov_len = sizeof(struct rx_header);
913 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
914 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
915 np->niovecs = niov+1;
916 for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
917 np->wirevec[i] = *iov;
919 np->length = p->length - length;
920 p->length = RX_JUMBOBUFFERSIZE;
923 /* Convert the jumbo packet header to host byte order */
924 temp = ntohl(*(afs_uint32 *)jp);
925 jp->flags = (u_char)(temp >> 24);
926 jp->cksum = (u_short)(temp);
928 /* Fill in the packet header */
929 np->header = p->header;
930 np->header.serial = p->header.serial + 1;
931 np->header.seq = p->header.seq + 1;
932 np->header.flags = jp->flags;
933 np->header.spare = jp->cksum;
939 /* Send a udp datagram */
/* Send a UDP datagram described by the iovec array `dvec` to `addr` via
 * sendmsg (rxi_Sendmsg).  NOTE(review): the msg_name/msg_iov assignments and
 * the return value are in lines missing from this listing; the return of
 * rxi_Sendmsg is ignored as shown — confirm against full source. */
940 int osi_NetSend(socket, addr, dvec, nvecs, length, istack)
950 memset(&msg, 0, sizeof(msg));
952 msg.msg_iovlen = nvecs;
954 msg.msg_namelen = sizeof(struct sockaddr_in);
956 rxi_Sendmsg(socket, &msg, 0);
960 #elif !defined(UKERNEL)
961 /* osi_NetSend is defined in afs/afs_osinet.c
962 * message receipt is done in rxk_input or rx_put.
967 * Copy an mblock to the contiguous area pointed to by cp.
968 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
969 * but it doesn't really.
970 * Returns the number of bytes not transferred.
971 * The message is NOT changed.
/* Copy a STREAMS mblock chain into the contiguous buffer cp.  Per the
 * comment above, the <off>/<len> skip semantics are not really honored.
 * Walks b_cont links, copying only M_DATA blocks; returns the number of
 * bytes not transferred (bookkeeping/return lines missing from this
 * listing). */
973 static int cpytoc(mp, off, len, cp)
975 register int off, len;
980 for (;mp && len > 0; mp = mp->b_cont) {
981 if (mp->b_datap->db_type != M_DATA) {
984 n = MIN(len, (mp->b_wptr - mp->b_rptr));
985 bcopy((char *)mp->b_rptr, cp, n);
993 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
994 * but it doesn't really.
995 * This sucks, anyway, do it like m_cpy.... below
/* Copy a STREAMS mblock chain into an iovec array (STREAMS analogue of
 * m_cpytoiovec below).  Like cpytoc, the <off>/<len> skip semantics are
 * acknowledged above as not really implemented.
 * NOTE(review): the per-iovec advance logic (updates of m/o/i and loop
 * close) is largely missing from this listing — too incomplete to assess. */
997 static int cpytoiovec(mp, off, len, iovs, niovs)
1000 register struct iovec *iovs;
1002 register int m,n,o,t,i;
1004 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1005 if (mp->b_datap->db_type != M_DATA) {
1008 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1014 t = iovs[i].iov_len;
1017 bcopy((char *)mp->b_rptr, iovs[i].iov_base + o, m);
1026 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1027 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1029 #if !defined(AFS_LINUX20_ENV)
/* BSD-mbuf version: copy `len` bytes starting `off` bytes into mbuf chain
 * `m` out into the iovec array.  First skips whole mbufs consumed by `off`,
 * then copies MIN(l1, l2, len)-sized chunks, advancing the mbuf pointer/
 * remainder (p1/l1) and the iovec pointer/remainder (p2/l2) as each runs
 * out.  Panics on NULL/negative arguments.
 * NOTE(review): the copy call itself, the l1/l2/len updates, and the return
 * are among the lines missing from this listing. */
1030 static int m_cpytoiovec(m, off, len, iovs, niovs)
1032 int off, len, niovs;
1033 struct iovec iovs[];
1036 unsigned int l1, l2, i, t;
1038 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1039 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
1042 if (m->m_len <= off) {
1052 p1 = mtod(m, caddr_t)+off;
1053 l1 = m->m_len - off;
1055 p2 = iovs[0].iov_base;
1056 l2 = iovs[0].iov_len;
1059 t = MIN(l1, MIN(l2, (unsigned int)len));
1068 p1 = mtod(m, caddr_t);
1074 p2 = iovs[i].iov_base;
1075 l2 = iovs[i].iov_len;
1083 #endif /* AFS_SUN5_ENV */
1085 #if !defined(AFS_LINUX20_ENV)
/* Copy `data_len` bytes (skipping `hdr_len`) from the mbuf chain `amb` into
 * the packet's iovec chain via m_cpytoiovec; `free` and the handling of
 * `code`/return are in lines missing from this listing. */
1086 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1093 struct rx_packet *phandle;
1094 int hdr_len, data_len;
1098 code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1104 #endif /*KERNEL && !UKERNEL*/
1107 /* send a response to a debug packet */
/* Handle an incoming rxdebug request packet and send the reply in-place:
 * reads a struct rx_debugIn from `ap`, dispatches on tin.type, overwrites
 * ap's payload with the requested data (growing ap with cbufs when the reply
 * is larger than the request), and transmits via rxi_SendDebugPacket.
 * Supported types: GETSTATS (basic server stats), GETCONN/GETALLCONN (the
 * tin.index'th interesting connection), GETPEER (the tin.index'th peer),
 * RXSTATS (the raw rx_stats table, htonl'd word by word); unknown types get
 * an RX_DEBUGI_BADTYPE error reply.  Reply fields are converted to network
 * byte order individually.
 * NOTE(review): the switch(tin.type) header, case `break`s/returns, and
 * various loop bodies are among the lines missing from this listing. */
1109 struct rx_packet *rxi_ReceiveDebugPacket(ap, asocket, ahost, aport, istack)
1113 register struct rx_packet *ap;
1116 struct rx_debugIn tin;
1118 struct rx_serverQueueEntry *np, *nqe;
1120 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1121 /* all done with packet, now set length to the truth, so we can
1122 * reuse this packet */
1123 rx_computelen(ap, ap->length);
1125 tin.type = ntohl(tin.type);
1126 tin.index = ntohl(tin.index);
1128 case RX_DEBUGI_GETSTATS: {
1129 struct rx_debugStats tstat;
1131 /* get basic stats */
1132 bzero ((char *)&tstat, sizeof(tstat)); /* make sure spares are zero */
1133 tstat.version = RX_DEBUGI_VERSION;
1134 #ifndef RX_ENABLE_LOCKS
1135 tstat.waitingForPackets = rx_waitingForPackets;
1137 tstat.nFreePackets = htonl(rx_nFreePackets);
1138 tstat.callsExecuted = htonl(rxi_nCalls);
1139 tstat.packetReclaims = htonl(rx_packetReclaims);
1140 tstat.usedFDs = CountFDs(64);
1141 tstat.nWaiting = htonl(rx_nWaiting);
1142 queue_Count( &rx_idleServerQueue, np, nqe,
1143 rx_serverQueueEntry, tstat.idleThreads);
1144 tstat.idleThreads = htonl(tstat.idleThreads);
1145 tl = sizeof(struct rx_debugStats) - ap->length;
1147 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1150 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1151 ap->length = sizeof(struct rx_debugStats);
1152 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1153 rx_computelen(ap, ap->length);
1158 case RX_DEBUGI_GETALLCONN:
1159 case RX_DEBUGI_GETCONN: {
1161 register struct rx_connection *tc;
1162 struct rx_call *tcall;
1163 struct rx_debugConn tconn;
1164 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1167 tl = sizeof(struct rx_debugConn) - ap->length;
1169 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1173 bzero ((char *)&tconn, sizeof(tconn)); /* make sure spares are zero */
1174 /* get N'th (maybe) "interesting" connection info */
1175 for(i=0;i<rx_hashTableSize;i++) {
1176 #if !defined(KERNEL)
1177 /* the time complexity of the algorithm used here
1178 * exponentially increses with the number of connections.
1180 #ifdef AFS_PTHREAD_ENV
1183 (void) IOMGR_Poll();
1186 MUTEX_ENTER(&rx_connHashTable_lock);
1187 /* We might be slightly out of step since we are not
1188 * locking each call, but this is only debugging output.
1190 for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
1191 if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1192 tconn.host = tc->peer->host;
1193 tconn.port = tc->peer->port;
1194 tconn.cid = htonl(tc->cid);
1195 tconn.epoch = htonl(tc->epoch);
1196 tconn.serial = htonl(tc->serial);
1197 for(j=0;j<RX_MAXCALLS;j++) {
1198 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1199 if ((tcall=tc->call[j])) {
1200 tconn.callState[j] = tcall->state;
1201 tconn.callMode[j] = tcall->mode;
1202 tconn.callFlags[j] = tcall->flags;
1203 if (queue_IsNotEmpty(&tcall->rq))
1204 tconn.callOther[j] |= RX_OTHER_IN;
1205 if (queue_IsNotEmpty(&tcall->tq))
1206 tconn.callOther[j] |= RX_OTHER_OUT;
1208 else tconn.callState[j] = RX_STATE_NOTINIT;
1211 tconn.natMTU = htonl(tc->peer->natMTU);
1212 tconn.error = htonl(tc->error);
1213 tconn.flags = tc->flags;
1214 tconn.type = tc->type;
1215 tconn.securityIndex = tc->securityIndex;
1216 if (tc->securityObject) {
1217 RXS_GetStats (tc->securityObject, tc,
1219 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1220 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1223 DOHTONL(packetsReceived);
1224 DOHTONL(packetsSent);
1225 DOHTONL(bytesReceived);
1228 i<sizeof(tconn.secStats.spares)/sizeof(short);
1232 i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
1237 MUTEX_EXIT(&rx_connHashTable_lock);
1238 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1240 ap->length = sizeof(struct rx_debugConn);
1241 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1246 MUTEX_EXIT(&rx_connHashTable_lock);
1248 /* if we make it here, there are no interesting packets */
1249 tconn.cid = htonl(0xffffffff); /* means end */
1250 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1252 ap->length = sizeof(struct rx_debugConn);
1253 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1259 * Pass back all the peer structures we have available
1262 case RX_DEBUGI_GETPEER: {
1264 register struct rx_peer *tp;
1265 struct rx_debugPeer tpeer;
1268 tl = sizeof(struct rx_debugPeer) - ap->length;
1270 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1274 bzero ((char *)&tpeer, sizeof(tpeer));
1275 for(i=0;i<rx_hashTableSize;i++) {
1276 #if !defined(KERNEL)
1277 /* the time complexity of the algorithm used here
1278 * exponentially increses with the number of peers.
1280 * Yielding after processing each hash table entry
1281 * and dropping rx_peerHashTable_lock.
1282 * also increases the risk that we will miss a new
1283 * entry - but we are willing to live with this
1284 * limitation since this is meant for debugging only
1286 #ifdef AFS_PTHREAD_ENV
1289 (void) IOMGR_Poll();
1292 MUTEX_ENTER(&rx_peerHashTable_lock);
1293 for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
1294 if (tin.index-- <= 0) {
1295 tpeer.host = tp->host;
1296 tpeer.port = tp->port;
1297 tpeer.ifMTU = htons(tp->ifMTU);
1298 tpeer.idleWhen = htonl(tp->idleWhen);
1299 tpeer.refCount = htons(tp->refCount);
1300 tpeer.burstSize = tp->burstSize;
1301 tpeer.burst = tp->burst;
1302 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1303 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1304 tpeer.rtt = htonl(tp->rtt);
1305 tpeer.rtt_dev = htonl(tp->rtt_dev);
1306 tpeer.timeout.sec = htonl(tp->timeout.sec);
1307 tpeer.timeout.usec = htonl(tp->timeout.usec);
1308 tpeer.nSent = htonl(tp->nSent);
1309 tpeer.reSends = htonl(tp->reSends);
1310 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1311 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1312 tpeer.rateFlag = htonl(tp->rateFlag);
1313 tpeer.natMTU = htons(tp->natMTU);
1314 tpeer.maxMTU = htons(tp->maxMTU);
1315 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1316 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1317 tpeer.MTU = htons(tp->MTU);
1318 tpeer.cwind = htons(tp->cwind);
1319 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1320 tpeer.congestSeq = htons(tp->congestSeq);
1321 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1322 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1323 tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1324 tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
1326 MUTEX_EXIT(&rx_peerHashTable_lock);
1327 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1329 ap->length = sizeof(struct rx_debugPeer);
1330 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1335 MUTEX_EXIT(&rx_peerHashTable_lock);
1337 /* if we make it here, there are no interesting packets */
1338 tpeer.host = htonl(0xffffffff); /* means end */
1339 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1341 ap->length = sizeof(struct rx_debugPeer);
1342 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1347 case RX_DEBUGI_RXSTATS: {
1351 tl = sizeof(rx_stats) - ap->length;
1353 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1357 /* Since its all int32s convert to network order with a loop. */
1358 MUTEX_ENTER(&rx_stats_mutex);
1359 s = (afs_int32 *)&rx_stats;
1360 for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1361 rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1364 ap->length = sizeof(rx_stats);
1365 MUTEX_EXIT(&rx_stats_mutex);
1366 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1372 /* error response packet */
1373 tin.type = htonl(RX_DEBUGI_BADTYPE);
1374 tin.index = tin.type;
1375 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1377 ap->length = sizeof(struct rx_debugIn);
1378 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* Answer an Rx version-probe: overwrite the payload of the received
 * packet with this build's version string and send it straight back.
 * ap                  - received packet, reused in place for the reply
 * asocket/ahost/aport - reply destination; istack is passed through to
 *                       the kernel network-send path
 * NOTE(review): extraction gaps - body braces, length setup and the
 * return statement are not visible in this view. */
1385 struct rx_packet *rxi_ReceiveVersionPacket(ap, asocket, ahost, aport, istack)
1389 register struct rx_packet *ap;
/* Copies a fixed 65 bytes starting 4 chars into cml_version_number
 * (presumably skipping an SCCS-style "@(#)" tag) - TODO confirm the
 * version string is long enough that this fixed-length read stays in
 * bounds. */
1393 rx_packetwrite(ap, 0, 65, cml_version_number+4);
1396 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1402 /* send a debug packet back to the sender */
/* Transmit a debug/version reply to (ahost, aport) over asocket.
 * Temporarily shrinks the packet's iovec chain so that exactly
 * apacket->length payload bytes are handed to osi_NetSend, then
 * restores the original iovec lengths afterwards. Drops the AFS
 * global lock (if held) around the network send. */
1403 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1404 afs_int32 ahost, short aport, afs_int32 istack)
1406 struct sockaddr_in taddr;
/* Remember whether we held the global lock so we can restore it. */
1412 int waslocked = ISAFS_GLOCK();
/* ahost/aport are already in network byte order (taken from the wire). */
1415 taddr.sin_family = AF_INET;
1416 taddr.sin_port = aport;
1417 taddr.sin_addr.s_addr = ahost;
1420 /* We need to trim the niovecs. */
/* Walk the payload iovecs (index 0 is the wire header) until the
 * remaining byte count fits in the current iovec; truncate that one
 * and cap niovecs. savelen/saven stash the original values so they
 * can be restored after the send.
 * NOTE(review): the loop-exit (break) line is not visible in this
 * extraction - confirm against the full source. */
1421 nbytes = apacket->length;
1422 for (i=1; i < apacket->niovecs; i++) {
1423 if (nbytes <= apacket->wirevec[i].iov_len) {
1424 savelen = apacket->wirevec[i].iov_len;
1425 saven = apacket->niovecs;
1426 apacket->wirevec[i].iov_len = nbytes;
1427 apacket->niovecs = i+1; /* so condition fails because i == niovecs */
1429 else nbytes -= apacket->wirevec[i].iov_len;
/* Never hold the AFS global lock across a potentially blocking send. */
1433 if (waslocked) AFS_GUNLOCK();
1435 /* debug packets are not reliably delivered, hence the cast below. */
1436 (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1437 apacket->length+RX_HEADER_SIZE, istack);
1439 if (waslocked) AFS_GLOCK();
/* Undo the truncation so the caller's packet is unchanged. */
1442 if (saven) { /* means we truncated the packet above. */
1443 apacket->wirevec[i-1].iov_len = savelen;
1444 apacket->niovecs = saven;
1449 /* Send the packet to appropriate destination for the specified
1450 * connection. The header is first encoded and placed in the packet.
/* conn  - connection whose peer receives the packet (also supplies the
 *         serial number and, for server connections, the socket)
 * p     - packet to send; its header.serial is stamped here and the
 *         wire header re-encoded
 * Side effects: bumps conn->serial, updates rx_stats and the peer's
 * bytesSent counter; on send failure, reschedules the packet's
 * retryTime for a near-immediate resend. */
1452 void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
1458 struct sockaddr_in addr;
1459 register struct rx_peer *peer = conn->peer;
1462 char deliveryType = 'S';
1464 /* The address we're sending the packet to */
/* peer->port and peer->host are kept in network byte order. */
1465 addr.sin_family = AF_INET;
1466 addr.sin_port = peer->port;
1467 addr.sin_addr.s_addr = peer->host;
1469 /* This stuff should be revamped, I think, so that most, if not
1470 * all, of the header stuff is always added here. We could
1471 * probably do away with the encode/decode routines. XXXXX */
1473 /* Stamp each packet with a unique serial number. The serial
1474 * number is maintained on a connection basis because some types
1475 * of security may be based on the serial number of the packet,
1476 * and security is handled on a per authenticated-connection
1478 /* Pre-increment, to guarantee no zero serial number; a zero
1479 * serial number means the packet was never sent. */
1480 MUTEX_ENTER(&conn->conn_data_lock);
1481 p->header.serial = ++conn->serial;
1482 MUTEX_EXIT(&conn->conn_data_lock);
1483 /* This is so we can adjust retransmit time-outs better in the face of
1484 * rapidly changing round-trip times. RTO estimation is not a la Karn.
/* firstSerial records the serial of the first transmission only;
 * retransmissions keep the original value. */
1486 if (p->firstSerial == 0) {
1487 p->firstSerial = p->header.serial;
1491 /* If an output tracer function is defined, call it with the packet and
1492 * network address. Note this function may modify its arguments. */
1493 if (rx_almostSent) {
1494 int drop = (*rx_almostSent) (p, &addr);
1495 /* drop packet if return value is non-zero? */
1496 if (drop) deliveryType = 'D'; /* Drop the packet */
1500 /* Get network byte order header */
1501 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1502 * touch ALL the fields */
1504 /* Send the packet out on the same socket that related packets are being
/* Client connections always use the global rx_socket; server
 * connections use their service's socket. */
1506 socket = (conn->type == RX_CLIENT_CONNECTION
1507 ? rx_socket : conn->service->socket);
1510 /* Possibly drop this packet, for testing purposes */
1511 if ((deliveryType == 'D') ||
1512 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1513 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1514 deliveryType = 'D'; /* Drop the packet */
1517 deliveryType = 'S'; /* Send the packet */
1518 #endif /* RXDEBUG */
1520 /* Loop until the packet is sent. We'd prefer just to use a
1521 * blocking socket, but unfortunately the interface doesn't
1522 * allow us to have the socket block in send mode, and not
1523 * block in receive mode */
/* Drop the AFS global lock across the (possibly blocking) send. */
1526 waslocked = ISAFS_GLOCK();
1527 if (waslocked) AFS_GUNLOCK();
1529 if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1530 p->length+RX_HEADER_SIZE, istack)){
1531 /* send failed, so let's hurry up the resend, eh? */
1532 MUTEX_ENTER(&rx_stats_mutex);
1533 rx_stats.netSendFailures++;
1534 MUTEX_EXIT(&rx_stats_mutex);
/* Retry ~10ms (plus exponential backoff) after the failed attempt. */
1535 p->retryTime = p->timeSent; /* resend it very soon */
1536 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1539 if (waslocked) AFS_GLOCK();
/* NOTE(review): "%0.3d" looks like it was meant to be "%03d" for the
 * milliseconds field, and header.serial is printed twice - confirm
 * against the full source before changing a debug-only format. */
1544 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1545 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1546 peer->host, peer->port, p->header.serial, p->header.epoch,
1547 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1548 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1550 MUTEX_ENTER(&rx_stats_mutex);
1551 rx_stats.packetsSent[p->header.type-1]++;
1552 MUTEX_EXIT(&rx_stats_mutex);
1553 MUTEX_ENTER(&peer->peer_lock);
1554 hadd32(peer->bytesSent, p->length);
1555 MUTEX_EXIT(&peer->peer_lock);
1558 /* Send a list of packets to appropriate destination for the specified
1559 * connection. The headers are first encoded and placed in the packets.
/* Sends `len` packets from `list` as a single AFS 3.5 jumbogram: one
 * wire header (taken from list[0]) followed by each packet's data
 * buffer, with a jumbo sub-header between consecutive packets.
 * Reserves `len` consecutive serial numbers from the connection and
 * stamps one per packet. Side effects mirror rxi_SendPacket. */
1561 void rxi_SendPacketList(struct rx_connection * conn,
1562 struct rx_packet **list,
1566 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1569 struct sockaddr_in addr;
1570 register struct rx_peer *peer = conn->peer;
1572 struct rx_packet *p = NULL;
/* Scratch iovec array describing the whole jumbogram on the wire. */
1573 struct iovec wirevec[RX_MAXIOVECS];
1577 struct rx_jumboHeader *jp;
1579 char deliveryType = 'S';
1581 /* The address we're sending the packet to */
1582 addr.sin_family = AF_INET;
1583 addr.sin_port = peer->port;
1584 addr.sin_addr.s_addr = peer->host;
/* One iovec for the header plus one per packet must fit. */
1586 if (len+1 > RX_MAXIOVECS) {
1587 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1591 * Stamp the packets in this jumbogram with consecutive serial numbers
/* Reserve the whole serial range under the lock, then stamp packets
 * lock-free below. */
1593 MUTEX_ENTER(&conn->conn_data_lock);
1594 serial = conn->serial;
1595 conn->serial += len;
1596 MUTEX_EXIT(&conn->conn_data_lock);
1599 /* This stuff should be revamped, I think, so that most, if not
1600 * all, of the header stuff is always added here. We could
1601 * probably do away with the encode/decode routines. XXXXX */
/* iovec 0 is the single Rx wire header, taken from the first packet. */
1604 length = RX_HEADER_SIZE;
1605 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1606 wirevec[0].iov_len = RX_HEADER_SIZE;
1607 for (i = 0 ; i < len ; i++) {
1610 /* The whole 3.5 jumbogram scheme relies on packets fitting
1611 * in a single packet buffer. */
1612 if (p->niovecs > 2) {
1613 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1616 /* Set the RX_JUMBO_PACKET flags in all but the last packets
/* Non-final packets must be exactly RX_JUMBOBUFFERSIZE and carry a
 * trailing jumbo sub-header; the final packet uses its own length. */
1619 if (p->length != RX_JUMBOBUFFERSIZE) {
1620 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1622 p->header.flags |= RX_JUMBO_PACKET;
1623 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1624 wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1626 wirevec[i+1].iov_len = p->length;
1627 length += p->length;
1629 wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
1631 /* Convert jumbo packet header to network byte order */
/* The jumbo sub-header packs flags (high byte) and spare into one
 * 32-bit word; jp points into the PREVIOUS packet's buffer. */
1632 temp = (afs_uint32)(p->header.flags) << 24;
1633 temp |= (afs_uint32)(p->header.spare);
1634 *(afs_uint32 *)jp = htonl(temp);
/* Position jp for the next iteration: just past this packet's data. */
1636 jp = (struct rx_jumboHeader *)
1637 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1639 /* Stamp each packet with a unique serial number. The serial
1640 * number is maintained on a connection basis because some types
1641 * of security may be based on the serial number of the packet,
1642 * and security is handled on a per authenticated-connection
1644 /* Pre-increment, to guarantee no zero serial number; a zero
1645 * serial number means the packet was never sent. */
1646 p->header.serial = ++serial;
1647 /* This is so we can adjust retransmit time-outs better in the face of
1648 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1650 if (p->firstSerial == 0) {
1651 p->firstSerial = p->header.serial;
1655 /* If an output tracer function is defined, call it with the packet and
1656 * network address. Note this function may modify its arguments. */
1657 if (rx_almostSent) {
1658 int drop = (*rx_almostSent) (p, &addr);
1659 /* drop packet if return value is non-zero? */
1660 if (drop) deliveryType = 'D'; /* Drop the packet */
1664 /* Get network byte order header */
1665 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1666 * touch ALL the fields */
1669 /* Send the packet out on the same socket that related packets are being
1671 socket = (conn->type == RX_CLIENT_CONNECTION
1672 ? rx_socket : conn->service->socket);
1675 /* Possibly drop this packet, for testing purposes */
1676 if ((deliveryType == 'D') ||
1677 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1678 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1679 deliveryType = 'D'; /* Drop the packet */
1682 deliveryType = 'S'; /* Send the packet */
1683 #endif /* RXDEBUG */
1685 /* Loop until the packet is sent. We'd prefer just to use a
1686 * blocking socket, but unfortunately the interface doesn't
1687 * allow us to have the socket block in send mode, and not
1688 * block in receive mode */
1690 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
/* Solaris kernel: only drop the global lock for non-interrupt sends. */
1691 waslocked = ISAFS_GLOCK();
1692 if (!istack && waslocked) AFS_GUNLOCK();
1694 if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
1695 /* send failed, so let's hurry up the resend, eh? */
1696 MUTEX_ENTER(&rx_stats_mutex);
1697 rx_stats.netSendFailures++;
1698 MUTEX_EXIT(&rx_stats_mutex);
/* Reschedule every packet in the failed jumbogram for a fast resend. */
1699 for (i = 0 ; i < len ; i++) {
1701 p->retryTime = p->timeSent; /* resend it very soon */
1702 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1705 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1706 if (!istack && waslocked) AFS_GLOCK();
/* NOTE(review): same "%0.3d" / duplicated-serial quirk as in
 * rxi_SendPacket's dpf - debug-only output. */
1711 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1712 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1713 peer->host, peer->port, p->header.serial, p->header.epoch,
1714 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1715 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1717 MUTEX_ENTER(&rx_stats_mutex);
1718 rx_stats.packetsSent[p->header.type-1]++;
1719 MUTEX_EXIT(&rx_stats_mutex);
1720 MUTEX_ENTER(&peer->peer_lock);
1721 hadd32(peer->bytesSent, p->length);
1722 MUTEX_EXIT(&peer->peer_lock);
1726 /* Send a "special" packet to the peer connection. If call is
1727 * specified, then the packet is directed to a specific call channel
1728 * associated with the connection, otherwise it is directed to the
1729 * connection only. Uses optionalPacket if it is supplied, rather than
1730 * allocating a new packet buffer. Nbytes is the length of the data
1731 * portion of the packet. If data is non-null, nbytes of data are
1732 * copied into the packet. Type is the type of the packet, as defined
1733 * in rx.h. Bug: there's a lot of duplication between this and other
1734 * routines. This needs to be cleaned up. */
/* Returns optionalPacket if one was supplied (caller keeps ownership);
 * a freshly allocated packet is freed here after sending. */
1736 rxi_SendSpecial(call, conn, optionalPacket, type, data, nbytes, istack)
1737 register struct rx_call *call;
1738 register struct rx_connection *conn;
1739 struct rx_packet *optionalPacket;
1744 /* Some of the following stuff should be common code for all
1745 * packet sends (it's repeated elsewhere) */
1746 register struct rx_packet *p;
1748 int savelen = 0, saven = 0;
1749 int channel, callNumber;
/* With a call, target its channel/callNumber; the connection-only
 * case (no call) is handled on lines not visible in this view. */
1751 channel = call->channel;
1752 callNumber = *call->callNumber;
1753 /* BUSY packets refer to the next call on this connection */
1754 if (type == RX_PACKET_TYPE_BUSY) {
/* Allocate a packet only when the caller didn't supply one. */
1763 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1764 if (!p) osi_Panic("rxi_SendSpecial failure");
/* Fill in the header fields that identify this connection/call. */
1771 p->header.serviceId = conn->serviceId;
1772 p->header.securityIndex = conn->securityIndex;
1773 p->header.cid = (conn->cid | channel);
1774 p->header.callNumber = callNumber;
1776 p->header.epoch = conn->epoch;
1777 p->header.type = type;
1778 p->header.flags = 0;
1779 if (conn->type == RX_CLIENT_CONNECTION)
1780 p->header.flags |= RX_CLIENT_INITIATED;
1782 rx_packetwrite(p, 0, nbytes, data);
/* Trim the iovec chain to exactly nbytes of payload, remembering the
 * original values so optionalPacket can be restored afterwards (same
 * pattern as rxi_SendDebugPacket). */
1784 for (i=1; i < p->niovecs; i++) {
1785 if (nbytes <= p->wirevec[i].iov_len) {
1786 savelen = p->wirevec[i].iov_len;
1788 p->wirevec[i].iov_len = nbytes;
1789 p->niovecs = i+1; /* so condition fails because i == niovecs */
1791 else nbytes -= p->wirevec[i].iov_len;
1794 if (call) rxi_Send(call, p, istack);
1795 else rxi_SendPacket(conn, p, istack);
1796 if (saven) { /* means we truncated the packet above. We probably don't */
1797 /* really need to do this, but it seems safer this way, given that */
1798 /* sneaky optionalPacket... */
1799 p->wirevec[i-1].iov_len = savelen;
1802 if (!optionalPacket) rxi_FreePacket(p);
1803 return optionalPacket;
1807 /* Encode the packet's header (from the struct header in the packet to
1808 * the net byte order representation in the wire representation of the
1809 * packet, which is what is actually sent out on the wire) */
/* Serializes p->header into the first iovec as seven 32-bit words in
 * network byte order. Inverse of rxi_DecodePacketHeader. */
1810 void rxi_EncodePacketHeader(p)
1811 register struct rx_packet *p;
1813 register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */
/* Clear the header area first so reserved bits go out as zero. */
1815 bzero((char *)buf, RX_HEADER_SIZE);
1816 *buf++ = htonl(p->header.epoch);
1817 *buf++ = htonl(p->header.cid);
1818 *buf++ = htonl(p->header.callNumber);
1819 *buf++ = htonl(p->header.seq);
1820 *buf++ = htonl(p->header.serial);
/* Word 6 packs type|flags|userStatus|securityIndex, one byte each. */
1821 *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
1822 | (((afs_uint32)p->header.flags)<<16)
1823 | (p->header.userStatus<<8) | p->header.securityIndex);
1824 /* Note: top 16 bits of this next word were reserved */
/* Word 7: spare in the high half, serviceId in the low half. */
1825 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1828 /* Decode the packet's header (from net byte order to a struct header) */
/* Parses the seven 32-bit network-order words in the first iovec into
 * p->header. Inverse of rxi_EncodePacketHeader. */
1829 void rxi_DecodePacketHeader(p)
1830 register struct rx_packet *p;
1832 register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base); /* MTUXXX */
1835 p->header.epoch = ntohl(*buf++);
1836 p->header.cid = ntohl(*buf++);
1837 p->header.callNumber = ntohl(*buf++);
1838 p->header.seq = ntohl(*buf++);
1839 p->header.serial = ntohl(*buf++);
/* Unpack type|flags|userStatus|securityIndex from one word; the
 * assignments rely on narrowing to byte-sized struct fields. */
1840 temp = ntohl(*buf++);
1841 /* C will truncate byte fields to bytes for me */
1842 p->header.type = temp>>24;
1843 p->header.flags = temp>>16;
1844 p->header.userStatus = temp>>8;
1845 p->header.securityIndex = temp>>0;
1846 temp = ntohl(*buf++);
1847 p->header.serviceId = (temp&0xffff);
1848 p->header.spare = temp>>16;
1849 /* Note: top 16 bits of this last word are the security checksum */
/* Prepare a data packet for first transmission on `call`: fill in the
 * header from the call/connection state, reset transmit-time tracking,
 * reconcile the iovec lengths with p->length (freeing surplus buffers),
 * and let the security object wrap the packet.
 * last - non-zero marks this as the call's final data packet
 *        (sets RX_LAST_PACKET). */
1852 void rxi_PrepareSendPacket(call, p, last)
1853 register struct rx_call *call;
1854 register struct rx_packet *p;
1857 register struct rx_connection *conn = call->conn;
1859 ssize_t len; /* len must be a signed type; it can go negative */
1862 p->header.cid = (conn->cid | call->channel);
1863 p->header.serviceId = conn->serviceId;
1864 p->header.securityIndex = conn->securityIndex;
1865 p->header.callNumber = *call->callNumber;
/* tnext is the next sequence number to transmit on this call. */
1866 p->header.seq = call->tnext++;
1867 p->header.epoch = conn->epoch;
1868 p->header.type = RX_PACKET_TYPE_DATA;
1869 p->header.flags = 0;
1870 p->header.spare = 0;
1871 if (conn->type == RX_CLIENT_CONNECTION)
1872 p->header.flags |= RX_CLIENT_INITIATED;
1875 p->header.flags |= RX_LAST_PACKET;
1877 clock_Zero(&p->retryTime); /* Never yet transmitted */
1878 clock_Zero(&p->firstSent); /* Never yet transmitted */
1879 p->header.serial = 0; /* Another way of saying never transmitted... */
1882 /* Now that we're sure this is the last data on the call, make sure
1883 * that the "length" and the sum of the iov_lens matches. */
/* Include the security header so the wrapped packet still fits. */
1884 len = p->length + call->conn->securityHeaderSize;
/* Consume iovecs until len goes non-positive; i ends at the first
 * iovec NOT needed (len <= 0 means the data fits in iovecs 1..i-1). */
1886 for (i=1; i < p->niovecs && len > 0; i++) {
1887 len -= p->wirevec[i].iov_len;
/* Running out of iovecs before len is exhausted is a caller bug. */
1890 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1893 /* Free any extra elements in the wirevec */
/* MAX(2,i): always keep the header iovec plus at least one data iovec. */
1894 for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1895 rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
/* len is <= 0 here; adding it trims the last used iovec to size. */
1898 p->wirevec[i-1].iov_len += len;
/* Security wrap (checksum/encryption) happens last, on the final shape. */
1900 RXS_PreparePacket(conn->securityObject, call, p);
1903 /* Given an interface MTU size, calculate an adjusted MTU size that
1904 * will make efficient use of the RX buffers when the peer is sending
1905 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
/* Rounds mtu down to header + a whole number of jumbo buffer slots.
 * NOTE(review): the lines between the adjMTU comparison and the frags
 * computation (early return, mtu -= adjMTU) are not visible in this
 * extraction - confirm against the full source. */
1906 int rxi_AdjustIfMTU(int mtu)
1911 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
/* MTUs at or below one full slot are usable as-is. */
1912 if (mtu <= adjMTU) {
/* Number of additional whole jumbo slots that fit in the remainder. */
1919 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1920 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1923 /* Given an interface MTU size, and the peer's advertised max receive
1924 * size, calculate an adjisted maxMTU size that makes efficient use
1925 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
1926 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1928 int maxMTU = mtu * rxi_nSendFrags;
1929 maxMTU = MIN(maxMTU, peerMaxMTU);
1930 return rxi_AdjustIfMTU(maxMTU);
1933 /* Given a packet size, figure out how many datagram packet will fit.
1934 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1935 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1936 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
/* frags - desired fragment count; mtu - per-fragment MTU.
 * Budgets the total wire size (UDP headers included, capped at
 * RX_MAX_PACKET_SIZE) and converts it to a jumbo packet count. */
1937 int rxi_AdjustDgramPackets(int frags, int mtu)
/* MTUs too small to hold even one jumbo buffer get the minimum
 * (the return on the dropped line is not visible in this view). */
1940 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
/* Total payload across frags datagrams; each fragment after the first
 * costs an extra UDP header on the wire. */
1943 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1944 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1945 /* subtract the size of the first and last packets */
1946 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
/* First + last packets account for the constant 2; the remainder
 * divides into whole middle-buffer slots. */
1950 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));