2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
11 #include "../afs/param.h"
12 #include <afsconfig.h>
14 #include "../afs/sysincludes.h"
15 #include "../afs/afsincludes.h"
16 #include "../rx/rx_kcommon.h"
17 #include "../rx/rx_clock.h"
18 #include "../rx/rx_queue.h"
19 #include "../rx/rx_packet.h"
20 #else /* defined(UKERNEL) */
21 #include "../h/types.h"
22 #ifndef AFS_LINUX20_ENV
23 #include "../h/systm.h"
25 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
26 #include "../afs/sysincludes.h"
28 #include "../h/socket.h"
29 #include "../netinet/in.h"
30 #include "../afs/afs_osi.h"
31 #include "../rx/rx_kmutex.h"
32 #include "../rx/rx_clock.h"
33 #include "../rx/rx_queue.h"
35 #include <sys/sysmacros.h>
37 #include "../rx/rx_packet.h"
38 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
39 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
40 #include "../sys/mount.h" /* it gets pulled in by something later anyway */
42 #include "../h/mbuf.h"
44 #endif /* defined(UKERNEL) */
45 #include "../rx/rx_globals.h"
47 #include <afs/param.h>
48 #include <afsconfig.h>
49 #include "sys/types.h"
52 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
56 #include <sys/socket.h>
57 #include <netinet/in.h>
58 #endif /* AFS_NT40_ENV */
59 #include "rx_xmit_nt.h"
62 #include <sys/socket.h>
63 #include <netinet/in.h>
69 #include <sys/sysmacros.h>
71 #include "rx_packet.h"
72 #include "rx_globals.h"
74 #include "rx_internal.h"
88 /* rxdb_fileID is used to identify the lock location, along with line#. */
89 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
90 #endif /* RX_LOCKS_DB */
91 struct rx_packet *rx_mallocedP = 0;
93 extern char cml_version_number[];
94 extern int (*rx_almostSent)();
96 void rxi_FreePacketNoLock(struct rx_packet *p);
97 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
98 afs_int32 ahost, short aport, afs_int32 istack);
100 extern char cml_version_number[];
101 extern int (*rx_almostSent)();
102 /* some rules about packets:
103 * 1. When a packet is allocated, the final iov_buf contains room for
104 * a security trailer, but iov_len masks that fact. If the security
105 * package wants to add the trailer, it may do so, and then extend
106 * iov_len appropriately. For this reason, packet's niovecs and
107 * iov_len fields should be accurate before calling PreparePacket.
111 * all packet buffers (iov_base) are integral multiples of
113 * offset is an integral multiple of the word size.
115 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
119 for (l=0, i=1; i< packet->niovecs ; i++ ) {
120 if (l + packet->wirevec[i].iov_len > offset) {
121 return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
123 l += packet->wirevec[i].iov_len;
130 * all packet buffers (iov_base) are integral multiples of the word size.
131 * offset is an integral multiple of the word size.
133 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
137 for (l=0, i=1; i< packet->niovecs ; i++ ) {
138 if (l + packet->wirevec[i].iov_len > offset) {
139 *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
143 l += packet->wirevec[i].iov_len;
150 * all packet buffers (iov_base) are integral multiples of the
152 * offset is an integral multiple of the word size.
154 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
156 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
157 int resid, char *out)
159 unsigned int i, j, l, r;
160 for (l=0, i=1; i< packet->niovecs ; i++ ) {
161 if (l + packet->wirevec[i].iov_len > offset) {
164 l += packet->wirevec[i].iov_len;
167 /* i is the iovec which contains the first little bit of data in which we
168 * are interested. l is the total length of everything prior to this iovec.
169 * j is the number of bytes we can safely copy out of this iovec.
172 while ((resid > 0) && (i < packet->niovecs)) {
173 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
174 bcopy ((char *)(packet->wirevec[i].iov_base) + (offset - l), out, j);
176 l += packet->wirevec[i].iov_len;
180 return (resid ? (r - resid) : r);
185 * all packet buffers (iov_base) are integral multiples of the
187 * offset is an integral multiple of the word size.
189 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
195 for (l=0, i=1; i < packet->niovecs; i++ ) {
196 if (l + packet->wirevec[i].iov_len > offset) {
199 l += packet->wirevec[i].iov_len;
202 /* i is the iovec which contains the first little bit of data in which we
203 * are interested. l is the total length of everything prior to this iovec.
204 * j is the number of bytes we can safely copy out of this iovec.
207 while ((resid > 0) && (i < RX_MAXWVECS)) {
208 if (i >= packet->niovecs)
209 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
212 b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
213 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
216 l += packet->wirevec[i].iov_len;
220 return (resid ? (r - resid) : r);
/*
 * allocCBuf -- grab one packet off the global free queue for use as a
 * continuation buffer, bumping the per-class alloc-failure statistic
 * and returning early when the class is over quota.
 *
 * NOTE(review): the embedded original line numbers skip values here, so
 * lines (switch/brace/#ifdef scaffolding, early returns, queue_Remove of
 * the chosen packet) were dropped from this listing.  Code is left
 * byte-identical; verify against the complete source.
 */
223 static struct rx_packet * allocCBuf(int class)
227 extern void rxi_MorePacketsNoLock();
232 MUTEX_ENTER(&rx_freePktQ_lock);
/* Over quota for this class: record the failure and flag that more
 * packets are needed.  (The switch header itself was dropped from the
 * listing; the cases below clearly select a rx_stats counter.) */
235 if (rxi_OverQuota(class)) {
237 rxi_NeedMorePackets = TRUE;
238 MUTEX_ENTER(&rx_stats_mutex);
240 case RX_PACKET_CLASS_RECEIVE:
241 rx_stats.receivePktAllocFailures++;
243 case RX_PACKET_CLASS_SEND:
244 rx_stats.sendPktAllocFailures++;
246 case RX_PACKET_CLASS_SPECIAL:
247 rx_stats.specialPktAllocFailures++;
249 case RX_PACKET_CLASS_RECV_CBUF:
250 rx_stats.receiveCbufPktAllocFailures++;
252 case RX_PACKET_CLASS_SEND_CBUF:
253 rx_stats.sendCbufPktAllocFailures++;
256 MUTEX_EXIT(&rx_stats_mutex);
/* Queue empty: remember we need more; a second (presumably #ifdef'd)
 * branch grows the pool in place -- TODO confirm which build does which. */
260 if (queue_IsEmpty(&rx_freePacketQueue)) {
262 rxi_NeedMorePackets = TRUE;
266 if (queue_IsEmpty(&rx_freePacketQueue)) {
267 rxi_MorePacketsNoLock(rx_initSendWindow);
/* Take the head of the free queue; a free packet must still carry the
 * RX_FREE_PACKET marker or the pool has been corrupted. */
272 c = queue_First(&rx_freePacketQueue, rx_packet);
274 if (c->header.flags != RX_FREE_PACKET)
275 osi_Panic("rxi_AllocPacket: packet not free\n");
281 MUTEX_EXIT(&rx_freePktQ_lock);
288 * Free a packet currently used as a continuation buffer
/*
 * rxi_freeCBuf -- return a packet used as a continuation buffer to the
 * free queue, under rx_freePktQ_lock, then wake any packet waiters.
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip); code left byte-identical -- verify against the full source.
 */
290 void rxi_freeCBuf(struct rx_packet *c)
292 extern void rxi_PacketsUnWait();
296 MUTEX_ENTER(&rx_freePktQ_lock);
298 rxi_FreePacketNoLock(c);
299 /* Wakeup anyone waiting for packets */
302 MUTEX_EXIT(&rx_freePktQ_lock);
306 /* this one is kind of awful.
307 * In rxkad, the packet has been all shortened, and everything, ready for
308 * sending. All of a sudden, we discover we need some of that space back.
309 * This isn't terribly general, because it knows that the packets are only
310 * rounded up to the EBS (userdata + security header).
/*
 * rxi_RoundUpPacket -- grow the last iovec of a shortened packet by
 * <nb> bytes, exploiting the rounding slack left after the packet was
 * trimmed (see the explanation above: buffers are only rounded up to
 * the EBS).  The first branch handles the case where the last vec is
 * the packet's own localdata (capacity RX_FIRSTBUFFERSIZE); the second
 * handles a continuation buffer (capacity RX_CBUFFERSIZE).
 *
 * NOTE(review): lines were dropped from this listing (declaration of i,
 * the else arm's header, returns); code left byte-identical -- verify
 * against the full source.
 */
312 int rxi_RoundUpPacket(p, nb)
313 struct rx_packet * p;
318 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
319 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
320 p->wirevec[i].iov_len += nb;
325 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
326 p->wirevec[i].iov_len += nb;
333 /* get sufficient space to store nb bytes of data (or more), and hook
334 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
335 * returns the number of bytes >0 which it failed to come up with.
336 * Don't need to worry about locking on packet, since only
337 * one thread can manipulate one at a time. Locking on continuation
338 * packets is handled by allocCBuf */
339 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
/*
 * rxi_AllocDataBuf -- hook enough continuation buffers onto packet <p>
 * to hold at least <nb> more bytes (see the contract comment above:
 * returns <=0 on success, otherwise the byte count it failed to get).
 * Each allocCBuf() success appends one RX_CBUFFERSIZE vec and grows
 * p->length accordingly.
 *
 * NOTE(review): trailing lines (niovecs update, return of nb) were
 * dropped from this listing; code left byte-identical -- verify against
 * the full source.
 */
340 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
344 for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
345 register struct rx_packet *cb;
346 if ((cb = allocCBuf(class))) {
347 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
348 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
349 nb -= RX_CBUFFERSIZE;
350 p->length += RX_CBUFFERSIZE;
359 /* Add more packet buffers */
/*
 * rxi_MorePackets -- allocate <apackets> more rx_packet structures in
 * one osi_Alloc block, initialize each one's first two wire vecs
 * (header + localdata) and free-marker, and append them all to the
 * global free queue under rx_freePktQ_lock.
 *
 * NOTE(review): rx_mallocedP is overwritten with the newest block each
 * call, so only the latest allocation is reachable from it (see
 * rxi_FreeAllPackets below) -- presumably intentional in this vintage;
 * verify against the full source.  Lines were also dropped from this
 * listing (embedded numbers skip).
 */
360 void rxi_MorePackets(int apackets)
362 extern void rxi_PacketsUnWait();
363 struct rx_packet *p, *e;
367 getme = apackets * sizeof(struct rx_packet);
368 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
370 PIN(p, getme); /* XXXXX */
371 bzero((char *)p, getme);
374 MUTEX_ENTER(&rx_freePktQ_lock);
/* vec 0 = wire header, vec 1 = first data buffer, for every new packet. */
376 for (e = p + apackets; p<e; p++) {
377 p->wirevec[0].iov_base = (char *) (p->wirehead);
378 p->wirevec[0].iov_len = RX_HEADER_SIZE;
379 p->wirevec[1].iov_base = (char *) (p->localdata);
380 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
381 p->header.flags = RX_FREE_PACKET;
384 queue_Append(&rx_freePacketQueue, p);
386 rx_nFreePackets += apackets;
387 rxi_NeedMorePackets = FALSE;
391 MUTEX_EXIT(&rx_freePktQ_lock);
396 /* Add more packet buffers */
/*
 * rxi_MorePacketsNoLock -- like rxi_MorePackets but assumes the caller
 * already holds rx_freePktQ_lock.  Additionally over-allocates: one
 * extra set of continuation buffers for every 4 packets, sized so 1/4
 * of the packets can hold maximal (jumbo) amounts of data.
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip); code left byte-identical -- verify against the full source.
 */
397 void rxi_MorePacketsNoLock(int apackets)
399 extern void rxi_PacketsUnWait();
400 struct rx_packet *p, *e;
403 /* allocate enough packets that 1/4 of the packets will be able
404 * to hold maximal amounts of data */
405 apackets += (apackets/4)
406 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
407 getme = apackets * sizeof(struct rx_packet);
408 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
410 bzero((char *)p, getme);
/* vec 0 = wire header, vec 1 = first data buffer, for every new packet. */
412 for (e = p + apackets; p<e; p++) {
413 p->wirevec[0].iov_base = (char *) (p->wirehead);
414 p->wirevec[0].iov_len = RX_HEADER_SIZE;
415 p->wirevec[1].iov_base = (char *) (p->localdata);
416 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
417 p->header.flags = RX_FREE_PACKET;
420 queue_Append(&rx_freePacketQueue, p);
422 rx_nFreePackets += apackets;
423 rxi_NeedMorePackets = FALSE;
/*
 * rxi_FreeAllPackets -- release the packet pool.  As the MTUXXX note
 * says, this only frees the block rx_mallocedP currently points at
 * (the most recent allocation), and sizes it from rx_maxReceiveWindow
 * rather than the actual allocation size -- a known limitation, not a
 * complete teardown.
 */
428 void rxi_FreeAllPackets(void)
430 /* must be called at proper interrupt level, etcetera */
431 /* MTUXXX need to free all Packets */
432 osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
433 UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
436 /* Allocate more packets iff we need more continuation buffers */
437 /* In kernel, can't page in memory with interrupts disabled, so we
438 * don't use the event mechanism. */
/*
 * rx_CheckPackets -- grow the packet pool when a prior allocation
 * failure set rxi_NeedMorePackets.  Polled rather than event-driven
 * because (per the comment above) the kernel cannot page in memory
 * with interrupts disabled.
 */
439 void rx_CheckPackets()
441 if (rxi_NeedMorePackets) {
442 rxi_MorePackets(rx_initSendWindow);
446 /* In the packet freeing routine below, the assumption is that
447 we want all of the packets to be used equally frequently, so that we
448 don't get packet buffers paging out. It would be just as valid to
449 assume that we DO want them to page out if not many are being used.
450 In any event, we assume the former, and append the packets to the end
452 /* This explanation is bogus. The free list doesn't remain in any kind of
453 useful order for long: the packets in use get pretty much randomly scattered
454 across all the pages. In order to permit unused {packets,bufs} to page out, they
455 must be stored so that packets which are adjacent in memory are adjacent in the
456 free list. An array springs rapidly to mind.
459 /* Actually free the packet p. */
/*
 * rxi_FreePacketNoLock -- mark packet <p> free and append it to the
 * global free queue.  Caller must hold rx_freePktQ_lock.  Panics on
 * double-free (flags already carry RX_FREE_PACKET).
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip, e.g. the rx_nFreePackets bump); verify against the full source.
 */
460 void rxi_FreePacketNoLock(struct rx_packet *p)
462 dpf(("Free %x\n", p));
464 if (p->header.flags & RX_FREE_PACKET)
465 osi_Panic("rxi_FreePacketNoLock: packet already free\n");
467 p->header.flags = RX_FREE_PACKET;
468 queue_Append(&rx_freePacketQueue, p);
/*
 * rxi_FreeDataBufsNoLock -- return the continuation buffers (vecs 2..
 * niovecs-1) of packet <p> to the free queue.  Caller must hold
 * rx_freePktQ_lock.  <first> must currently be 1 (MTUXXX), and vec 1
 * must be the packet's own localdata; both are sanity-checked.
 *
 * NOTE(review): lines were dropped from this listing (the NULL check
 * guarded by the panic at old line 485, trailing niovecs reset/return);
 * code left byte-identical -- verify against the full source.
 */
471 int rxi_FreeDataBufsNoLock(p, first)
472 struct rx_packet * p;
475 struct iovec *iov, *end;
477 if (first != 1) /* MTUXXX */
478 osi_Panic("FreeDataBufs 1: first must be 1");
479 iov = &p->wirevec[1];
480 end = iov + (p->niovecs-1);
481 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
482 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
/* Each continuation vec points into another packet's localdata;
 * RX_CBUF_TO_PACKET recovers that packet so it can be freed. */
483 for (iov++ ; iov < end ; iov++) {
485 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
486 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
494 int rxi_nBadIovecs = 0;
496 /* rxi_RestoreDataBufs
498 * Restore the correct sizes to the iovecs. Called when reusing a packet
499 * for reading off the wire.
/*
 * rxi_RestoreDataBufs -- reset every iovec of <p> to its full capacity
 * (header, localdata, then RX_CBUFFERSIZE for each continuation vec),
 * so the packet can be reused for reading off the wire.  A NULL
 * continuation vec is counted in rxi_nBadIovecs (the handling lines
 * were dropped from this listing -- verify against the full source).
 */
501 void rxi_RestoreDataBufs(struct rx_packet *p)
504 struct iovec *iov = &p->wirevec[2];
506 p->wirevec[0].iov_base = (char *) (p->wirehead);
507 p->wirevec[0].iov_len = RX_HEADER_SIZE;
508 p->wirevec[1].iov_base = (char *) (p->localdata);
509 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
511 for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
512 if (!iov->iov_base) {
517 iov->iov_len = RX_CBUFFERSIZE;
/*
 * rxi_TrimDataBufs -- free only the continuation buffers of <p> that
 * hold no message data.  First walks past the vecs still covered by
 * p->length, then frees the remainder under rx_freePktQ_lock.  <first>
 * must be 1 (checked by the panic at old line 531).
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip: NULL checks, niovecs adjustment, wakeup); code left
 * byte-identical -- verify against the full source.
 */
521 int rxi_TrimDataBufs(p, first)
522 struct rx_packet * p;
525 extern void rxi_PacketsUnWait();
527 struct iovec *iov, *end;
531 osi_Panic("TrimDataBufs 1: first must be 1");
533 /* Skip over continuation buffers containing message data */
534 iov = &p->wirevec[2];
535 end = iov + (p->niovecs-2);
536 length = p->length - p->wirevec[1].iov_len;
537 for (; iov < end && length > 0 ; iov++) {
539 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
540 length -= iov->iov_len;
543 /* iov now points to the first empty data buffer. */
548 MUTEX_ENTER(&rx_freePktQ_lock);
/* Everything from here to the end of the vec array is empty: give the
 * underlying packets back to the free queue. */
550 for (; iov < end ; iov++) {
552 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
553 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
558 MUTEX_EXIT(&rx_freePktQ_lock);
564 /* Free the packet p. P is assumed not to be on any queue, i.e.
565 * remove it yourself first if you call this routine. */
/*
 * rxi_FreePacket -- free packet <p> (which must not be on any queue):
 * release its continuation buffers, return it to the free queue, and
 * wake any packet waiters, all under rx_freePktQ_lock.
 */
566 void rxi_FreePacket(struct rx_packet *p)
568 extern void rxi_PacketsUnWait();
572 MUTEX_ENTER(&rx_freePktQ_lock);
574 rxi_FreeDataBufsNoLock(p,1);
575 rxi_FreePacketNoLock(p);
576 /* Wakeup anyone waiting for packets */
579 MUTEX_EXIT(&rx_freePktQ_lock);
584 /* rxi_AllocPacket sets up p->length so it reflects the number of
585 * bytes in the packet at this point, **not including** the header.
586 * The header is absolutely necessary, besides, this is the way the
587 * length field is usually used */
/*
 * rxi_AllocPacketNoLock -- allocate one packet from the free queue
 * (caller holds rx_freePktQ_lock).  Over-quota classes fail fast with
 * NULL after bumping the per-class failure counter.  The returned
 * packet's first two vecs are reset to full capacity and p->length is
 * set to RX_FIRSTBUFFERSIZE (data-area size, header excluded -- see
 * the contract comment above).
 *
 * NOTE(review): lines were dropped from this listing (switch header,
 * queue_Remove, niovecs reset, final return); code left byte-identical
 * -- verify against the full source.
 */
588 struct rx_packet *rxi_AllocPacketNoLock(class)
591 register struct rx_packet *p;
594 if (rxi_OverQuota(class)) {
595 rxi_NeedMorePackets = TRUE;
596 MUTEX_ENTER(&rx_stats_mutex);
598 case RX_PACKET_CLASS_RECEIVE:
599 rx_stats.receivePktAllocFailures++;
601 case RX_PACKET_CLASS_SEND:
602 rx_stats.sendPktAllocFailures++;
604 case RX_PACKET_CLASS_SPECIAL:
605 rx_stats.specialPktAllocFailures++;
607 case RX_PACKET_CLASS_RECV_CBUF:
608 rx_stats.receiveCbufPktAllocFailures++;
610 case RX_PACKET_CLASS_SEND_CBUF:
611 rx_stats.sendCbufPktAllocFailures++;
614 MUTEX_EXIT(&rx_stats_mutex);
615 return (struct rx_packet *) 0;
619 MUTEX_ENTER(&rx_stats_mutex);
620 rx_stats.packetRequests++;
621 MUTEX_EXIT(&rx_stats_mutex);
/* Two empty-queue branches survive (presumably one per build flavor --
 * TODO confirm): panic, or grow the pool in place. */
624 if (queue_IsEmpty(&rx_freePacketQueue))
625 osi_Panic("rxi_AllocPacket error");
627 if (queue_IsEmpty(&rx_freePacketQueue))
628 rxi_MorePacketsNoLock(rx_initSendWindow);
632 p = queue_First(&rx_freePacketQueue, rx_packet);
633 if (p->header.flags != RX_FREE_PACKET)
634 osi_Panic("rxi_AllocPacket: packet not free\n");
636 dpf(("Alloc %x, class %d\n", p, class));
641 /* have to do this here because rx_FlushWrite fiddles with the iovs in
642 * order to truncate outbound packets. In the near future, may need
643 * to allocate bufs from a static pool here, and/or in AllocSendPacket
645 p->wirevec[0].iov_base = (char *) (p->wirehead);
646 p->wirevec[0].iov_len = RX_HEADER_SIZE;
647 p->wirevec[1].iov_base = (char *) (p->localdata);
648 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
650 p->length = RX_FIRSTBUFFERSIZE;
/*
 * rxi_AllocPacket -- locking wrapper around rxi_AllocPacketNoLock.
 * (The trailing `return p;` was dropped from this listing.)
 */
654 struct rx_packet *rxi_AllocPacket(class)
657 register struct rx_packet *p;
659 MUTEX_ENTER(&rx_freePktQ_lock);
660 p = rxi_AllocPacketNoLock(class);
661 MUTEX_EXIT(&rx_freePktQ_lock);
665 /* This guy comes up with as many buffers as it {takes,can get} given
666 * the MTU for this call. It also sets the packet length before
667 * returning. caution: this is often called at NETPRI
668 * Called with call locked.
/*
 * rxi_AllocSendPacket -- allocate a send packet sized for this call's
 * MTU (see the header comment above: often called at NETPRI, with the
 * call locked).  Loops until it gets a packet or the call errors:
 * on success it grows/trims the data area toward min(want, mud) and
 * accounts for the security header+trailer (delta); on failure it
 * sleeps on rx_waitingForPackets with the call lock dropped.
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip: break/return paths, length trimming, loop close); code left
 * byte-identical -- verify against the full source.
 */
670 struct rx_packet *rxi_AllocSendPacket(call, want)
671 register struct rx_call *call;
674 register struct rx_packet *p = (struct rx_packet *) 0;
676 register unsigned delta;
/* mud = max user data for this call; delta = security overhead. */
679 mud = call->MTU - RX_HEADER_SIZE;
680 delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
681 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
683 while (!(call->error)) {
684 MUTEX_ENTER(&rx_freePktQ_lock);
685 /* if an error occurred, or we get the packet we want, we're done */
686 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
687 MUTEX_EXIT(&rx_freePktQ_lock);
690 want = MIN(want, mud);
692 if ((unsigned) want > p->length)
693 (void) rxi_AllocDataBuf(p, (want - p->length),
694 RX_PACKET_CLASS_SEND_CBUF);
696 if ((unsigned) p->length > mud)
699 if (delta >= p->length) {
708 /* no error occurred, and we didn't get a packet, so we sleep.
709 * At this point, we assume that packets will be returned
710 * sooner or later, as packets are acknowledged, and so we
713 call->flags |= RX_CALL_WAIT_PACKETS;
714 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
/* Drop the call lock while sleeping so acks can be processed and
 * packets returned; re-take it before clearing the wait flag. */
715 MUTEX_EXIT(&call->lock);
716 rx_waitingForPackets = 1;
718 #ifdef RX_ENABLE_LOCKS
719 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
721 osi_rxSleep(&rx_waitingForPackets);
723 MUTEX_EXIT(&rx_freePktQ_lock);
724 MUTEX_ENTER(&call->lock);
725 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
726 call->flags &= ~RX_CALL_WAIT_PACKETS;
735 /* count the number of used FDs */
/*
 * CountFDs -- count how many of the first <amax> file descriptors are
 * open, by probing each with fstat().  Used for the rxdebug statistics
 * reply.  (Declarations of tstat/count and the return were dropped from
 * this listing -- verify against the full source.)
 */
736 static int CountFDs(amax)
739 register int i, code;
743 for(i=0;i<amax;i++) {
744 code = fstat(i, &tstat);
745 if (code == 0) count++;
752 #define CountFDs(amax) amax
756 #if !defined(KERNEL) || defined(UKERNEL)
758 /* This function reads a single packet from the interface into the
759 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
760 * (host,port) of the sender are stored in the supplied variables, and
761 * the data length of the packet is stored in the packet structure.
762 * The header is decoded. */
/*
 * rxi_ReadPacket -- read one UDP datagram from <socket> into packet
 * <p> via recvmsg on the packet's iovec array; store the sender in
 * (*host, *port), decode the rx header, account statistics, and trim
 * unused continuation buffers.  Returns 0 for a bogus packet (per the
 * header comment above).
 *
 * NOTE(review): many lines were dropped from this listing (embedded
 * numbers skip: error-path braces, returns, the success path); code
 * left byte-identical -- verify against the full source.
 */
763 int rxi_ReadPacket(socket, p, host, port)
765 register struct rx_packet *p;
769 struct sockaddr_in from;
772 register afs_int32 tlen, savelen;
/* Advertise rx_maxJumboRecvSize and make sure the packet has room for
 * it (rxi_AllocDataBuf grows the iovec array as needed). */
774 rx_computelen(p, tlen);
775 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
777 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
778 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
779 * it once in order to avoid races. */
782 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
790 /* Extend the last iovec for padding, it's just to make sure that the
791 * read doesn't return more data than we expect, and is done to get around
792 * our problems caused by the lack of a length field in the rx header.
793 * Use the extra buffer that follows the localdata in each packet
795 savelen = p->wirevec[p->niovecs].iov_len;
796 p->wirevec[p->niovecs].iov_len += RX_EXTRABUFFERSIZE;
798 bzero((char *)&msg, sizeof(msg));
799 msg.msg_name = (char *) &from;
800 msg.msg_namelen = sizeof(struct sockaddr_in);
801 msg.msg_iov = p->wirevec;
802 msg.msg_iovlen = p->niovecs;
803 nbytes = rxi_Recvmsg(socket, &msg, 0);
805 /* restore the vec to its correct state */
806 p->wirevec[p->niovecs].iov_len = savelen;
/* p->length excludes the wire header; the 0x8000 test catches a
 * negative nbytes that wrapped -- TODO confirm intent from full source. */
808 p->length = (nbytes - RX_HEADER_SIZE);
809 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
811 rxi_MorePackets(rx_initSendWindow);
813 else if (nbytes < 0 && errno == EWOULDBLOCK) {
814 MUTEX_ENTER(&rx_stats_mutex);
815 rx_stats.noPacketOnRead++;
816 MUTEX_EXIT(&rx_stats_mutex);
/* Any other short/failed read is recorded as a bogus packet. */
820 MUTEX_ENTER(&rx_stats_mutex);
821 rx_stats.bogusPacketOnRead++;
822 rx_stats.bogusHost = from.sin_addr.s_addr;
823 MUTEX_EXIT(&rx_stats_mutex);
824 dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
825 from.sin_port,nbytes));
830 /* Extract packet header. */
831 rxi_DecodePacketHeader(p);
833 *host = from.sin_addr.s_addr;
834 *port = from.sin_port;
835 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
836 struct rx_peer *peer;
837 MUTEX_ENTER(&rx_stats_mutex);
838 rx_stats.packetsRead[p->header.type-1]++;
839 MUTEX_EXIT(&rx_stats_mutex);
841 * Try to look up this peer structure. If it doesn't exist,
842 * don't create a new one -
843 * we don't keep count of the bytes sent/received if a peer
844 * structure doesn't already exist.
846 * The peer/connection cleanup code assumes that there is 1 peer
847 * per connection. If we actually created a peer structure here
848 * and this packet was an rxdebug packet, the peer structure would
849 * never be cleaned up.
851 peer = rxi_FindPeer(*host, *port, 0, 0);
853 MUTEX_ENTER(&peer->peer_lock);
854 hadd32(peer->bytesReceived, p->length);
855 MUTEX_EXIT(&peer->peer_lock);
859 /* Free any empty packet buffers at the end of this packet */
860 rxi_TrimDataBufs(p, 1);
866 #endif /* !KERNEL || UKERNEL */
868 /* This function splits off the first packet in a jumbo packet.
869 * As of AFS 3.5, jumbograms contain more than one fixed size
870 * packet, and the RX_JUMBO_PACKET flag is set in all but the
871 * last packet header. All packets (except the last) are padded to
872 * fall on RX_CBUFFERSIZE boundaries.
873 * HACK: We store the length of the first n-1 packets in the
874 * last two pad bytes. */
/*
 * rxi_SplitJumboPacket -- split the first fixed-size packet off a
 * jumbogram (see the header comment above).  The first packet keeps
 * RX_JUMBOBUFFERSIZE bytes; the remainder becomes a new packet <np>
 * built from the continuation buffers, with serial/seq advanced by one
 * and flags/spare taken from the abbreviated 4-byte jumbo header.
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip: error returns, the trailing return of np); code left
 * byte-identical -- verify against the full source.
 */
876 struct rx_packet *rxi_SplitJumboPacket(p, host, port, first)
877 register struct rx_packet *p;
882 struct rx_packet *np;
883 struct rx_jumboHeader *jp;
889 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
890 * bytes in length. All but the first packet are preceded by
891 * an abbreviated four byte header. The length of the last packet
892 * is calculated from the size of the jumbogram. */
893 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
/* Sanity checks: the jumbogram must be long enough and must carry at
 * least one continuation buffer beyond the two fixed vecs. */
895 if ((int)p->length < length) {
896 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
899 niov = p->niovecs - 2;
901 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
904 iov = &p->wirevec[2];
905 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
907 /* Get a pointer to the abbreviated packet header */
908 jp = (struct rx_jumboHeader *)
909 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
911 /* Set up the iovecs for the next packet */
912 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
913 np->wirevec[0].iov_len = sizeof(struct rx_header);
914 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
915 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
916 np->niovecs = niov+1;
917 for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
918 np->wirevec[i] = *iov;
920 np->length = p->length - length;
921 p->length = RX_JUMBOBUFFERSIZE;
924 /* Convert the jumbo packet header to host byte order */
925 temp = ntohl(*(afs_uint32 *)jp);
926 jp->flags = (u_char)(temp >> 24);
927 jp->cksum = (u_short)(temp);
929 /* Fill in the packet header */
930 np->header = p->header;
931 np->header.serial = p->header.serial + 1;
932 np->header.seq = p->header.seq + 1;
933 np->header.flags = jp->flags;
934 np->header.spare = jp->cksum;
940 /* Send a udp datagram */
/*
 * osi_NetSend -- send a UDP datagram: build a msghdr over the supplied
 * iovec array and hand it to rxi_Sendmsg.  (The msg_name/msg_iov
 * assignments and the return were dropped from this listing -- verify
 * against the full source.)
 */
941 int osi_NetSend(socket, addr, dvec, nvecs, length, istack)
951 memset(&msg, 0, sizeof(msg));
953 msg.msg_iovlen = nvecs;
955 msg.msg_namelen = sizeof(struct sockaddr_in);
957 rxi_Sendmsg(socket, &msg, 0);
961 #elif !defined(UKERNEL)
962 /* osi_NetSend is defined in afs/afs_osinet.c
963 * message receipt is done in rxk_input or rx_put.
968 * Copy an mblock to the contiguous area pointed to by cp.
969 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
970 * but it doesn't really.
971 * Returns the number of bytes not transferred.
972 * The message is NOT changed.
/*
 * cpytoc -- copy the M_DATA contents of an mblk chain into the
 * contiguous buffer <cp> (Solaris streams path).  As noted above, the
 * <off> argument is not actually honored; returns bytes NOT
 * transferred.  The chain itself is not modified.
 *
 * NOTE(review): lines were dropped from this listing (pointer/length
 * advancement, return); code left byte-identical.
 */
974 static int cpytoc(mp, off, len, cp)
976 register int off, len;
981 for (;mp && len > 0; mp = mp->b_cont) {
982 if (mp->b_datap->db_type != M_DATA) {
985 n = MIN(len, (mp->b_wptr - mp->b_rptr));
986 bcopy((char *)mp->b_rptr, cp, n);
994 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
995 * but it doesn't really.
996 * This sucks, anyway, do it like m_cpy.... below
/*
 * cpytoiovec -- copy the M_DATA contents of an mblk chain into an
 * iovec array (Solaris streams path).  As noted above, <off> is not
 * actually honored.  Tracks position with: i = current iovec, o =
 * offset within it, t = its length, m/n = chunk sizes.
 *
 * NOTE(review): many interior lines were dropped from this listing
 * (iovec advancement, bookkeeping, return); code left byte-identical
 * -- verify against the full source.
 */
998 static int cpytoiovec(mp, off, len, iovs, niovs)
1000 int off, len, niovs;
1001 register struct iovec *iovs;
1003 register int m,n,o,t,i;
1005 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1006 if (mp->b_datap->db_type != M_DATA) {
1009 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1015 t = iovs[i].iov_len;
1018 bcopy((char *)mp->b_rptr, iovs[i].iov_base + o, m);
1027 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1028 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1030 #if !defined(AFS_LINUX20_ENV)
/*
 * m_cpytoiovec -- BSD mbuf flavor: skip <off> bytes into mbuf chain
 * <m>, then copy <len> bytes into the iovec array, walking both the
 * mbuf chain (p1/l1) and the iovec array (p2/l2) in lockstep.
 *
 * NOTE(review): many interior lines were dropped from this listing
 * (the copy itself, the advance-to-next-mbuf/iovec bookkeeping, the
 * return); code left byte-identical -- verify against the full source.
 */
1031 static int m_cpytoiovec(m, off, len, iovs, niovs)
1033 int off, len, niovs;
1034 struct iovec iovs[];
1037 unsigned int l1, l2, i, t;
1039 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1040 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
/* Consume <off> bytes worth of leading mbufs. */
1043 if (m->m_len <= off) {
1053 p1 = mtod(m, caddr_t)+off;
1054 l1 = m->m_len - off;
1056 p2 = iovs[0].iov_base;
1057 l2 = iovs[0].iov_len;
/* Copy min(l1, l2, len) at each step, refilling whichever side ran dry. */
1060 t = MIN(l1, MIN(l2, (unsigned int)len));
1069 p1 = mtod(m, caddr_t);
1075 p2 = iovs[i].iov_base;
1076 l2 = iovs[i].iov_len;
1084 #endif /* AFS_SUN5_ENV */
1086 #if !defined(AFS_LINUX20_ENV)
/*
 * rx_mb_to_packet -- copy <data_len> bytes (after skipping <hdr_len>)
 * from mbuf chain <amb> into the packet's wire vector via
 * m_cpytoiovec, then (in lines dropped from this listing) free the
 * chain and return a status -- verify against the full source.
 */
1087 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1094 struct rx_packet *phandle;
1095 int hdr_len, data_len;
1099 code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1105 #endif /*KERNEL && !UKERNEL*/
1108 /* send a response to a debug packet */
/*
 * rxi_ReceiveDebugPacket -- service an rxdebug request packet <ap>:
 * decode the request (type/index), build the corresponding reply in
 * place (basic stats, connection dump, peer dump, raw rx_stats, or a
 * BADTYPE error), and transmit it with rxi_SendDebugPacket.  The reply
 * is written back into <ap> itself, growing it with rxi_AllocDataBuf
 * when the reply structure is larger than the current packet.
 *
 * NOTE(review): this listing dropped many lines (the switch header,
 * braces, #else/#endif arms, break statements, the final return);
 * code is left byte-identical -- verify against the complete source.
 */
1110 struct rx_packet *rxi_ReceiveDebugPacket(ap, asocket, ahost, aport, istack)
1114 register struct rx_packet *ap;
1117 struct rx_debugIn tin;
1119 struct rx_serverQueueEntry *np, *nqe;
1121 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1122 /* all done with packet, now set length to the truth, so we can
1123 * reuse this packet */
1124 rx_computelen(ap, ap->length);
/* Request fields arrive in network order. */
1126 tin.type = ntohl(tin.type);
1127 tin.index = ntohl(tin.index);
1129 case RX_DEBUGI_GETSTATS: {
1130 struct rx_debugStats tstat;
1132 /* get basic stats */
1133 bzero ((char *)&tstat, sizeof(tstat)); /* make sure spares are zero */
1134 tstat.version = RX_DEBUGI_VERSION;
1135 #ifndef RX_ENABLE_LOCKS
1136 tstat.waitingForPackets = rx_waitingForPackets;
1138 tstat.nFreePackets = htonl(rx_nFreePackets);
1139 tstat.callsExecuted = htonl(rxi_nCalls);
1140 tstat.packetReclaims = htonl(rx_packetReclaims);
1141 tstat.usedFDs = CountFDs(64);
1142 tstat.nWaiting = htonl(rx_nWaiting);
1143 queue_Count( &rx_idleServerQueue, np, nqe,
1144 rx_serverQueueEntry, tstat.idleThreads);
1145 tstat.idleThreads = htonl(tstat.idleThreads);
/* Grow the packet if the reply doesn't fit, then send it. */
1146 tl = sizeof(struct rx_debugStats) - ap->length;
1148 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1151 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1152 ap->length = sizeof(struct rx_debugStats);
1153 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1154 rx_computelen(ap, ap->length);
1159 case RX_DEBUGI_GETALLCONN:
1160 case RX_DEBUGI_GETCONN: {
1162 register struct rx_connection *tc;
1163 struct rx_call *tcall;
1164 struct rx_debugConn tconn;
1165 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1168 tl = sizeof(struct rx_debugConn) - ap->length;
1170 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1174 bzero ((char *)&tconn, sizeof(tconn)); /* make sure spares are zero */
1175 /* get N'th (maybe) "interesting" connection info */
1176 for(i=0;i<rx_hashTableSize;i++) {
1177 #if !defined(KERNEL)
1178 /* the time complexity of the algorithm used here
1179 * exponentially increases with the number of connections.
1181 #ifdef AFS_PTHREAD_ENV
1184 (void) IOMGR_Poll();
1187 MUTEX_ENTER(&rx_connHashTable_lock);
1188 /* We might be slightly out of step since we are not
1189 * locking each call, but this is only debugging output.
1191 for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
1192 if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1193 tconn.host = tc->peer->host;
1194 tconn.port = tc->peer->port;
1195 tconn.cid = htonl(tc->cid);
1196 tconn.epoch = htonl(tc->epoch);
1197 tconn.serial = htonl(tc->serial);
1198 for(j=0;j<RX_MAXCALLS;j++) {
1199 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1200 if ((tcall=tc->call[j])) {
1201 tconn.callState[j] = tcall->state;
1202 tconn.callMode[j] = tcall->mode;
1203 tconn.callFlags[j] = tcall->flags;
1204 if (queue_IsNotEmpty(&tcall->rq))
1205 tconn.callOther[j] |= RX_OTHER_IN;
1206 if (queue_IsNotEmpty(&tcall->tq))
1207 tconn.callOther[j] |= RX_OTHER_OUT;
1209 else tconn.callState[j] = RX_STATE_NOTINIT;
1212 tconn.natMTU = htonl(tc->peer->natMTU);
1213 tconn.error = htonl(tc->error);
1214 tconn.flags = tc->flags;
1215 tconn.type = tc->type;
1216 tconn.securityIndex = tc->securityIndex;
1217 if (tc->securityObject) {
1218 RXS_GetStats (tc->securityObject, tc,
/* Byte-swap the security stats in place via these helper macros. */
1220 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1221 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1224 DOHTONL(packetsReceived);
1225 DOHTONL(packetsSent);
1226 DOHTONL(bytesReceived);
1229 i<sizeof(tconn.secStats.spares)/sizeof(short);
1233 i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
/* Found the requested connection: send it and return early. */
1238 MUTEX_EXIT(&rx_connHashTable_lock);
1239 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1241 ap->length = sizeof(struct rx_debugConn);
1242 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1247 MUTEX_EXIT(&rx_connHashTable_lock);
1249 /* if we make it here, there are no interesting packets */
1250 tconn.cid = htonl(0xffffffff); /* means end */
1251 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1253 ap->length = sizeof(struct rx_debugConn);
1254 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1260 * Pass back all the peer structures we have available
1263 case RX_DEBUGI_GETPEER: {
1265 register struct rx_peer *tp;
1266 struct rx_debugPeer tpeer;
1269 tl = sizeof(struct rx_debugPeer) - ap->length;
1271 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1275 bzero ((char *)&tpeer, sizeof(tpeer));
1276 for(i=0;i<rx_hashTableSize;i++) {
1277 #if !defined(KERNEL)
1278 /* the time complexity of the algorithm used here
1279 * exponentially increases with the number of peers.
1281 * Yielding after processing each hash table entry
1282 * and dropping rx_peerHashTable_lock.
1283 * also increases the risk that we will miss a new
1284 * entry - but we are willing to live with this
1285 * limitation since this is meant for debugging only
1287 #ifdef AFS_PTHREAD_ENV
1290 (void) IOMGR_Poll();
1293 MUTEX_ENTER(&rx_peerHashTable_lock);
1294 for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
1295 if (tin.index-- <= 0) {
1296 tpeer.host = tp->host;
1297 tpeer.port = tp->port;
1298 tpeer.ifMTU = htons(tp->ifMTU);
1299 tpeer.idleWhen = htonl(tp->idleWhen);
1300 tpeer.refCount = htons(tp->refCount);
1301 tpeer.burstSize = tp->burstSize;
1302 tpeer.burst = tp->burst;
1303 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1304 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1305 tpeer.rtt = htonl(tp->rtt);
1306 tpeer.rtt_dev = htonl(tp->rtt_dev);
1307 tpeer.timeout.sec = htonl(tp->timeout.sec);
1308 tpeer.timeout.usec = htonl(tp->timeout.usec);
1309 tpeer.nSent = htonl(tp->nSent);
1310 tpeer.reSends = htonl(tp->reSends);
1311 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1312 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1313 tpeer.rateFlag = htonl(tp->rateFlag);
1314 tpeer.natMTU = htons(tp->natMTU);
1315 tpeer.maxMTU = htons(tp->maxMTU);
1316 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1317 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1318 tpeer.MTU = htons(tp->MTU);
1319 tpeer.cwind = htons(tp->cwind);
1320 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1321 tpeer.congestSeq = htons(tp->congestSeq);
1322 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1323 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1324 tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1325 tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
/* Found the requested peer: send it and return early. */
1327 MUTEX_EXIT(&rx_peerHashTable_lock);
1328 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1330 ap->length = sizeof(struct rx_debugPeer);
1331 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1336 MUTEX_EXIT(&rx_peerHashTable_lock);
1338 /* if we make it here, there are no interesting packets */
1339 tpeer.host = htonl(0xffffffff); /* means end */
1340 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1342 ap->length = sizeof(struct rx_debugPeer);
1343 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1348 case RX_DEBUGI_RXSTATS: {
1352 tl = sizeof(rx_stats) - ap->length;
1354 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1358 /* Since it's all int32s convert to network order with a loop. */
1359 MUTEX_ENTER(&rx_stats_mutex);
1360 s = (afs_int32 *)&rx_stats;
1361 for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1362 rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1365 ap->length = sizeof(rx_stats);
1366 MUTEX_EXIT(&rx_stats_mutex);
1367 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1373 /* error response packet */
1374 tin.type = htonl(RX_DEBUGI_BADTYPE);
1375 tin.index = tin.type;
1376 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1378 ap->length = sizeof(struct rx_debugIn);
1379 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* Reply to a version-probe packet: overwrite the packet's payload with the
 * build's version string and bounce it back to the sender via
 * rxi_SendDebugPacket().
 * NOTE(review): this listing is truncated -- the K&R declarations for
 * asocket/ahost/aport/istack, the body braces, and the return statement are
 * not visible here. */
1386 struct rx_packet *rxi_ReceiveVersionPacket(ap, asocket, ahost, aport, istack)
1390 register struct rx_packet *ap;
/* 65 bytes copied starting at cml_version_number+4 -- presumably skips a
 * leading tag in the version string; TODO confirm against the definition of
 * cml_version_number (declared extern in this file's head). */
1394 rx_packetwrite(ap, 0, 65, cml_version_number+4);
1397 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1403 /* send a debug packet back to the sender */
/* Transmit one already-built debug reply on asocket to ahost:aport.  The
 * address/port are assigned straight into the sockaddr, so they are
 * presumably already in network byte order -- TODO confirm at the callers.
 * NOTE(review): this listing is truncated; the declarations of i, savelen,
 * saven and nbytes, plus several braces and a loop break, are not visible. */
1404 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1405 afs_int32 ahost, short aport, afs_int32 istack)
1407 struct sockaddr_in taddr;
1413 int waslocked = ISAFS_GLOCK();
1416 taddr.sin_family = AF_INET;
1417 taddr.sin_port = aport;
1418 taddr.sin_addr.s_addr = ahost;
/* Walk the data iovecs and shrink the one containing the final byte so the
 * iovec chain covers exactly apacket->length bytes; the pre-trim length and
 * iovec count are remembered in savelen/saven and restored below. */
1421 /* We need to trim the niovecs. */
1422 nbytes = apacket->length;
1423 for (i=1; i < apacket->niovecs; i++) {
1424 if (nbytes <= apacket->wirevec[i].iov_len) {
1425 savelen = apacket->wirevec[i].iov_len;
1426 saven = apacket->niovecs;
1427 apacket->wirevec[i].iov_len = nbytes;
1428 apacket->niovecs = i+1; /* so condition fails because i == niovecs */
1430 else nbytes -= apacket->wirevec[i].iov_len;
/* Drop the AFS global lock (if held) around the network send so it is not
 * held across a potentially blocking operation. */
1434 if (waslocked) AFS_GUNLOCK();
1436 /* debug packets are not reliably delivered, hence the cast below. */
1437 (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1438 apacket->length+RX_HEADER_SIZE, istack);
1440 if (waslocked) AFS_GLOCK();
/* Undo the iovec trim so the caller gets its packet back unmodified. */
1443 if (saven) { /* means we truncated the packet above. */
1444 apacket->wirevec[i-1].iov_len = savelen;
1445 apacket->niovecs = saven;
1450 /* Send the packet to appropriate destination for the specified
1451 * connection. The header is first encoded and placed in the packet.
/* Single-packet send path: stamps a fresh serial number, encodes the wire
 * header, optionally drops the packet for testing, sends it, and updates
 * send statistics and the peer's byte counter.
 * NOTE(review): this listing is truncated -- parameter/local declarations
 * (istack, socket, waslocked, i), several braces and #ifdef RXDEBUG lines
 * are not visible here. */
1453 void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
1459 struct sockaddr_in addr;
1460 register struct rx_peer *peer = conn->peer;
1463 char deliveryType = 'S';
1465 /* The address we're sending the packet to */
/* peer->host/port are copied verbatim into the sockaddr, so presumably
 * stored in network byte order -- TODO confirm against struct rx_peer. */
1466 addr.sin_family = AF_INET;
1467 addr.sin_port = peer->port;
1468 addr.sin_addr.s_addr = peer->host;
1470 /* This stuff should be revamped, I think, so that most, if not
1471 * all, of the header stuff is always added here. We could
1472 * probably do away with the encode/decode routines. XXXXX */
1474 /* Stamp each packet with a unique serial number. The serial
1475 * number is maintained on a connection basis because some types
1476 * of security may be based on the serial number of the packet,
1477 * and security is handled on a per authenticated-connection
1479 /* Pre-increment, to guarantee no zero serial number; a zero
1480 * serial number means the packet was never sent. */
1481 MUTEX_ENTER(&conn->conn_data_lock);
1482 p->header.serial = ++conn->serial;
1483 MUTEX_EXIT(&conn->conn_data_lock);
1484 /* This is so we can adjust retransmit time-outs better in the face of
1485 * rapidly changing round-trip times. RTO estimation is not a la Karn.
/* firstSerial records the serial of the packet's first transmission; only
 * set once so retransmissions keep the original value. */
1487 if (p->firstSerial == 0) {
1488 p->firstSerial = p->header.serial;
1492 /* If an output tracer function is defined, call it with the packet and
1493 * network address. Note this function may modify its arguments. */
1494 if (rx_almostSent) {
1495 int drop = (*rx_almostSent) (p, &addr);
1496 /* drop packet if return value is non-zero? */
1497 if (drop) deliveryType = 'D'; /* Drop the packet */
1501 /* Get network byte order header */
1502 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1503 * touch ALL the fields */
1505 /* Send the packet out on the same socket that related packets are being
1507 socket = (conn->type == RX_CLIENT_CONNECTION
1508 ? rx_socket : conn->service->socket);
1511 /* Possibly drop this packet, for testing purposes */
1512 if ((deliveryType == 'D') ||
1513 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1514 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1515 deliveryType = 'D'; /* Drop the packet */
1518 deliveryType = 'S'; /* Send the packet */
1519 #endif /* RXDEBUG */
1521 /* Loop until the packet is sent. We'd prefer just to use a
1522 * blocking socket, but unfortunately the interface doesn't
1523 * allow us to have the socket block in send mode, and not
1524 * block in receive mode */
/* Release the AFS global lock around the (possibly blocking) send. */
1527 waslocked = ISAFS_GLOCK();
1528 if (waslocked) AFS_GUNLOCK();
1530 if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1531 p->length+RX_HEADER_SIZE, istack)){
1532 /* send failed, so let's hurry up the resend, eh? */
1533 MUTEX_ENTER(&rx_stats_mutex);
1534 rx_stats.netSendFailures++;
1535 MUTEX_EXIT(&rx_stats_mutex);
/* On failure, schedule an early retransmit: 10ms plus a backoff term that
 * grows by 256ms per backoff step. */
1536 p->retryTime = p->timeSent; /* resend it very soon */
1537 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1540 if (waslocked) AFS_GLOCK();
1545 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1546 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1547 peer->host, peer->port, p->header.serial, p->header.epoch,
1548 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1549 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
/* Per-type send counter and per-peer byte accounting, each under its own
 * lock. */
1551 MUTEX_ENTER(&rx_stats_mutex);
1552 rx_stats.packetsSent[p->header.type-1]++;
1553 MUTEX_EXIT(&rx_stats_mutex);
1554 MUTEX_ENTER(&peer->peer_lock);
1555 hadd32(peer->bytesSent, p->length);
1556 MUTEX_EXIT(&peer->peer_lock);
1559 /* Send a list of packets to appropriate destination for the specified
1560 * connection. The headers are first encoded and placed in the packets.
/* Jumbogram send path: packs `len` packets into one UDP datagram using a
 * scatter/gather iovec array, stamping consecutive serial numbers and
 * chaining the buffers with rx_jumboHeader trailers.
 * NOTE(review): this listing is truncated -- the declarations of len, i,
 * length, temp, serial, socket and waslocked, plus loop braces and some
 * #ifdef partners, are not visible here. */
1562 void rxi_SendPacketList(struct rx_connection * conn,
1563 struct rx_packet **list,
1567 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1570 struct sockaddr_in addr;
1571 register struct rx_peer *peer = conn->peer;
1573 struct rx_packet *p = NULL;
1574 struct iovec wirevec[RX_MAXIOVECS];
1578 struct rx_jumboHeader *jp;
1580 char deliveryType = 'S';
1582 /* The address we're sending the packet to */
1583 addr.sin_family = AF_INET;
1584 addr.sin_port = peer->port;
1585 addr.sin_addr.s_addr = peer->host;
/* One iovec for the shared wire header plus one per packet must fit. */
1587 if (len+1 > RX_MAXIOVECS) {
1588 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
/* Reserve `len` consecutive serials up front under the connection lock;
 * individual packets are stamped from the local copy below. */
1592 * Stamp the packets in this jumbogram with consecutive serial numbers
1594 MUTEX_ENTER(&conn->conn_data_lock);
1595 serial = conn->serial;
1596 conn->serial += len;
1597 MUTEX_EXIT(&conn->conn_data_lock);
1600 /* This stuff should be revamped, I think, so that most, if not
1601 * all, of the header stuff is always added here. We could
1602 * probably do away with the encode/decode routines. XXXXX */
/* iovec 0 carries the first packet's wire header for the whole datagram. */
1605 length = RX_HEADER_SIZE;
1606 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1607 wirevec[0].iov_len = RX_HEADER_SIZE;
1608 for (i = 0 ; i < len ; i++) {
1611 /* The whole 3.5 jumbogram scheme relies on packets fitting
1612 * in a single packet buffer. */
1613 if (p->niovecs > 2) {
1614 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1617 /* Set the RX_JUMBO_PACKET flags in all but the last packets
/* Non-final packets must be exactly one jumbo buffer long so the receiver
 * can find the next embedded header at a fixed offset. */
1620 if (p->length != RX_JUMBOBUFFERSIZE) {
1621 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1623 p->header.flags |= RX_JUMBO_PACKET;
1624 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1625 wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1627 wirevec[i+1].iov_len = p->length;
1628 length += p->length;
1630 wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
/* jp points into the PREVIOUS packet's trailer; write this packet's
 * flags/spare there, then advance jp to this packet's trailer. */
1632 /* Convert jumbo packet header to network byte order */
1633 temp = (afs_uint32)(p->header.flags) << 24;
1634 temp |= (afs_uint32)(p->header.spare);
1635 *(afs_uint32 *)jp = htonl(temp);
1637 jp = (struct rx_jumboHeader *)
1638 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1640 /* Stamp each packet with a unique serial number. The serial
1641 * number is maintained on a connection basis because some types
1642 * of security may be based on the serial number of the packet,
1643 * and security is handled on a per authenticated-connection
1645 /* Pre-increment, to guarantee no zero serial number; a zero
1646 * serial number means the packet was never sent. */
1647 p->header.serial = ++serial;
1648 /* This is so we can adjust retransmit time-outs better in the face of
1649 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1651 if (p->firstSerial == 0) {
1652 p->firstSerial = p->header.serial;
1656 /* If an output tracer function is defined, call it with the packet and
1657 * network address. Note this function may modify its arguments. */
1658 if (rx_almostSent) {
1659 int drop = (*rx_almostSent) (p, &addr);
1660 /* drop packet if return value is non-zero? */
1661 if (drop) deliveryType = 'D'; /* Drop the packet */
1665 /* Get network byte order header */
1666 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1667 * touch ALL the fields */
1670 /* Send the packet out on the same socket that related packets are being
1672 socket = (conn->type == RX_CLIENT_CONNECTION
1673 ? rx_socket : conn->service->socket);
1676 /* Possibly drop this packet, for testing purposes */
1677 if ((deliveryType == 'D') ||
1678 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1679 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1680 deliveryType = 'D'; /* Drop the packet */
1683 deliveryType = 'S'; /* Send the packet */
1684 #endif /* RXDEBUG */
1686 /* Loop until the packet is sent. We'd prefer just to use a
1687 * blocking socket, but unfortunately the interface doesn't
1688 * allow us to have the socket block in send mode, and not
1689 * block in receive mode */
1691 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1692 waslocked = ISAFS_GLOCK();
1693 if (!istack && waslocked) AFS_GUNLOCK();
/* Whole jumbogram goes out as a single datagram: len+1 iovecs, `length`
 * bytes total. */
1695 if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
1696 /* send failed, so let's hurry up the resend, eh? */
1697 MUTEX_ENTER(&rx_stats_mutex);
1698 rx_stats.netSendFailures++;
1699 MUTEX_EXIT(&rx_stats_mutex);
/* On failure every constituent packet is rescheduled for early resend. */
1700 for (i = 0 ; i < len ; i++) {
1702 p->retryTime = p->timeSent; /* resend it very soon */
1703 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1706 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1707 if (!istack && waslocked) AFS_GLOCK();
1712 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1713 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1714 peer->host, peer->port, p->header.serial, p->header.epoch,
1715 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1716 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1718 MUTEX_ENTER(&rx_stats_mutex);
1719 rx_stats.packetsSent[p->header.type-1]++;
1720 MUTEX_EXIT(&rx_stats_mutex);
1721 MUTEX_ENTER(&peer->peer_lock);
1722 hadd32(peer->bytesSent, p->length);
1723 MUTEX_EXIT(&peer->peer_lock);
1727 /* Send a "special" packet to the peer connection. If call is
1728 * specified, then the packet is directed to a specific call channel
1729 * associated with the connection, otherwise it is directed to the
1730 * connection only. Uses optionalPacket if it is supplied, rather than
1731 * allocating a new packet buffer. Nbytes is the length of the data
1732 * portion of the packet. If data is non-null, nbytes of data are
1733 * copied into the packet. Type is the type of the packet, as defined
1734 * in rx.h. Bug: there's a lot of duplication between this and other
1735 * routines. This needs to be cleaned up. */
/* NOTE(review): this listing is truncated -- the declarations of type, data,
 * nbytes and istack, the BUSY-branch body, the optionalPacket-supplied path,
 * a loop break and several braces are not visible here.  Returns
 * optionalPacket (or NULL when a fresh packet was allocated and freed). */
1737 rxi_SendSpecial(call, conn, optionalPacket, type, data, nbytes, istack)
1738 register struct rx_call *call;
1739 register struct rx_connection *conn;
1740 struct rx_packet *optionalPacket;
1745 /* Some of the following stuff should be common code for all
1746 * packet sends (it's repeated elsewhere) */
1747 register struct rx_packet *p;
1749 int savelen = 0, saven = 0;
1750 int channel, callNumber;
1752 channel = call->channel;
1753 callNumber = *call->callNumber;
1754 /* BUSY packets refer to the next call on this connection */
1755 if (type == RX_PACKET_TYPE_BUSY) {
1764 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1765 if (!p) osi_Panic("rxi_SendSpecial failure");
/* Fill in the header fields that identify the connection/call/packet. */
1772 p->header.serviceId = conn->serviceId;
1773 p->header.securityIndex = conn->securityIndex;
1774 p->header.cid = (conn->cid | channel);
1775 p->header.callNumber = callNumber;
1777 p->header.epoch = conn->epoch;
1778 p->header.type = type;
1779 p->header.flags = 0;
1780 if (conn->type == RX_CLIENT_CONNECTION)
1781 p->header.flags |= RX_CLIENT_INITIATED;
1783 rx_packetwrite(p, 0, nbytes, data);
/* Same iovec-trimming trick as rxi_SendDebugPacket: shrink the final data
 * iovec so the chain covers exactly nbytes, remembering the old values in
 * savelen/saven for restoration after the send. */
1785 for (i=1; i < p->niovecs; i++) {
1786 if (nbytes <= p->wirevec[i].iov_len) {
1787 savelen = p->wirevec[i].iov_len;
1789 p->wirevec[i].iov_len = nbytes;
1790 p->niovecs = i+1; /* so condition fails because i == niovecs */
1792 else nbytes -= p->wirevec[i].iov_len;
/* Call-directed specials go through rxi_Send (call state updated);
 * connection-only specials go straight to rxi_SendPacket. */
1795 if (call) rxi_Send(call, p, istack);
1796 else rxi_SendPacket(conn, p, istack);
1797 if (saven) { /* means we truncated the packet above. We probably don't */
1798 /* really need to do this, but it seems safer this way, given that */
1799 /* sneaky optionalPacket... */
1800 p->wirevec[i-1].iov_len = savelen;
1803 if (!optionalPacket) rxi_FreePacket(p);
1804 return optionalPacket;
1808 /* Encode the packet's header (from the struct header in the packet to
1809 * the net byte order representation in the wire representation of the
1810 * packet, which is what is actually sent out on the wire) */
1811 void rxi_EncodePacketHeader(p)
1812 register struct rx_packet *p;
1814 register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */
1816 bzero((char *)buf, RX_HEADER_SIZE);
1817 *buf++ = htonl(p->header.epoch);
1818 *buf++ = htonl(p->header.cid);
1819 *buf++ = htonl(p->header.callNumber);
1820 *buf++ = htonl(p->header.seq);
1821 *buf++ = htonl(p->header.serial);
1822 *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
1823 | (((afs_uint32)p->header.flags)<<16)
1824 | (p->header.userStatus<<8) | p->header.securityIndex);
1825 /* Note: top 16 bits of this next word were reserved */
1826 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1829 /* Decode the packet's header (from net byte order to a struct header) */
1830 void rxi_DecodePacketHeader(p)
1831 register struct rx_packet *p;
1833 register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base); /* MTUXXX */
1836 p->header.epoch = ntohl(*buf++);
1837 p->header.cid = ntohl(*buf++);
1838 p->header.callNumber = ntohl(*buf++);
1839 p->header.seq = ntohl(*buf++);
1840 p->header.serial = ntohl(*buf++);
1841 temp = ntohl(*buf++);
1842 /* C will truncate byte fields to bytes for me */
1843 p->header.type = temp>>24;
1844 p->header.flags = temp>>16;
1845 p->header.userStatus = temp>>8;
1846 p->header.securityIndex = temp>>0;
1847 temp = ntohl(*buf++);
1848 p->header.serviceId = (temp&0xffff);
1849 p->header.spare = temp>>16;
1850 /* Note: top 16 bits of this last word are the security checksum */
/* Prepare a DATA packet for its first transmission on `call`: fill in the
 * header from the call/connection state, reset the retransmit clocks,
 * reconcile p->length with the iovec lengths, release surplus cluster
 * buffers, and let the security layer finish the packet.
 * NOTE(review): this listing is truncated -- the declarations of i and j,
 * the `last` parameter declaration and its `if (last)` guard, the panic
 * condition, and several braces are not visible here. */
1853 void rxi_PrepareSendPacket(call, p, last)
1854 register struct rx_call *call;
1855 register struct rx_packet *p;
1858 register struct rx_connection *conn = call->conn;
1860 ssize_t len; /* len must be a signed type; it can go negative */
1863 p->header.cid = (conn->cid | call->channel);
1864 p->header.serviceId = conn->serviceId;
1865 p->header.securityIndex = conn->securityIndex;
1866 p->header.callNumber = *call->callNumber;
/* tnext is the next sequence number to hand out on this call. */
1867 p->header.seq = call->tnext++;
1868 p->header.epoch = conn->epoch;
1869 p->header.type = RX_PACKET_TYPE_DATA;
1870 p->header.flags = 0;
1871 p->header.spare = 0;
1872 if (conn->type == RX_CLIENT_CONNECTION)
1873 p->header.flags |= RX_CLIENT_INITIATED;
/* Presumably guarded by `if (last)` on a line not visible here -- marks the
 * final data packet of the call.  TODO confirm. */
1876 p->header.flags |= RX_LAST_PACKET;
1878 clock_Zero(&p->retryTime); /* Never yet transmitted */
1879 clock_Zero(&p->firstSent); /* Never yet transmitted */
1880 p->header.serial = 0; /* Another way of saying never transmitted... */
1883 /* Now that we're sure this is the last data on the call, make sure
1884 * that the "length" and the sum of the iov_lens matches. */
1885 len = p->length + call->conn->securityHeaderSize;
/* Walk the data iovecs subtracting their lengths; after the loop, `len` is
 * the (negative or zero) amount by which the last used iovec overshoots. */
1887 for (i=1; i < p->niovecs && len > 0; i++) {
1888 len -= p->wirevec[i].iov_len;
1891 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1894 /* Free any extra elements in the wirevec */
1895 for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1896 rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
/* Shrink the last used iovec by the overshoot (len <= 0 here). */
1899 p->wirevec[i-1].iov_len += len;
/* Give the security object (e.g. rxkad) a chance to checksum/encrypt. */
1901 RXS_PreparePacket(conn->securityObject, call, p);
1904 /* Given an interface MTU size, calculate an adjusted MTU size that
1905 * will make efficient use of the RX buffers when the peer is sending
1906 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
/* NOTE(review): truncated listing -- the declarations of adjMTU and frags,
 * the body of the mtu <= adjMTU early-return branch, and the statement that
 * reduces mtu by adjMTU before the division are not visible here. */
1907 int rxi_AdjustIfMTU(int mtu)
/* adjMTU = wire header plus one full jumbo fragment (buffer + trailer). */
1912 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1913 if (mtu <= adjMTU) {
/* Round the remaining space down to a whole number of jumbo fragments. */
1920 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1921 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1924 /* Given an interface MTU size, and the peer's advertised max receive
1925 * size, calculate an adjisted maxMTU size that makes efficient use
1926 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
1927 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1929 int maxMTU = mtu * rxi_nSendFrags;
1930 maxMTU = MIN(maxMTU, peerMaxMTU);
1931 return rxi_AdjustIfMTU(maxMTU);
1934 /* Given a packet size, figure out how many datagram packet will fit.
1935 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1936 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1937 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
/* NOTE(review): truncated listing -- the declaration of maxMTU, the body of
 * the small-mtu early-return branch, and the guard before the final return
 * are not visible here. */
1938 int rxi_AdjustDgramPackets(int frags, int mtu)
/* If even one jumbo buffer plus header would not fit alongside an IPv6
 * fragment header, presumably fall back to a single packet -- TODO confirm
 * the missing branch body. */
1941 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
/* Total datagram capacity across `frags` fragments, minus one UDP header
 * (the first fragment's UDP header is counted once). */
1944 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1945 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1946 /* subtract the size of the first and last packets */
1947 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
/* 2 accounts for the first and last buffers; the division counts the
 * middle buffers that fit in the remaining space. */
1951 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));