src/rx/rx_packet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #ifdef KERNEL
  12 #include "../afs/param.h"
  13 #else
  14 #include <afs/param.h>
  15 #endif
  16
  17 RCSID("$Header$");
  18
  19 #ifdef KERNEL
  20 #if defined(UKERNEL)
  21 #include "../afs/sysincludes.h"
  22 #include "../afs/afsincludes.h"
  23 #include "../rx/rx_kcommon.h"
  24 #include "../rx/rx_clock.h"
  25 #include "../rx/rx_queue.h"
  26 #include "../rx/rx_packet.h"
  27 #else /* defined(UKERNEL) */
  28 #include "../h/types.h"
  29 #ifndef AFS_LINUX20_ENV
  30 #include "../h/systm.h"
  31 #endif
  32 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
  33 #include "../afs/sysincludes.h"
  34 #endif
  35 #include "../h/socket.h"
  36 #if !defined(AFS_SUN5_ENV) &&  !defined(AFS_LINUX20_ENV)
  37 #if     !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
  38 #include "../sys/mount.h"   /* it gets pulled in by something later anyway */
  39 #endif
  40 #include "../h/mbuf.h"
  41 #endif
  42 #include "../netinet/in.h"
  43 #include "../afs/afs_osi.h"
  44 #include "../rx/rx_kmutex.h"
  45 #include "../rx/rx_clock.h"
  46 #include "../rx/rx_queue.h"
  47 #ifdef  AFS_SUN5_ENV
  48 #include <sys/sysmacros.h>
  49 #endif
  50 #include "../rx/rx_packet.h"
  51 #endif /* defined(UKERNEL) */
  52 #include "../rx/rx_globals.h"
  53 #else /* KERNEL */
  54 #include "sys/types.h"
  55 #include <sys/stat.h>
  56 #include <errno.h>
  57 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
  58 #ifdef AFS_NT40_ENV
  59 #include <winsock2.h>
  60 #else
  61 #include <sys/socket.h>
  62 #include <netinet/in.h>
  63 #endif /* AFS_NT40_ENV */
  64 #include "rx_xmit_nt.h"
  65 #include <stdlib.h>
  66 #else
  67 #include <sys/socket.h>
  68 #include <netinet/in.h>
  69 #endif
  70 #include "rx_clock.h"
  71 #include "rx.h"
  72 #include "rx_queue.h"
  73 #ifdef  AFS_SUN5_ENV
  74 #include <sys/sysmacros.h>
  75 #endif
  76 #include "rx_packet.h"
  77 #include "rx_globals.h"
  78 #include <lwp.h>
  79 #ifdef HAVE_STRING_H
  80 #include <string.h>
  81 #else
  82 #ifdef HAVE_STRINGS_H
  83 #include <strings.h>
  84 #endif
  85 #endif
  86 #ifdef HAVE_UNISTD_H
  87 #include <unistd.h>
  88 #endif
  89 #endif /* KERNEL */
  90
  91 #ifdef RX_LOCKS_DB
  92 /* rxdb_fileID is used to identify the lock location, along with line#. */
  93 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
  94 #endif /* RX_LOCKS_DB */
/* Base address of the packet array most recently obtained from osi_Alloc()
 * by rxi_MorePackets()/rxi_MorePacketsNoLock().  rxi_FreeAllPackets() hands
 * this pointer back to osi_Free(); earlier allocations are not remembered. */
struct rx_packet *rx_mallocedP = 0;

/* Symbols defined outside this file. */
extern char cml_version_number[];
extern int (*rx_almostSent)();

/* Forward declaration of a file-local helper. */
static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                               afs_int32 ahost, short aport, afs_int32 istack);
 102
 103 /* some rules about packets:
 104  * 1.  When a packet is allocated, the final iov_buf contains room for
 105  * a security trailer, but iov_len masks that fact.  If the security
 106  * package wants to add the trailer, it may do so, and then extend
 107  * iov_len appropriately.  For this reason, packet's niovecs and
 108  * iov_len fields should be accurate before calling PreparePacket.
 109 */
 110
 111 /* Preconditions:
 112  *        all packet buffers (iov_base) are integral multiples of
 113  *        the word size.
 114  *        offset is an integral multiple of the word size.
 115  */
 116 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
 117 {
 118   unsigned int i;
 119   size_t l;
 120   for (l=0, i=1; i< packet->niovecs ; i++ ) {
 121     if (l + packet->wirevec[i].iov_len > offset) {
 122       return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
 123     }
 124     l += packet->wirevec[i].iov_len;
 125   }
 126
 127   return 0;
 128 }
 129
 130 /* Preconditions:
 131  *        all packet buffers (iov_base) are integral multiples of the word size.
 132  *        offset is an integral multiple of the word size.
 133  */
 134 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
 135 {
 136   unsigned int i;
 137   size_t l;
 138   for (l=0, i=1; i< packet->niovecs ; i++ ) {
 139     if (l + packet->wirevec[i].iov_len > offset) {
 140       *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
 141           data;
 142       return 0;
 143     }
 144     l += packet->wirevec[i].iov_len;
 145   }
 146
 147   return 0;
 148 }
 149
 150 /* Preconditions:
 151  *        all packet buffers (iov_base) are integral multiples of the
 152  *        word size.
 153  *        offset is an integral multiple of the word size.
 154  * Packet Invariants:
 155  *         all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 156  */
 157 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
 158         int resid, char *out)
 159 {
 160   unsigned int i, j, l, r;
 161   for (l=0, i=1; i< packet->niovecs ; i++ ) {
 162     if (l + packet->wirevec[i].iov_len > offset) {
 163       break;
 164     }
 165     l += packet->wirevec[i].iov_len;
 166   }
 167
 168   /* i is the iovec which contains the first little bit of data in which we
 169    * are interested.  l is the total length of everything prior to this iovec.
 170    * j is the number of bytes we can safely copy out of this iovec.
 171    */
 172   r = resid;
 173   while ((resid > 0) && (i < packet->niovecs)) {
 174     j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
 175     memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
 176     resid -= j;
 177     l += packet->wirevec[i].iov_len;
 178     i++;
 179   }
 180
 181   return (resid ? (r - resid) : r);
 182 }
 183
 184
 185 /* Preconditions:
 186  *        all packet buffers (iov_base) are integral multiples of the
 187  *        word size.
 188  *        offset is an integral multiple of the word size.
 189  */
 190 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
 191         char *in)
 192 {
 193   int i, j, l, r;
 194   char * b;
 195
 196   for (l=0, i=1; i < packet->niovecs; i++ ) {
 197     if (l + packet->wirevec[i].iov_len > offset) {
 198       break;
 199     }
 200     l += packet->wirevec[i].iov_len;
 201   }
 202
 203   /* i is the iovec which contains the first little bit of data in which we
 204    * are interested.  l is the total length of everything prior to this iovec.
 205    * j is the number of bytes we can safely copy out of this iovec.
 206    */
 207   r = resid;
 208   while ((resid > 0) && (i < RX_MAXWVECS)) {
 209     if (i >= packet->niovecs)
 210       if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
 211         break;
 212
 213     b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
 214     j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
 215     memcpy(b, in, j);
 216     resid -= j;
 217     l += packet->wirevec[i].iov_len;
 218     i++;
 219   }
 220
 221   return (resid ? (r - resid) : r);
 222 }
 223
 224 static struct rx_packet *allocCBuf(int class)
 225 {
 226   struct rx_packet *c;
 227   SPLVAR;
 228
 229   NETPRI;
 230   MUTEX_ENTER(&rx_freePktQ_lock);
 231
 232 #ifdef KERNEL
 233   if (rxi_OverQuota(class)) {
 234     c = NULL;
 235     rxi_NeedMorePackets = TRUE;
 236     MUTEX_ENTER(&rx_stats_mutex);
 237     switch(class) {
 238         case RX_PACKET_CLASS_RECEIVE:
 239             rx_stats.receivePktAllocFailures++;
 240             break;
 241         case RX_PACKET_CLASS_SEND:
 242             rx_stats.sendPktAllocFailures++;
 243             break;
 244         case RX_PACKET_CLASS_SPECIAL:
 245             rx_stats.specialPktAllocFailures++;
 246             break;
 247         case RX_PACKET_CLASS_RECV_CBUF:
 248             rx_stats.receiveCbufPktAllocFailures++;
 249             break;
 250         case RX_PACKET_CLASS_SEND_CBUF:
 251             rx_stats.sendCbufPktAllocFailures++;
 252             break;
 253     }
 254     MUTEX_EXIT(&rx_stats_mutex);
 255     goto done;
 256   }
 257
 258   if (queue_IsEmpty(&rx_freePacketQueue)) {
 259     c = NULL;
 260     rxi_NeedMorePackets = TRUE;
 261     goto done;
 262   }
 263 #else /* KERNEL */
 264   if (queue_IsEmpty(&rx_freePacketQueue)) {
 265     rxi_MorePacketsNoLock(rx_initSendWindow);
 266   }
 267 #endif /* KERNEL */
 268
 269   rx_nFreePackets--;
 270   c = queue_First(&rx_freePacketQueue, rx_packet);
 271   queue_Remove(c);
 272   if (!(c->flags & RX_PKTFLAG_FREE))
 273     osi_Panic("rxi_AllocPacket: packet not free\n");
 274   c->flags = 0;         /* clear RX_PKTFLAG_FREE, initialize the rest */
 275   c->header.flags = 0;
 276
 277 #ifdef KERNEL
 278  done:
 279 #endif
 280   MUTEX_EXIT(&rx_freePktQ_lock);
 281
 282   USERPRI;
 283   return c;
 284 }
 285
 286 /*
 287  * Free a packet currently used as a continuation buffer
 288  */
 289 void rxi_freeCBuf(struct rx_packet *c)
 290 {
 291   SPLVAR;
 292
 293   NETPRI;
 294   MUTEX_ENTER(&rx_freePktQ_lock);
 295
 296   rxi_FreePacketNoLock(c);
 297   /* Wakeup anyone waiting for packets */
 298   rxi_PacketsUnWait();
 299
 300   MUTEX_EXIT(&rx_freePktQ_lock);
 301   USERPRI;
 302 }
 303
 304 /* this one is kind of awful.
 305  * In rxkad, the packet has been all shortened, and everything, ready for
 306  * sending.  All of a sudden, we discover we need some of that space back.
 307  * This isn't terribly general, because it knows that the packets are only
 308  * rounded up to the EBS (userdata + security header).
 309  */
 310 int rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
 311 {
 312   int i;
 313   i = p->niovecs - 1;
 314   if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
 315     if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
 316       p->wirevec[i].iov_len += nb;
 317       return 0;
 318     }
 319   }
 320   else {
 321     if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
 322       p->wirevec[i].iov_len += nb;
 323       return 0;
 324     }
 325   }
 326
 327 return 0;
 328 }
 329 /* get sufficient space to store nb bytes of data (or more), and hook
 330  * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 331  * returns the number of bytes >0 which it failed to come up with.
 332  * Don't need to worry about locking on packet, since only
 333  * one thread can manipulate one at a time. Locking on continution
 334  * packets is handled by allocCBuf */
 335 /* MTUXXX don't need to go throught the for loop if we can trust niovecs */
 336 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
 337 {
 338   int i;
 339
 340   for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
 341       register struct rx_packet *cb;
 342       if ((cb = allocCBuf(class))) {
 343           p->wirevec[i].iov_base = (caddr_t) cb->localdata;
 344           p->wirevec[i].iov_len = RX_CBUFFERSIZE;
 345           nb -= RX_CBUFFERSIZE;
 346           p->length += RX_CBUFFERSIZE;
 347           p->niovecs++;
 348       }
 349       else break;
 350   }
 351
 352   return nb;
 353 }
 354
 355 /* Add more packet buffers */
 356 void rxi_MorePackets(int apackets)
 357 {
 358   struct rx_packet *p, *e;
 359   int getme;
 360   SPLVAR;
 361
 362   getme = apackets * sizeof(struct rx_packet);
 363   p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
 364
 365   PIN(p, getme);        /* XXXXX */
 366   memset((char *)p, 0, getme);
 367   NETPRI;
 368   AFS_RXGLOCK();
 369   MUTEX_ENTER(&rx_freePktQ_lock);
 370
 371   for (e = p + apackets; p<e; p++) {
 372     p->wirevec[0].iov_base = (char *) (p->wirehead);
 373     p->wirevec[0].iov_len  = RX_HEADER_SIZE;
 374     p->wirevec[1].iov_base = (char *) (p->localdata);
 375     p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
 376     p->flags |= RX_PKTFLAG_FREE;
 377     p->niovecs = 2;
 378
 379     queue_Append(&rx_freePacketQueue, p);
 380   }
 381   rx_nFreePackets += apackets;
 382   rxi_NeedMorePackets = FALSE;
 383   rxi_PacketsUnWait();
 384
 385   AFS_RXGUNLOCK();
 386   MUTEX_EXIT(&rx_freePktQ_lock);
 387   USERPRI;
 388 }
 389
 390 #ifndef KERNEL
 391 /* Add more packet buffers */
 392 void rxi_MorePacketsNoLock(int apackets)
 393 {
 394   struct rx_packet *p, *e;
 395   int getme;
 396
 397   /* allocate enough packets that 1/4 of the packets will be able
 398    * to hold maximal amounts of data */
 399   apackets += (apackets/4)
 400               * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
 401   getme = apackets * sizeof(struct rx_packet);
 402   p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
 403
 404   memset((char *)p, 0, getme);
 405
 406   for (e = p + apackets; p<e; p++) {
 407     p->wirevec[0].iov_base = (char *) (p->wirehead);
 408     p->wirevec[0].iov_len  = RX_HEADER_SIZE;
 409     p->wirevec[1].iov_base = (char *) (p->localdata);
 410     p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
 411     p->flags |= RX_PKTFLAG_FREE;
 412     p->niovecs = 2;
 413
 414     queue_Append(&rx_freePacketQueue, p);
 415   }
 416   rx_nFreePackets += apackets;
 417   rxi_NeedMorePackets = FALSE;
 418   rxi_PacketsUnWait();
 419 }
 420 #endif /* !KERNEL */
 421
 422 void rxi_FreeAllPackets(void)
 423 {
 424   /* must be called at proper interrupt level, etcetera */
 425   /* MTUXXX need to free all Packets */
 426   osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
 427   UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
 428 }
 429
 430 /* Allocate more packets iff we need more continuation buffers */
 431 /* In kernel, can't page in memory with interrupts disabled, so we
 432  * don't use the event mechanism. */
 433 void rx_CheckPackets(void)
 434 {
 435   if (rxi_NeedMorePackets) {
 436     rxi_MorePackets(rx_initSendWindow);
 437   }
 438 }
 439
 440 /* In the packet freeing routine below, the assumption is that
 441    we want all of the packets to be used equally frequently, so that we
 442    don't get packet buffers paging out.  It would be just as valid to
 443    assume that we DO want them to page out if not many are being used.
 444    In any event, we assume the former, and append the packets to the end
 445    of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
 452
 453 /* Actually free the packet p. */
 454 void rxi_FreePacketNoLock(struct rx_packet *p)
 455 {
 456   dpf(("Free %x\n", p));
 457
 458   if (p->flags & RX_PKTFLAG_FREE)
 459     osi_Panic("rxi_FreePacketNoLock: packet already free\n");
 460   rx_nFreePackets++;
 461   p->flags |= RX_PKTFLAG_FREE;
 462   queue_Append(&rx_freePacketQueue, p);
 463 }
 464
 465 int rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
 466 {
 467   struct iovec *iov, *end;
 468
 469   if (first != 1)          /* MTUXXX */
 470       osi_Panic("FreeDataBufs 1: first must be 1");
 471   iov = &p->wirevec[1];
 472   end = iov + (p->niovecs-1);
 473   if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
 474         osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
 475   for (iov++ ; iov < end ; iov++) {
 476     if (!iov->iov_base)
 477         osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
 478     rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 479   }
 480   p->length = 0;
 481   p->niovecs = 0;
 482
 483   return 0;
 484 }
 485
 486 int rxi_nBadIovecs = 0;
 487
 488 /* rxi_RestoreDataBufs
 489  *
 490  * Restore the correct sizes to the iovecs. Called when reusing a packet
 491  * for reading off the wire.
 492  */
 493 void rxi_RestoreDataBufs(struct rx_packet *p)
 494 {
 495     int i;
 496     struct iovec *iov = &p->wirevec[2];
 497
 498     p->wirevec[0].iov_base = (char *) (p->wirehead);
 499     p->wirevec[0].iov_len  = RX_HEADER_SIZE;
 500     p->wirevec[1].iov_base = (char *) (p->localdata);
 501     p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
 502
 503     for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
 504         if (!iov->iov_base) {
 505             rxi_nBadIovecs ++;
 506             p->niovecs = i;
 507             break;
 508         }
 509         iov->iov_len = RX_CBUFFERSIZE;
 510     }
 511 }
 512
 513 int rxi_TrimDataBufs(struct rx_packet *p, int first)
 514 {
 515   int length;
 516   struct iovec *iov, *end;
 517   SPLVAR;
 518
 519   if (first != 1)
 520       osi_Panic("TrimDataBufs 1: first must be 1");
 521
 522   /* Skip over continuation buffers containing message data */
 523   iov = &p->wirevec[2];
 524   end = iov + (p->niovecs-2);
 525   length = p->length - p->wirevec[1].iov_len;
 526   for (; iov < end && length > 0 ; iov++) {
 527     if (!iov->iov_base)
 528         osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
 529     length -= iov->iov_len;
 530   }
 531
 532   /* iov now points to the first empty data buffer. */
 533   if (iov >= end)
 534     return 0;
 535
 536   NETPRI;
 537   MUTEX_ENTER(&rx_freePktQ_lock);
 538
 539   for (; iov < end ; iov++) {
 540     if (!iov->iov_base)
 541         osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
 542     rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 543     p->niovecs--;
 544   }
 545   rxi_PacketsUnWait();
 546
 547   MUTEX_EXIT(&rx_freePktQ_lock);
 548   USERPRI;
 549
 550   return 0;
 551 }
 552
 553 /* Free the packet p.  P is assumed not to be on any queue, i.e.
 554  * remove it yourself first if you call this routine. */
 555 void rxi_FreePacket(struct rx_packet *p)
 556 {
 557   SPLVAR;
 558
 559   NETPRI;
 560   MUTEX_ENTER(&rx_freePktQ_lock);
 561
 562   rxi_FreeDataBufsNoLock(p,1);
 563   rxi_FreePacketNoLock(p);
 564   /* Wakeup anyone waiting for packets */
 565   rxi_PacketsUnWait();
 566
 567   MUTEX_EXIT(&rx_freePktQ_lock);
 568   USERPRI;
 569 }
 570
 571
 572 /* rxi_AllocPacket sets up p->length so it reflects the number of
 573  * bytes in the packet at this point, **not including** the header.
 574  * The header is absolutely necessary, besides, this is the way the
 575  * length field is usually used */
 576 struct rx_packet *rxi_AllocPacketNoLock(int class)
 577 {
 578   register struct rx_packet *p;
 579
 580 #ifdef KERNEL
 581   if (rxi_OverQuota(class)) {
 582     rxi_NeedMorePackets = TRUE;
 583     MUTEX_ENTER(&rx_stats_mutex);
 584     switch(class) {
 585         case RX_PACKET_CLASS_RECEIVE:
 586             rx_stats.receivePktAllocFailures++;
 587             break;
 588         case RX_PACKET_CLASS_SEND:
 589             rx_stats.sendPktAllocFailures++;
 590             break;
 591         case RX_PACKET_CLASS_SPECIAL:
 592             rx_stats.specialPktAllocFailures++;
 593             break;
 594         case RX_PACKET_CLASS_RECV_CBUF:
 595             rx_stats.receiveCbufPktAllocFailures++;
 596             break;
 597         case RX_PACKET_CLASS_SEND_CBUF:
 598             rx_stats.sendCbufPktAllocFailures++;
 599             break;
 600     }
 601     MUTEX_EXIT(&rx_stats_mutex);
 602     return (struct rx_packet *) 0;
 603   }
 604 #endif /* KERNEL */
 605
 606   MUTEX_ENTER(&rx_stats_mutex);
 607   rx_stats.packetRequests++;
 608   MUTEX_EXIT(&rx_stats_mutex);
 609
 610 #ifdef KERNEL
 611   if (queue_IsEmpty(&rx_freePacketQueue))
 612     osi_Panic("rxi_AllocPacket error");
 613 #else /* KERNEL */
 614   if (queue_IsEmpty(&rx_freePacketQueue))
 615     rxi_MorePacketsNoLock(rx_initSendWindow);
 616 #endif /* KERNEL */
 617
 618   rx_nFreePackets--;
 619   p = queue_First(&rx_freePacketQueue, rx_packet);
 620   if (!(p->flags & RX_PKTFLAG_FREE))
 621     osi_Panic("rxi_AllocPacket: packet not free\n");
 622
 623   dpf(("Alloc %x, class %d\n", p, class));
 624
 625   queue_Remove(p);
 626   p->flags = 0;         /* clear RX_PKTFLAG_FREE, initialize the rest */
 627   p->header.flags = 0;
 628
 629   /* have to do this here because rx_FlushWrite fiddles with the iovs in
 630    * order to truncate outbound packets.  In the near future, may need
 631    * to allocate bufs from a static pool here, and/or in AllocSendPacket
 632    */
 633   p->wirevec[0].iov_base = (char *) (p->wirehead);
 634   p->wirevec[0].iov_len  = RX_HEADER_SIZE;
 635   p->wirevec[1].iov_base = (char *) (p->localdata);
 636   p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
 637   p->niovecs = 2;
 638   p->length = RX_FIRSTBUFFERSIZE;
 639   return p;
 640 }
 641
 642 struct rx_packet *rxi_AllocPacket(int class)
 643 {
 644     register struct rx_packet *p;
 645
 646     MUTEX_ENTER(&rx_freePktQ_lock);
 647     p = rxi_AllocPacketNoLock(class);
 648     MUTEX_EXIT(&rx_freePktQ_lock);
 649     return p;
 650 }
 651
 652 /* This guy comes up with as many buffers as it {takes,can get} given
 653  * the MTU for this call. It also sets the packet length before
 654  * returning.  caution: this is often called at NETPRI
 655  * Called with call locked.
 656  */
 657 struct rx_packet *rxi_AllocSendPacket(register struct rx_call *call, int want)
 658 {
 659     register struct rx_packet *p = (struct rx_packet *) 0;
 660     register int mud;
 661     register unsigned delta;
 662
 663     SPLVAR;
 664     mud = call->MTU - RX_HEADER_SIZE;
 665     delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
 666         rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
 667
 668     while (!(call->error)) {
 669       MUTEX_ENTER(&rx_freePktQ_lock);
 670       /* if an error occurred, or we get the packet we want, we're done */
 671       if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
 672         MUTEX_EXIT(&rx_freePktQ_lock);
 673
 674         want += delta;
 675         want = MIN(want, mud);
 676
 677         if ((unsigned) want > p->length)
 678           (void) rxi_AllocDataBuf(p, (want - p->length),
 679                                   RX_PACKET_CLASS_SEND_CBUF);
 680
 681         if ((unsigned) p->length > mud)
 682             p->length = mud;
 683
 684         if (delta >= p->length) {
 685           rxi_FreePacket(p);
 686           p = NULL;
 687         } else {
 688             p->length -= delta;
 689         }
 690         break;
 691       }
 692
 693       /* no error occurred, and we didn't get a packet, so we sleep.
 694        * At this point, we assume that packets will be returned
 695        * sooner or later, as packets are acknowledged, and so we
 696        * just wait.  */
 697       NETPRI;
 698       call->flags |= RX_CALL_WAIT_PACKETS;
 699       CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
 700       MUTEX_EXIT(&call->lock);
 701       rx_waitingForPackets = 1;
 702
 703 #ifdef  RX_ENABLE_LOCKS
 704       CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
 705 #else
 706       osi_rxSleep(&rx_waitingForPackets);
 707 #endif
 708       MUTEX_EXIT(&rx_freePktQ_lock);
 709       MUTEX_ENTER(&call->lock);
 710       CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
 711       call->flags &= ~RX_CALL_WAIT_PACKETS;
 712       USERPRI;
 713     }
 714
 715     return p;
 716 }
 717
 718 #ifndef KERNEL
 719
 720 /* count the number of used FDs */
 721 static int CountFDs(register int amax)
 722 {
 723     struct stat tstat;
 724     register int i, code;
 725     register int count;
 726
 727     count = 0;
 728     for(i=0;i<amax;i++) {
 729         code = fstat(i, &tstat);
 730         if (code == 0) count++;
 731     }
 732     return count;
 733 }
 734
 735 #else /* KERNEL */
 736
 737 #define CountFDs(amax) amax
 738
 739 #endif /* KERNEL */
 740
 741 #if !defined(KERNEL) || defined(UKERNEL)
 742
 743 /* This function reads a single packet from the interface into the
 744  * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 745  * (host,port) of the sender are stored in the supplied variables, and
 746  * the data length of the packet is stored in the packet structure.
 747  * The header is decoded. */
 748 int rxi_ReadPacket(int socket, register struct rx_packet *p, afs_uint32 *host, u_short *port)
 749 {
 750     struct sockaddr_in from;
 751     int nbytes;
 752     afs_int32 rlen;
 753     register afs_int32 tlen, savelen;
 754     struct msghdr msg;
 755     rx_computelen(p, tlen);
 756     rx_SetDataSize(p, tlen);  /* this is the size of the user data area */
 757
 758     tlen += RX_HEADER_SIZE;   /* now this is the size of the entire packet */
 759     rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
 760                                  * it once in order to avoid races.  */
 761     tlen = rlen - tlen;
 762     if (tlen > 0) {
 763       tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
 764       if (tlen >0) {
 765         tlen = rlen - tlen;
 766       }
 767       else tlen = rlen;
 768     }
 769     else tlen = rlen;
 770
 771    /* Extend the last iovec for padding, it's just to make sure that the
 772     * read doesn't return more data than we expect, and is done to get around
 773     * our problems caused by the lack of a length field in the rx header.
 774     * Use the extra buffer that follows the localdata in each packet
 775     * structure. */
 776     savelen = p->wirevec[p->niovecs-1].iov_len;
 777     p->wirevec[p->niovecs-1].iov_len += RX_EXTRABUFFERSIZE;
 778
 779     memset((char *)&msg, 0, sizeof(msg));
 780     msg.msg_name = (char *) &from;
 781     msg.msg_namelen = sizeof(struct sockaddr_in);
 782     msg.msg_iov = p->wirevec;
 783     msg.msg_iovlen = p->niovecs;
 784     nbytes = rxi_Recvmsg(socket, &msg, 0);
 785
 786    /* restore the vec to its correct state */
 787     p->wirevec[p->niovecs-1].iov_len = savelen;
 788
 789     p->length = (nbytes - RX_HEADER_SIZE);
 790     if ((nbytes > tlen) || (p->length  & 0x8000)) {  /* Bogus packet */
 791       if (nbytes > 0)
 792         rxi_MorePackets(rx_initSendWindow);
 793 #ifndef AFS_NT40_ENV
 794       else if (nbytes < 0 && errno == EWOULDBLOCK) {
 795         MUTEX_ENTER(&rx_stats_mutex);
 796         rx_stats.noPacketOnRead++;
 797         MUTEX_EXIT(&rx_stats_mutex);
 798       }
 799 #endif
 800       else {
 801         MUTEX_ENTER(&rx_stats_mutex);
 802         rx_stats.bogusPacketOnRead++;
 803         rx_stats.bogusHost = from.sin_addr.s_addr;
 804         MUTEX_EXIT(&rx_stats_mutex);
 805         dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
 806              from.sin_port,nbytes));
 807       }
 808       return  0;
 809     }
 810     else {
 811       /* Extract packet header. */
 812       rxi_DecodePacketHeader(p);
 813
 814       *host = from.sin_addr.s_addr;
 815       *port = from.sin_port;
 816       if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
 817         struct rx_peer *peer;
 818         MUTEX_ENTER(&rx_stats_mutex);
 819         rx_stats.packetsRead[p->header.type-1]++;
 820         MUTEX_EXIT(&rx_stats_mutex);
 821         /*
 822          * Try to look up this peer structure.  If it doesn't exist,
 823          * don't create a new one -
 824          * we don't keep count of the bytes sent/received if a peer
 825          * structure doesn't already exist.
 826          *
 827          * The peer/connection cleanup code assumes that there is 1 peer
 828          * per connection.  If we actually created a peer structure here
 829          * and this packet was an rxdebug packet, the peer structure would
 830          * never be cleaned up.
 831          */
 832         peer = rxi_FindPeer(*host, *port, 0, 0);
 833         if (peer) {
 834             MUTEX_ENTER(&peer->peer_lock);
 835             hadd32(peer->bytesReceived, p->length);
 836             MUTEX_EXIT(&peer->peer_lock);
 837         }
 838       }
 839
 840       /* Free any empty packet buffers at the end of this packet */
 841       rxi_TrimDataBufs(p, 1);
 842
 843       return  1;
 844     }
 845 }
 846
 847 #endif /* !KERNEL || UKERNEL */
 848
 849 /* This function splits off the first packet in a jumbo packet.
 850  * As of AFS 3.5, jumbograms contain more than one fixed size
 851  * packet, and the RX_JUMBO_PACKET flag is set in all but the
 852  * last packet header. All packets (except the last) are padded to
 853  * fall on RX_CBUFFERSIZE boundaries.
 854  * HACK: We store the length of the first n-1 packets in the
 855  * last two pad bytes. */
 856
 857 struct rx_packet *rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host,
 858         short port, int first)
 859 {
 860     struct rx_packet *np;
 861     struct rx_jumboHeader *jp;
 862     int niov, i;
 863     struct iovec *iov;
 864     int length;
 865     afs_uint32 temp;
 866
 867     /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
 868      * bytes in length. All but the first packet are preceded by
 869      * an abbreviated four byte header. The length of the last packet
 870      * is calculated from the size of the jumbogram. */
 871     length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
 872
 873     if ((int)p->length < length) {
 874         dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
 875         return NULL;
 876     }
 877     niov = p->niovecs - 2;
 878     if (niov < 1) {
 879         dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
 880         return NULL;
 881     }
 882     iov = &p->wirevec[2];
 883     np = RX_CBUF_TO_PACKET(iov->iov_base, p);
 884
 885     /* Get a pointer to the abbreviated packet header */
 886     jp = (struct rx_jumboHeader *)
 887          ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
 888
 889     /* Set up the iovecs for the next packet */
 890     np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
 891     np->wirevec[0].iov_len = sizeof(struct rx_header);
 892     np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
 893     np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
 894     np->niovecs = niov+1;
 895     for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
 896         np->wirevec[i] = *iov;
 897     }
 898     np->length = p->length - length;
 899     p->length = RX_JUMBOBUFFERSIZE;
 900     p->niovecs = 2;
 901
 902     /* Convert the jumbo packet header to host byte order */
 903     temp = ntohl(*(afs_uint32 *)jp);
 904     jp->flags = (u_char)(temp >> 24);
 905     jp->cksum = (u_short)(temp);
 906
 907     /* Fill in the packet header */
 908     np->header = p->header;
 909     np->header.serial = p->header.serial + 1;
 910     np->header.seq = p->header.seq + 1;
 911     np->header.flags = jp->flags;
 912     np->header.spare = jp->cksum;
 913
 914     return np;
 915 }
 916
 917 #ifndef KERNEL
 918 /* Send a udp datagram */
 919 int osi_NetSend(osi_socket socket, char *addr, struct iovec *dvec, int nvecs,
 920         int length, int istack)
 921 {
 922     struct msghdr msg;
 923
 924     memset(&msg, 0, sizeof(msg));
 925     msg.msg_iov = dvec;
 926     msg.msg_iovlen = nvecs;
 927     msg.msg_name = addr;
 928     msg.msg_namelen = sizeof(struct sockaddr_in);
 929
 930     rxi_Sendmsg(socket, &msg, 0);
 931
 932     return 0;
 933 }
 934 #elif !defined(UKERNEL)
 935 /* osi_NetSend is defined in afs/afs_osinet.c
 936  * message receipt is done in rxk_input or rx_put.
 937  */
 938
 939 #ifdef AFS_SUN5_ENV
 940 /*
 941  * Copy an mblock to the contiguous area pointed to by cp.
 942  * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 943  * but it doesn't really.
 944  * Returns the number of bytes not transferred.
 945  * The message is NOT changed.
 946  */
 947 static int cpytoc(mblk_t *mp, register int off, register int len, register char *cp)
 948 {
 949     register int n;
 950
 951     for (;mp && len > 0; mp = mp->b_cont) {
 952         if (mp->b_datap->db_type != M_DATA) {
 953             return -1;
 954         }
 955         n = MIN(len, (mp->b_wptr - mp->b_rptr));
 956         memcpy(cp, (char *)mp->b_rptr, n);
 957         cp += n;
 958         len -= n;
 959         mp->b_rptr += n;
 960     }
 961     return (len);
 962 }
 963
 964 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 965  * but it doesn't really.
 966  * This sucks, anyway, do it like m_cpy.... below
 967  */
 968 static int cpytoiovec(mblk_t *mp, int off, int len, register struct iovec *iovs, int niovs)
 969 {
 970     register int m,n,o,t,i;
 971
 972     for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
 973         if (mp->b_datap->db_type != M_DATA) {
 974             return -1;
 975         }
 976         n = MIN(len, (mp->b_wptr - mp->b_rptr));
 977         len -= n;
 978         while (n) {
 979           if (!t) {
 980             o=0;
 981             i++;
 982             t = iovs[i].iov_len;
 983           }
 984           m = MIN(n,t);
 985           memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
 986           mp->b_rptr += m;
 987           o += m;
 988           t -= m;
 989           n -= m;
 990         }
 991     }
 992     return (len);
 993 }
 994 #define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
 995 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
 996 #else
 997 #if !defined(AFS_LINUX20_ENV)
 998 static int m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
 999 {
1000   caddr_t p1, p2;
1001   unsigned int l1, l2, i, t;
1002
1003   if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1004     osi_Panic("m_cpytoiovec");  /* MTUXXX probably don't need this check */
1005
1006   while (off && m)
1007     if (m->m_len <= off) {
1008       off -= m->m_len;
1009       m = m->m_next;
1010       continue;
1011     } else
1012       break;
1013
1014   if (m == NULL)
1015     return len;
1016
1017   p1 = mtod(m, caddr_t)+off;
1018   l1 = m->m_len - off;
1019   i = 0;
1020   p2 = iovs[0].iov_base;
1021   l2 = iovs[0].iov_len;
1022
1023   while (len) {
1024     t = MIN(l1, MIN(l2, (unsigned int)len));
1025     memcpy(p2, p1, t);
1026     p1 += t;    p2 += t;
1027     l1 -= t;    l2 -= t;
1028     len -= t;
1029     if (!l1) {
1030       m = m->m_next;
1031       if (!m)
1032         break;
1033       p1 = mtod(m, caddr_t);
1034       l1 = m->m_len;
1035     }
1036     if (!l2) {
1037       if (++i >= niovs)
1038         break;
1039       p2 = iovs[i].iov_base;
1040       l2 = iovs[i].iov_len;
1041     }
1042
1043   }
1044
1045 return len;
1046 }
1047 #endif /* LINUX */
1048 #endif /* AFS_SUN5_ENV */
1049
1050 #if !defined(AFS_LINUX20_ENV)
1051 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1052 #ifdef  AFS_SUN5_ENV
1053 mblk_t *amb;
1054 #else
1055 struct mbuf *amb;
1056 #endif
1057 void (*free)();
1058 struct rx_packet *phandle;
1059 int hdr_len, data_len;
1060 {
1061   register int code;
1062
1063   code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1064   (*free)(amb);
1065
1066   return code;
1067 }
1068 #endif /* LINUX */
1069 #endif /*KERNEL && !UKERNEL*/
1070
1071
1072 /* send a response to a debug packet */
1073
1074 struct rx_packet *rxi_ReceiveDebugPacket(register struct rx_packet *ap,
1075         osi_socket asocket, afs_int32 ahost, short aport, int istack)
1076 {
1077     struct rx_debugIn tin;
1078     afs_int32 tl;
1079     struct rx_serverQueueEntry *np, *nqe;
1080
1081     /*
1082      * Only respond to client-initiated Rx debug packets,
1083      * and clear the client flag in the response.
1084      */
1085     if (ap->header.flags & RX_CLIENT_INITIATED) {
1086         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1087         rxi_EncodePacketHeader(ap);
1088     } else {
1089         return ap;
1090     }
1091
1092     rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1093     /* all done with packet, now set length to the truth, so we can
1094      * reuse this packet */
1095     rx_computelen(ap, ap->length);
1096
1097     tin.type = ntohl(tin.type);
1098     tin.index = ntohl(tin.index);
1099     switch(tin.type) {
1100         case RX_DEBUGI_GETSTATS: {
1101             struct rx_debugStats tstat;
1102
1103             /* get basic stats */
1104             memset((char *)&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1105             tstat.version = RX_DEBUGI_VERSION;
1106 #ifndef RX_ENABLE_LOCKS
1107             tstat.waitingForPackets = rx_waitingForPackets;
1108 #endif
1109             tstat.nFreePackets = htonl(rx_nFreePackets);
1110             tstat.callsExecuted = htonl(rxi_nCalls);
1111             tstat.packetReclaims = htonl(rx_packetReclaims);
1112             tstat.usedFDs = CountFDs(64);
1113             tstat.nWaiting = htonl(rx_nWaiting);
1114             queue_Count( &rx_idleServerQueue, np, nqe,
1115                                 rx_serverQueueEntry, tstat.idleThreads);
1116             tstat.idleThreads = htonl(tstat.idleThreads);
1117             tl = sizeof(struct rx_debugStats) - ap->length;
1118             if (tl > 0)
1119               tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1120
1121             if (tl <= 0) {
1122               rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1123               ap->length = sizeof(struct rx_debugStats);
1124               rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1125               rx_computelen(ap, ap->length);
1126             }
1127             break;
1128         }
1129
1130         case RX_DEBUGI_GETALLCONN:
1131         case RX_DEBUGI_GETCONN: {
1132             int i, j;
1133             register struct rx_connection *tc;
1134             struct rx_call *tcall;
1135             struct rx_debugConn tconn;
1136             int all = (tin.type == RX_DEBUGI_GETALLCONN);
1137
1138
1139             tl = sizeof(struct rx_debugConn) - ap->length;
1140             if (tl > 0)
1141               tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1142             if (tl > 0)
1143               return ap;
1144
1145             memset((char *)&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1146             /* get N'th (maybe) "interesting" connection info */
1147             for(i=0;i<rx_hashTableSize;i++) {
1148 #if !defined(KERNEL)
1149                 /* the time complexity of the algorithm used here
1150                  * exponentially increses with the number of connections.
1151                  */
1152 #ifdef AFS_PTHREAD_ENV
1153                 pthread_yield();
1154 #else
1155                 (void) IOMGR_Poll();
1156 #endif
1157 #endif
1158                 MUTEX_ENTER(&rx_connHashTable_lock);
1159                 /* We might be slightly out of step since we are not
1160                  * locking each call, but this is only debugging output.
1161                  */
1162                 for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
1163                     if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1164                         tconn.host = tc->peer->host;
1165                         tconn.port = tc->peer->port;
1166                         tconn.cid = htonl(tc->cid);
1167                         tconn.epoch = htonl(tc->epoch);
1168                         tconn.serial = htonl(tc->serial);
1169                         for(j=0;j<RX_MAXCALLS;j++) {
1170                             tconn.callNumber[j] = htonl(tc->callNumber[j]);
1171                             if ((tcall=tc->call[j])) {
1172                                 tconn.callState[j] = tcall->state;
1173                                 tconn.callMode[j] = tcall->mode;
1174                                 tconn.callFlags[j] = tcall->flags;
1175                                 if (queue_IsNotEmpty(&tcall->rq))
1176                                     tconn.callOther[j] |= RX_OTHER_IN;
1177                                 if (queue_IsNotEmpty(&tcall->tq))
1178                                     tconn.callOther[j] |= RX_OTHER_OUT;
1179                             }
1180                             else tconn.callState[j] = RX_STATE_NOTINIT;
1181                         }
1182
1183                         tconn.natMTU = htonl(tc->peer->natMTU);
1184                         tconn.error = htonl(tc->error);
1185                         tconn.flags = tc->flags;
1186                         tconn.type = tc->type;
1187                         tconn.securityIndex = tc->securityIndex;
1188                         if (tc->securityObject) {
1189                             RXS_GetStats (tc->securityObject, tc,
1190                                           &tconn.secStats);
1191 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1192 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1193                             DOHTONL(flags);
1194                             DOHTONL(expires);
1195                             DOHTONL(packetsReceived);
1196                             DOHTONL(packetsSent);
1197                             DOHTONL(bytesReceived);
1198                             DOHTONL(bytesSent);
1199                             for (i=0;
1200                                  i<sizeof(tconn.secStats.spares)/sizeof(short);
1201                                  i++)
1202                                 DOHTONS(spares[i]);
1203                             for (i=0;
1204                                  i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
1205                                  i++)
1206                                 DOHTONL(sparel[i]);
1207                         }
1208
1209                         MUTEX_EXIT(&rx_connHashTable_lock);
1210                         rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1211                         tl = ap->length;
1212                         ap->length = sizeof(struct rx_debugConn);
1213                         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1214                         ap->length = tl;
1215                         return ap;
1216                     }
1217                 }
1218                 MUTEX_EXIT(&rx_connHashTable_lock);
1219             }
1220             /* if we make it here, there are no interesting packets */
1221             tconn.cid = htonl(0xffffffff); /* means end */
1222             rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1223             tl = ap->length;
1224             ap->length = sizeof(struct rx_debugConn);
1225             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1226             ap->length = tl;
1227             break;
1228         }
1229
1230         /*
1231          * Pass back all the peer structures we have available
1232          */
1233
1234         case RX_DEBUGI_GETPEER: {
1235             int i;
1236             register struct rx_peer *tp;
1237             struct rx_debugPeer tpeer;
1238
1239
1240             tl = sizeof(struct rx_debugPeer) - ap->length;
1241             if (tl > 0)
1242               tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1243             if (tl > 0)
1244               return ap;
1245
1246             memset((char *)&tpeer, 0, sizeof(tpeer));
1247             for(i=0;i<rx_hashTableSize;i++) {
1248 #if !defined(KERNEL)
1249                 /* the time complexity of the algorithm used here
1250                  * exponentially increses with the number of peers.
1251                  *
1252                  * Yielding after processing each hash table entry
1253                  * and dropping rx_peerHashTable_lock.
1254                  * also increases the risk that we will miss a new
1255                  * entry - but we are willing to live with this
1256                  * limitation since this is meant for debugging only
1257                  */
1258 #ifdef AFS_PTHREAD_ENV
1259                 pthread_yield();
1260 #else
1261                 (void) IOMGR_Poll();
1262 #endif
1263 #endif
1264                 MUTEX_ENTER(&rx_peerHashTable_lock);
1265                 for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
1266                     if (tin.index-- <= 0) {
1267                         tpeer.host = tp->host;
1268                         tpeer.port = tp->port;
1269                         tpeer.ifMTU = htons(tp->ifMTU);
1270                         tpeer.idleWhen = htonl(tp->idleWhen);
1271                         tpeer.refCount = htons(tp->refCount);
1272                         tpeer.burstSize = tp->burstSize;
1273                         tpeer.burst = tp->burst;
1274                         tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1275                         tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1276                         tpeer.rtt = htonl(tp->rtt);
1277                         tpeer.rtt_dev = htonl(tp->rtt_dev);
1278                         tpeer.timeout.sec = htonl(tp->timeout.sec);
1279                         tpeer.timeout.usec = htonl(tp->timeout.usec);
1280                         tpeer.nSent = htonl(tp->nSent);
1281                         tpeer.reSends = htonl(tp->reSends);
1282                         tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1283                         tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1284                         tpeer.rateFlag = htonl(tp->rateFlag);
1285                         tpeer.natMTU = htons(tp->natMTU);
1286                         tpeer.maxMTU = htons(tp->maxMTU);
1287                         tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1288                         tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1289                         tpeer.MTU = htons(tp->MTU);
1290                         tpeer.cwind = htons(tp->cwind);
1291                         tpeer.nDgramPackets = htons(tp->nDgramPackets);
1292                         tpeer.congestSeq = htons(tp->congestSeq);
1293                         tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1294                         tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1295                         tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1296                         tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
1297
1298                         MUTEX_EXIT(&rx_peerHashTable_lock);
1299                         rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1300                         tl = ap->length;
1301                         ap->length = sizeof(struct rx_debugPeer);
1302                         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1303                         ap->length = tl;
1304                         return ap;
1305                     }
1306                 }
1307                 MUTEX_EXIT(&rx_peerHashTable_lock);
1308             }
1309             /* if we make it here, there are no interesting packets */
1310             tpeer.host = htonl(0xffffffff); /* means end */
1311             rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1312             tl = ap->length;
1313             ap->length = sizeof(struct rx_debugPeer);
1314             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1315             ap->length = tl;
1316             break;
1317         }
1318
1319         case RX_DEBUGI_RXSTATS: {
1320             int i;
1321             afs_int32 *s;
1322
1323             tl = sizeof(rx_stats) - ap->length;
1324             if (tl > 0)
1325               tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1326             if (tl > 0)
1327               return ap;
1328
1329             /* Since its all int32s convert to network order with a loop. */
1330             MUTEX_ENTER(&rx_stats_mutex);
1331             s = (afs_int32 *)&rx_stats;
1332             for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1333                 rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1334
1335             tl = ap->length;
1336             ap->length = sizeof(rx_stats);
1337             MUTEX_EXIT(&rx_stats_mutex);
1338             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1339             ap->length = tl;
1340             break;
1341         }
1342
1343         default:
1344             /* error response packet */
1345             tin.type = htonl(RX_DEBUGI_BADTYPE);
1346             tin.index = tin.type;
1347             rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1348             tl = ap->length;
1349             ap->length = sizeof(struct rx_debugIn);
1350             rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1351             ap->length = tl;
1352             break;
1353     }
1354     return ap;
1355 }
1356
1357 struct rx_packet *rxi_ReceiveVersionPacket(register struct rx_packet *ap,
1358         osi_socket asocket, afs_int32 ahost, short aport, int istack)
1359 {
1360     afs_int32 tl;
1361
1362     /*
1363      * Only respond to client-initiated version requests, and
1364      * clear that flag in the response.
1365      */
1366     if (ap->header.flags & RX_CLIENT_INITIATED) {
1367         char buf[66];
1368
1369         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1370         rxi_EncodePacketHeader(ap);
1371         memset(buf, 0, sizeof(buf));
1372         strncpy(buf, cml_version_number+4, sizeof(buf)-1);
1373         rx_packetwrite(ap, 0, 65, buf);
1374         tl = ap->length;
1375         ap->length = 65;
1376         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1377         ap->length = tl;
1378     }
1379
1380     return ap;
1381 }
1382
1383
1384 /* send a debug packet back to the sender */
1385 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1386                                afs_int32 ahost, short aport, afs_int32 istack)
1387 {
1388     struct sockaddr_in taddr;
1389     int i;
1390     int nbytes;
1391     int saven = 0;
1392     size_t savelen = 0;
1393 #ifdef KERNEL
1394     int waslocked = ISAFS_GLOCK();
1395 #endif
1396
1397     taddr.sin_family = AF_INET;
1398     taddr.sin_port = aport;
1399     taddr.sin_addr.s_addr = ahost;
1400 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
1401     taddr.sin_len = sizeof(struct sockaddr_in);
1402 #endif
1403
1404     /* We need to trim the niovecs. */
1405     nbytes = apacket->length;
1406     for (i=1; i < apacket->niovecs; i++) {
1407       if (nbytes <= apacket->wirevec[i].iov_len) {
1408         savelen = apacket->wirevec[i].iov_len;
1409         saven = apacket->niovecs;
1410         apacket->wirevec[i].iov_len = nbytes;
1411         apacket->niovecs = i+1;   /* so condition fails because i == niovecs */
1412       }
1413       else nbytes -= apacket->wirevec[i].iov_len;
1414     }
1415     AFS_RXGUNLOCK();
1416 #ifdef KERNEL
1417     if (waslocked) AFS_GUNLOCK();
1418 #endif
1419     /* debug packets are not reliably delivered, hence the cast below. */
1420     (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1421                        apacket->length+RX_HEADER_SIZE, istack);
1422 #ifdef KERNEL
1423     if (waslocked) AFS_GLOCK();
1424 #endif
1425     AFS_RXGLOCK();
1426     if (saven) {  /* means we truncated the packet above. */
1427       apacket->wirevec[i-1].iov_len = savelen;
1428       apacket->niovecs = saven;
1429     }
1430
1431 }
1432
1433 /* Send the packet to appropriate destination for the specified
1434  * connection.  The header is first encoded and placed in the packet.
1435  */
1436 void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
1437                     int istack)
1438 {
1439 #if defined(KERNEL)
1440     int waslocked;
1441 #endif
1442     struct sockaddr_in addr;
1443     register struct rx_peer *peer = conn->peer;
1444     osi_socket socket;
1445 #ifdef RXDEBUG
1446     char deliveryType = 'S';
1447 #endif
1448     /* The address we're sending the packet to */
1449     addr.sin_family = AF_INET;
1450     addr.sin_port = peer->port;
1451     addr.sin_addr.s_addr = peer->host;
1452
1453     /* This stuff should be revamped, I think, so that most, if not
1454      * all, of the header stuff is always added here.  We could
1455      * probably do away with the encode/decode routines. XXXXX */
1456
1457     /* Stamp each packet with a unique serial number.  The serial
1458      * number is maintained on a connection basis because some types
1459      * of security may be based on the serial number of the packet,
1460      * and security is handled on a per authenticated-connection
1461      * basis. */
1462     /* Pre-increment, to guarantee no zero serial number; a zero
1463      * serial number means the packet was never sent. */
1464     MUTEX_ENTER(&conn->conn_data_lock);
1465     p->header.serial = ++conn->serial;
1466     MUTEX_EXIT(&conn->conn_data_lock);
1467     /* This is so we can adjust retransmit time-outs better in the face of
1468      * rapidly changing round-trip times.  RTO estimation is not a la Karn.
1469      */
1470     if (p->firstSerial == 0) {
1471        p->firstSerial = p->header.serial;
1472      }
1473
1474 #ifdef RXDEBUG
1475     /* If an output tracer function is defined, call it with the packet and
1476      * network address.  Note this function may modify its arguments. */
1477     if (rx_almostSent) {
1478         int drop = (*rx_almostSent) (p, &addr);
1479         /* drop packet if return value is non-zero? */
1480         if (drop) deliveryType = 'D';   /* Drop the packet */
1481     }
1482 #endif
1483
1484     /* Get network byte order header */
1485     rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
1486                                  * touch ALL the fields */
1487
1488     /* Send the packet out on the same socket that related packets are being
1489      * received on */
1490     socket = (conn->type == RX_CLIENT_CONNECTION
1491               ? rx_socket : conn->service->socket);
1492
1493 #ifdef RXDEBUG
1494     /* Possibly drop this packet,  for testing purposes */
1495     if ((deliveryType == 'D') ||
1496         ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1497          (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1498         deliveryType = 'D';             /* Drop the packet */
1499     }
1500     else {
1501         deliveryType = 'S';             /* Send the packet */
1502 #endif /* RXDEBUG */
1503
1504         /* Loop until the packet is sent.  We'd prefer just to use a
1505          * blocking socket, but unfortunately the interface doesn't
1506          * allow us to have the socket block in send mode, and not
1507          * block in receive mode */
1508         AFS_RXGUNLOCK();
1509 #ifdef KERNEL
1510         waslocked = ISAFS_GLOCK();
1511         if (waslocked) AFS_GUNLOCK();
1512 #endif
1513         if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1514                         p->length+RX_HEADER_SIZE, istack)){
1515           /* send failed, so let's hurry up the resend, eh? */
1516           MUTEX_ENTER(&rx_stats_mutex);
1517           rx_stats.netSendFailures++;
1518           MUTEX_EXIT(&rx_stats_mutex);
1519           p->retryTime = p->timeSent;  /* resend it very soon */
1520           clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1521         }
1522 #ifdef KERNEL
1523         if (waslocked) AFS_GLOCK();
1524 #endif
1525         AFS_RXGLOCK();
1526 #ifdef RXDEBUG
1527     }
1528     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1529          deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1530          peer->host, peer->port, p->header.serial, p->header.epoch,
1531          p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1532          p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1533 #endif
1534     MUTEX_ENTER(&rx_stats_mutex);
1535     rx_stats.packetsSent[p->header.type-1]++;
1536     MUTEX_EXIT(&rx_stats_mutex);
1537     MUTEX_ENTER(&peer->peer_lock);
1538     hadd32(peer->bytesSent, p->length);
1539     MUTEX_EXIT(&peer->peer_lock);
1540 }
1541
1542 /* Send a list of packets to appropriate destination for the specified
1543  * connection.  The headers are first encoded and placed in the packets.
1544  */
1545 void rxi_SendPacketList(struct rx_connection * conn, struct rx_packet **list,
1546         int len, int istack)
1547 {
1548 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
1549     int waslocked;
1550 #endif
1551     struct sockaddr_in addr;
1552     register struct rx_peer *peer = conn->peer;
1553     osi_socket socket;
1554     struct rx_packet *p = NULL;
1555     struct iovec wirevec[RX_MAXIOVECS];
1556     int i, length;
1557     afs_uint32 serial;
1558     afs_uint32 temp;
1559     struct rx_jumboHeader *jp;
1560 #ifdef RXDEBUG
1561     char deliveryType = 'S';
1562 #endif
1563     /* The address we're sending the packet to */
1564     addr.sin_family = AF_INET;
1565     addr.sin_port = peer->port;
1566     addr.sin_addr.s_addr = peer->host;
1567
1568     if (len+1 > RX_MAXIOVECS) {
1569         osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1570     }
1571
1572     /*
1573      * Stamp the packets in this jumbogram with consecutive serial numbers
1574      */
1575     MUTEX_ENTER(&conn->conn_data_lock);
1576     serial = conn->serial;
1577     conn->serial += len;
1578     MUTEX_EXIT(&conn->conn_data_lock);
1579
1580
1581     /* This stuff should be revamped, I think, so that most, if not
1582      * all, of the header stuff is always added here.  We could
1583      * probably do away with the encode/decode routines. XXXXX */
1584
1585     jp = NULL;
1586     length = RX_HEADER_SIZE;
1587     wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1588     wirevec[0].iov_len = RX_HEADER_SIZE;
1589     for (i = 0 ; i < len ; i++) {
1590         p = list[i];
1591
1592         /* The whole 3.5 jumbogram scheme relies on packets fitting
1593          * in a single packet buffer. */
1594         if (p->niovecs > 2) {
1595             osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1596         }
1597
1598         /* Set the RX_JUMBO_PACKET flags in all but the last packets
1599          * in this chunk.  */
1600         if (i < len-1) {
1601             if (p->length != RX_JUMBOBUFFERSIZE) {
1602                 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1603             }
1604             p->header.flags |= RX_JUMBO_PACKET;
1605             length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1606             wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1607         } else {
1608             wirevec[i+1].iov_len = p->length;
1609             length += p->length;
1610         }
1611         wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
1612         if (jp != NULL) {
1613             /* Convert jumbo packet header to network byte order */
1614             temp = (afs_uint32)(p->header.flags) << 24;
1615             temp |= (afs_uint32)(p->header.spare);
1616             *(afs_uint32 *)jp = htonl(temp);
1617         }
1618         jp = (struct rx_jumboHeader *)
1619              ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1620
1621         /* Stamp each packet with a unique serial number.  The serial
1622          * number is maintained on a connection basis because some types
1623          * of security may be based on the serial number of the packet,
1624          * and security is handled on a per authenticated-connection
1625          * basis. */
1626         /* Pre-increment, to guarantee no zero serial number; a zero
1627          * serial number means the packet was never sent. */
1628         p->header.serial = ++serial;
1629         /* This is so we can adjust retransmit time-outs better in the face of
1630          * rapidly changing round-trip times.  RTO estimation is not a la Karn.
1631          */
1632         if (p->firstSerial == 0) {
1633            p->firstSerial = p->header.serial;
1634         }
1635
1636 #ifdef RXDEBUG
1637         /* If an output tracer function is defined, call it with the packet and
1638          * network address.  Note this function may modify its arguments. */
1639         if (rx_almostSent) {
1640             int drop = (*rx_almostSent) (p, &addr);
1641             /* drop packet if return value is non-zero? */
1642             if (drop) deliveryType = 'D';       /* Drop the packet */
1643         }
1644 #endif
1645
1646         /* Get network byte order header */
1647         rxi_EncodePacketHeader(p);      /* XXX in the event of rexmit, etc, don't need to
1648                                      * touch ALL the fields */
1649     }
1650
1651     /* Send the packet out on the same socket that related packets are being
1652      * received on */
1653     socket = (conn->type == RX_CLIENT_CONNECTION
1654               ? rx_socket : conn->service->socket);
1655
1656 #ifdef RXDEBUG
1657     /* Possibly drop this packet,  for testing purposes */
1658     if ((deliveryType == 'D') ||
1659         ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1660          (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1661         deliveryType = 'D';             /* Drop the packet */
1662     }
1663     else {
1664         deliveryType = 'S';             /* Send the packet */
1665 #endif /* RXDEBUG */
1666
1667         /* Loop until the packet is sent.  We'd prefer just to use a
1668          * blocking socket, but unfortunately the interface doesn't
1669          * allow us to have the socket block in send mode, and not
1670          * block in receive mode */
1671         AFS_RXGUNLOCK();
1672 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
1673         waslocked = ISAFS_GLOCK();
1674         if (!istack && waslocked) AFS_GUNLOCK();
1675 #endif
1676         if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
1677           /* send failed, so let's hurry up the resend, eh? */
1678           MUTEX_ENTER(&rx_stats_mutex);
1679           rx_stats.netSendFailures++;
1680           MUTEX_EXIT(&rx_stats_mutex);
1681           for (i = 0 ; i < len ; i++) {
1682             p = list[i];
1683             p->retryTime = p->timeSent;  /* resend it very soon */
1684             clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1685           }
1686         }
1687 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
1688         if (!istack && waslocked) AFS_GLOCK();
1689 #endif
1690         AFS_RXGLOCK();
1691 #ifdef RXDEBUG
1692     }
1693     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1694          deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1695          peer->host, peer->port, p->header.serial, p->header.epoch,
1696          p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1697          p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1698 #endif
1699     MUTEX_ENTER(&rx_stats_mutex);
1700     rx_stats.packetsSent[p->header.type-1]++;
1701     MUTEX_EXIT(&rx_stats_mutex);
1702     MUTEX_ENTER(&peer->peer_lock);
1703     hadd32(peer->bytesSent, p->length);
1704     MUTEX_EXIT(&peer->peer_lock);
1705 }
1706
1707
1708 /* Send a "special" packet to the peer connection.  If call is
1709  * specified, then the packet is directed to a specific call channel
1710  * associated with the connection, otherwise it is directed to the
1711  * connection only. Uses optionalPacket if it is supplied, rather than
1712  * allocating a new packet buffer.  Nbytes is the length of the data
1713  * portion of the packet.  If data is non-null, nbytes of data are
1714  * copied into the packet.  Type is the type of the packet, as defined
1715  * in rx.h.  Bug: there's a lot of duplication between this and other
1716  * routines.  This needs to be cleaned up. */
1717 struct rx_packet *rxi_SendSpecial(register struct rx_call *call,
1718         register struct rx_connection *conn, struct rx_packet *optionalPacket,
1719         int type, char *data, int nbytes, int istack)
1720 {
1721     /* Some of the following stuff should be common code for all
1722      * packet sends (it's repeated elsewhere) */
1723     register struct rx_packet *p;
1724     unsigned int i = 0;
1725     int savelen = 0, saven = 0;
1726     int channel, callNumber;
1727     if (call) {
1728         channel = call->channel;
1729         callNumber = *call->callNumber;
1730         /* BUSY packets refer to the next call on this connection */
1731         if (type == RX_PACKET_TYPE_BUSY) {
1732             callNumber++;
1733         }
1734     } else {
1735         channel = 0;
1736         callNumber = 0;
1737     }
1738     p = optionalPacket;
1739     if (!p) {
1740         p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1741         if (!p) osi_Panic("rxi_SendSpecial failure");
1742     }
1743
1744     if (nbytes != -1)
1745       p->length = nbytes;
1746     else
1747       nbytes = p->length;
1748     p->header.serviceId = conn->serviceId;
1749     p->header.securityIndex = conn->securityIndex;
1750     p->header.cid = (conn->cid | channel);
1751     p->header.callNumber = callNumber;
1752     p->header.seq = 0;
1753     p->header.epoch = conn->epoch;
1754     p->header.type = type;
1755     p->header.flags = 0;
1756     if (conn->type == RX_CLIENT_CONNECTION)
1757        p->header.flags |= RX_CLIENT_INITIATED;
1758     if (data)
1759       rx_packetwrite(p, 0, nbytes, data);
1760
1761     for (i=1; i < p->niovecs; i++) {
1762       if (nbytes <= p->wirevec[i].iov_len) {
1763         savelen = p->wirevec[i].iov_len;
1764         saven = p->niovecs;
1765         p->wirevec[i].iov_len = nbytes;
1766         p->niovecs = i+1;   /* so condition fails because i == niovecs */
1767       }
1768       else nbytes -= p->wirevec[i].iov_len;
1769     }
1770
1771     if (call) rxi_Send(call, p, istack);
1772     else rxi_SendPacket(conn, p, istack);
1773     if (saven) {  /* means we truncated the packet above.  We probably don't  */
1774       /* really need to do this, but it seems safer this way, given that  */
1775       /* sneaky optionalPacket... */
1776       p->wirevec[i-1].iov_len = savelen;
1777       p->niovecs = saven;
1778     }
1779     if (!optionalPacket) rxi_FreePacket(p);
1780     return optionalPacket;
1781 }
1782
1783
1784 /* Encode the packet's header (from the struct header in the packet to
1785  * the net byte order representation in the wire representation of the
1786  * packet, which is what is actually sent out on the wire) */
1787 void rxi_EncodePacketHeader(register struct rx_packet *p)
1788 {
1789     register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base);      /* MTUXXX */
1790
1791     memset((char *)buf, 0, RX_HEADER_SIZE);
1792     *buf++ = htonl(p->header.epoch);
1793     *buf++ = htonl(p->header.cid);
1794     *buf++ = htonl(p->header.callNumber);
1795     *buf++ = htonl(p->header.seq);
1796     *buf++ = htonl(p->header.serial);
1797     *buf++ = htonl(  (((afs_uint32)p->header.type)<<24)
1798                    | (((afs_uint32)p->header.flags)<<16)
1799                    | (p->header.userStatus<<8) | p->header.securityIndex);
1800     /* Note: top 16 bits of this next word were reserved */
1801     *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1802 }
1803
1804 /* Decode the packet's header (from net byte order to a struct header) */
1805 void rxi_DecodePacketHeader(register struct rx_packet *p)
1806 {
1807     register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base);      /* MTUXXX */
1808     afs_uint32 temp;
1809
1810     p->header.epoch = ntohl(*buf);
1811     buf++;
1812     p->header.cid = ntohl(*buf);
1813     buf++;
1814     p->header.callNumber = ntohl(*buf);
1815     buf++;
1816     p->header.seq = ntohl(*buf);
1817     buf++;
1818     p->header.serial = ntohl(*buf);
1819     buf++;
1820
1821     temp = ntohl(*buf);
1822     buf++;
1823
1824     /* C will truncate byte fields to bytes for me */
1825     p->header.type = temp>>24;
1826     p->header.flags = temp>>16;
1827     p->header.userStatus = temp>>8;
1828     p->header.securityIndex = temp>>0;
1829
1830     temp = ntohl(*buf);
1831     buf++;
1832
1833     p->header.serviceId = (temp&0xffff);
1834     p->header.spare = temp>>16;
1835     /* Note: top 16 bits of this last word are the security checksum */
1836 }
1837
1838 void rxi_PrepareSendPacket(register struct rx_call *call, register struct rx_packet *p,
1839         register int last)
1840 {
1841     register struct rx_connection *conn = call->conn;
1842     int i, j;
1843     ssize_t len;        /* len must be a signed type; it can go negative */
1844
1845     p->flags &= ~RX_PKTFLAG_ACKED;
1846     p->header.cid = (conn->cid | call->channel);
1847     p->header.serviceId = conn->serviceId;
1848     p->header.securityIndex = conn->securityIndex;
1849     p->header.callNumber = *call->callNumber;
1850     p->header.seq = call->tnext++;
1851     p->header.epoch = conn->epoch;
1852     p->header.type = RX_PACKET_TYPE_DATA;
1853     p->header.flags = 0;
1854     p->header.spare = 0;
1855     if (conn->type == RX_CLIENT_CONNECTION)
1856       p->header.flags |= RX_CLIENT_INITIATED;
1857
1858     if (last)
1859       p->header.flags |= RX_LAST_PACKET;
1860
1861     clock_Zero(&p->retryTime); /* Never yet transmitted */
1862     clock_Zero(&p->firstSent); /* Never yet transmitted */
1863     p->header.serial = 0;      /* Another way of saying never transmitted... */
1864     p->backoff = 0;
1865
1866     /* Now that we're sure this is the last data on the call, make sure
1867      * that the "length" and the sum of the iov_lens matches. */
1868     len = p->length + call->conn->securityHeaderSize;
1869
1870     for (i=1; i < p->niovecs && len > 0; i++) {
1871       len -=  p->wirevec[i].iov_len;
1872     }
1873     if (len > 0) {
1874       osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1875     }
1876     else {
1877       /* Free any extra elements in the wirevec */
1878       for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1879         rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
1880       }
1881       p->niovecs = i;
1882       p->wirevec[i-1].iov_len += len;
1883     }
1884     RXS_PreparePacket(conn->securityObject, call, p);
1885 }
1886
1887 /* Given an interface MTU size, calculate an adjusted MTU size that
1888  * will make efficient use of the RX buffers when the peer is sending
1889  * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
1890 int rxi_AdjustIfMTU(int mtu)
1891 {
1892     int adjMTU;
1893     int frags;
1894
1895     adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1896     if (mtu <= adjMTU) {
1897         return mtu;
1898     }
1899     mtu -= adjMTU;
1900     if (mtu <= 0) {
1901         return adjMTU;
1902     }
1903     frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1904     return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1905 }
1906
1907 /* Given an interface MTU size, and the peer's advertised max receive
1908  * size, calculate an adjisted maxMTU size that makes efficient use
1909  * of our packet buffers when we are sending AFS 3.4a jumbograms. */
1910 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1911 {
1912     int maxMTU = mtu * rxi_nSendFrags;
1913     maxMTU = MIN(maxMTU, peerMaxMTU);
1914     return rxi_AdjustIfMTU(maxMTU);
1915 }
1916
1917 /* Given a packet size, figure out how many datagram packet will fit.
1918  * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1919  * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1920  * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
1921 int rxi_AdjustDgramPackets(int frags, int mtu)
1922 {
1923     int maxMTU;
1924     if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
1925         return 1;
1926     }
1927     maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1928     maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1929     /* subtract the size of the first and last packets */
1930     maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
1931     if (maxMTU < 0) {
1932         return 1;
1933     }
1934     return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1935 }