2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "afs/param.h"
14 #include <afs/param.h>
21 #include "afs/sysincludes.h"
22 #include "afsincludes.h"
23 #include "rx/rx_kcommon.h"
24 #include "rx/rx_clock.h"
25 #include "rx/rx_queue.h"
26 #include "rx/rx_packet.h"
27 #else /* defined(UKERNEL) */
29 #ifndef AFS_LINUX20_ENV
32 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
33 #include "afs/sysincludes.h"
35 #if defined(AFS_OBSD_ENV)
39 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
40 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
41 #include "sys/mount.h" /* it gets pulled in by something later anyway */
45 #include "netinet/in.h"
46 #include "afs/afs_osi.h"
47 #include "rx_kmutex.h"
48 #include "rx/rx_clock.h"
49 #include "rx/rx_queue.h"
51 #include <sys/sysmacros.h>
53 #include "rx/rx_packet.h"
54 #endif /* defined(UKERNEL) */
55 #include "rx/rx_globals.h"
57 #include "sys/types.h"
60 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
64 #include <sys/socket.h>
65 #include <netinet/in.h>
66 #endif /* AFS_NT40_ENV */
67 #include "rx_xmit_nt.h"
70 #include <sys/socket.h>
71 #include <netinet/in.h>
77 #include <sys/sysmacros.h>
79 #include "rx_packet.h"
80 #include "rx_globals.h"
95 /* rxdb_fileID is used to identify the lock location, along with line#. */
96 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
97 #endif /* RX_LOCKS_DB */
98 struct rx_packet *rx_mallocedP = 0;
100 extern char cml_version_number[];
101 extern int (*rx_almostSent)();
103 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
104 afs_int32 ahost, short aport, afs_int32 istack);
106 /* some rules about packets:
107 * 1. When a packet is allocated, the final iov_buf contains room for
108 * a security trailer, but iov_len masks that fact. If the security
109 * package wants to add the trailer, it may do so, and then extend
110 * iov_len appropriately. For this reason, packet's niovecs and
111 * iov_len fields should be accurate before calling PreparePacket.
115 * all packet buffers (iov_base) are integral multiples of
117 * offset is an integral multiple of the word size.
/* Read one 32-bit word from `packet` at byte offset `offset`, walking the
 * data iovecs (wirevec[1..niovecs-1]) to locate the buffer containing it.
 * Per the note above, offset must be word-aligned and each iov_base is
 * word-aligned, so the cast/dereference is safe.
 * NOTE(review): this excerpt is truncated — the fall-through/return path
 * after the loop is not visible here. */
119 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
123 for (l=0, i=1; i< packet->niovecs ; i++ ) {
/* l accumulates the total byte length of data iovecs before wirevec[i];
 * when the running total passes `offset`, the word lives in this iovec. */
124 if (l + packet->wirevec[i].iov_len > offset) {
125 return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
127 l += packet->wirevec[i].iov_len;
134 * all packet buffers (iov_base) are integral multiples of the word size.
135 * offset is an integral multiple of the word size.
/* Store the 32-bit word `data` into `packet` at byte offset `offset`,
 * scanning the data iovecs to find the buffer that contains that offset.
 * Same alignment assumptions as rx_SlowGetInt32 above.
 * NOTE(review): excerpt is truncated — the assigned value / return path
 * after line 143 is not visible here. */
137 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
141 for (l=0, i=1; i< packet->niovecs ; i++ ) {
/* l = total bytes in the iovecs preceding wirevec[i] */
142 if (l + packet->wirevec[i].iov_len > offset) {
143 *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
147 l += packet->wirevec[i].iov_len;
154 * all packet buffers (iov_base) are integral multiples of the
156 * offset is an integral multiple of the word size.
158 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
/* Copy up to `resid` bytes out of `packet`, starting at byte offset
 * `offset`, into the caller's buffer `out`.  Returns the number of bytes
 * actually copied (r - resid when the packet ran short, else r).
 * Assumes the data buffers are contiguously arrayed in wirevec[1..]. */
160 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
161 int resid, char *out)
163 unsigned int i, j, l, r;
/* First find the iovec containing `offset`. */
164 for (l=0, i=1; i< packet->niovecs ; i++ ) {
165 if (l + packet->wirevec[i].iov_len > offset) {
168 l += packet->wirevec[i].iov_len;
171 /* i is the iovec which contains the first little bit of data in which we
172 * are interested. l is the total length of everything prior to this iovec.
173 * j is the number of bytes we can safely copy out of this iovec.
176 while ((resid > 0) && (i < packet->niovecs)) {
177 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
178 memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
180 l += packet->wirevec[i].iov_len;
/* If resid is still nonzero we ran out of iovecs; report the short count. */
184 return (resid ? (r - resid) : r);
189 * all packet buffers (iov_base) are integral multiples of the
191 * offset is an integral multiple of the word size.
/* Copy `resid` bytes from the caller's buffer into `packet` starting at
 * byte offset `offset`, allocating additional continuation buffers on the
 * fly (rxi_AllocDataBuf) when the packet runs out of iovecs.  Returns the
 * number of bytes actually written. */
193 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
/* Locate the iovec containing `offset`. */
199 for (l=0, i=1; i < packet->niovecs; i++ ) {
200 if (l + packet->wirevec[i].iov_len > offset) {
203 l += packet->wirevec[i].iov_len;
206 /* i is the iovec which contains the first little bit of data in which we
207 * are interested. l is the total length of everything prior to this iovec.
208 * j is the number of bytes we can safely copy into this iovec.
211 while ((resid > 0) && (i < RX_MAXWVECS)) {
/* Ran past the allocated iovecs: grow the packet before writing more. */
212 if (i >= packet->niovecs)
213 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
216 b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
217 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
220 l += packet->wirevec[i].iov_len;
/* Short write (allocation failed or vec limit hit) reports r - resid. */
224 return (resid ? (r - resid) : r);
/* Allocate one packet from the free queue for use as a continuation
 * buffer, under rx_freePktQ_lock.  Returns NULL (after bumping the
 * per-class alloc-failure statistic) when the class is over quota;
 * replenishes the free queue via rxi_MorePacketsNoLock when it is empty.
 * NOTE(review): excerpt is truncated — the queue_Remove and final return
 * are not visible here. */
227 static struct rx_packet *allocCBuf(int class)
233 MUTEX_ENTER(&rx_freePktQ_lock);
/* Over-quota: fail the allocation but record which class failed. */
236 if (rxi_OverQuota(class)) {
238 rxi_NeedMorePackets = TRUE;
239 MUTEX_ENTER(&rx_stats_mutex);
241 case RX_PACKET_CLASS_RECEIVE:
242 rx_stats.receivePktAllocFailures++;
244 case RX_PACKET_CLASS_SEND:
245 rx_stats.sendPktAllocFailures++;
247 case RX_PACKET_CLASS_SPECIAL:
248 rx_stats.specialPktAllocFailures++;
250 case RX_PACKET_CLASS_RECV_CBUF:
251 rx_stats.receiveCbufPktAllocFailures++;
253 case RX_PACKET_CLASS_SEND_CBUF:
254 rx_stats.sendCbufPktAllocFailures++;
257 MUTEX_EXIT(&rx_stats_mutex);
261 if (queue_IsEmpty(&rx_freePacketQueue)) {
263 rxi_NeedMorePackets = TRUE;
/* Re-check after (presumably) waiting/allocating; grow the pool if
 * the queue is still empty.  TODO confirm against full source. */
267 if (queue_IsEmpty(&rx_freePacketQueue)) {
268 rxi_MorePacketsNoLock(rx_initSendWindow);
273 c = queue_First(&rx_freePacketQueue, rx_packet);
/* Sanity: a packet on the free queue must carry RX_PKTFLAG_FREE. */
275 if (!(c->flags & RX_PKTFLAG_FREE))
276 osi_Panic("rxi_AllocPacket: packet not free\n");
277 c->flags = 0; /* clear RX_PKTFLAG_FREE, initialize the rest */
283 MUTEX_EXIT(&rx_freePktQ_lock);
290 * Free a packet currently used as a continuation buffer
/* Return a packet used as a continuation buffer to the free queue,
 * taking rx_freePktQ_lock around the unlocked free routine. */
292 void rxi_freeCBuf(struct rx_packet *c)
297 MUTEX_ENTER(&rx_freePktQ_lock);
299 rxi_FreePacketNoLock(c);
300 /* Wakeup anyone waiting for packets */
303 MUTEX_EXIT(&rx_freePktQ_lock);
307 /* this one is kind of awful.
308 * In rxkad, the packet has been all shortened, and everything, ready for
309 * sending. All of a sudden, we discover we need some of that space back.
310 * This isn't terribly general, because it knows that the packets are only
311 * rounded up to the EBS (userdata + security header).
/* Reclaim `nb` bytes of rounding slack at the end of packet `p` by
 * extending the last iovec — RX_FIRSTBUFFERSIZE slack for the localdata
 * buffer, RX_CBUFFERSIZE slack for a continuation buffer (see the
 * explanation above).  NOTE(review): excerpt truncated — loop setup for
 * `i` and the return value are not visible here. */
313 int rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
/* Last vec is the packet's own localdata buffer. */
317 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
318 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
319 p->wirevec[i].iov_len += nb;
/* Otherwise it is a continuation buffer with RX_CBUFFERSIZE capacity. */
324 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
325 p->wirevec[i].iov_len += nb;
332 /* get sufficient space to store nb bytes of data (or more), and hook
333 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
334 * returns the number of bytes >0 which it failed to come up with.
335 * Don't need to worry about locking on packet, since only
336 * one thread can manipulate one at a time. Locking on continuation
337 * packets is handled by allocCBuf */
338 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
/* Attach continuation buffers to `p` until at least `nb` more bytes of
 * space are available (or RX_MAXWVECS is reached / allocCBuf fails).
 * Returns the residual byte count (<=0 on success, >0 bytes short on
 * failure) per the contract described above.
 * NOTE(review): excerpt truncated — niovecs update and return not shown. */
339 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
343 for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
344 register struct rx_packet *cb;
/* Each continuation packet donates its localdata area as one iovec. */
345 if ((cb = allocCBuf(class))) {
346 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
347 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
348 nb -= RX_CBUFFERSIZE;
349 p->length += RX_CBUFFERSIZE;
358 /* Add more packet buffers */
/* Allocate `apackets` more rx_packet structures in one arena, initialize
 * each one's header/localdata iovecs, mark them free, and append them to
 * the global free queue under rx_freePktQ_lock.
 * NOTE(review): rx_mallocedP is overwritten with the newest arena —
 * earlier arenas are only reachable via the free queue. */
359 void rxi_MorePackets(int apackets)
361 struct rx_packet *p, *e;
365 getme = apackets * sizeof(struct rx_packet);
366 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
368 PIN(p, getme); /* XXXXX */
369 memset((char *)p, 0, getme);
372 MUTEX_ENTER(&rx_freePktQ_lock);
374 for (e = p + apackets; p<e; p++) {
/* vec 0 = wire header, vec 1 = first data buffer; fixed layout. */
375 p->wirevec[0].iov_base = (char *) (p->wirehead);
376 p->wirevec[0].iov_len = RX_HEADER_SIZE;
377 p->wirevec[1].iov_base = (char *) (p->localdata);
378 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
379 p->flags |= RX_PKTFLAG_FREE;
382 queue_Append(&rx_freePacketQueue, p);
384 rx_nFreePackets += apackets;
385 rxi_NeedMorePackets = FALSE;
389 MUTEX_EXIT(&rx_freePktQ_lock);
394 /* Add more packet buffers */
/* Same as rxi_MorePackets but assumes the caller already holds
 * rx_freePktQ_lock.  Over-allocates by ~25% extra continuation-buffer
 * capacity so a quarter of the packets can carry jumbogram-sized data. */
395 void rxi_MorePacketsNoLock(int apackets)
397 struct rx_packet *p, *e;
400 /* allocate enough packets that 1/4 of the packets will be able
401 * to hold maximal amounts of data */
402 apackets += (apackets/4)
403 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
404 getme = apackets * sizeof(struct rx_packet);
405 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
407 memset((char *)p, 0, getme);
409 for (e = p + apackets; p<e; p++) {
/* Same fixed two-vec initialization as rxi_MorePackets. */
410 p->wirevec[0].iov_base = (char *) (p->wirehead);
411 p->wirevec[0].iov_len = RX_HEADER_SIZE;
412 p->wirevec[1].iov_base = (char *) (p->localdata);
413 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
414 p->flags |= RX_PKTFLAG_FREE;
417 queue_Append(&rx_freePacketQueue, p);
419 rx_nFreePackets += apackets;
420 rxi_NeedMorePackets = FALSE;
/* Release the packet arena pointed to by rx_mallocedP.
 * NOTE(review): the size expression assumes a single arena of
 * rx_maxReceiveWindow+2 packets; the MTUXXX comment acknowledges this
 * does not free every arena ever allocated. */
425 void rxi_FreeAllPackets(void)
427 /* must be called at proper interrupt level, etcetera */
428 /* MTUXXX need to free all Packets */
429 osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
430 UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
433 /* Allocate more packets iff we need more continuation buffers */
434 /* In kernel, can't page in memory with interrupts disabled, so we
435 * don't use the event mechanism. */
/* Grow the packet pool when a prior allocation set rxi_NeedMorePackets.
 * Kernel builds cannot page memory in with interrupts disabled, so this
 * is polled rather than event-driven (see comment above). */
436 void rx_CheckPackets(void)
438 if (rxi_NeedMorePackets) {
439 rxi_MorePackets(rx_initSendWindow);
443 /* In the packet freeing routine below, the assumption is that
444 we want all of the packets to be used equally frequently, so that we
445 don't get packet buffers paging out. It would be just as valid to
446 assume that we DO want them to page out if not many are being used.
447 In any event, we assume the former, and append the packets to the end
449 /* This explanation is bogus. The free list doesn't remain in any kind of
450 useful order for afs_int32: the packets in use get pretty much randomly scattered
451 across all the pages. In order to permit unused {packets,bufs} to page out, they
452 must be stored so that packets which are adjacent in memory are adjacent in the
453 free list. An array springs rapidly to mind.
456 /* Actually free the packet p. */
/* Return packet `p` to the free queue.  Caller must hold
 * rx_freePktQ_lock.  Panics on double-free (RX_PKTFLAG_FREE set). */
457 void rxi_FreePacketNoLock(struct rx_packet *p)
459 dpf(("Free %x\n", p));
461 if (p->flags & RX_PKTFLAG_FREE)
462 osi_Panic("rxi_FreePacketNoLock: packet already free\n");
464 p->flags |= RX_PKTFLAG_FREE;
465 queue_Append(&rx_freePacketQueue, p);
/* Free the continuation buffers attached to `p` (vecs 2..niovecs-1),
 * converting each iov_base back to its owning packet with
 * RX_CBUF_TO_PACKET.  Vec 1 must be the packet's own localdata and is
 * not freed.  Caller holds rx_freePktQ_lock.  `first` must be 1 (MTUXXX:
 * partial frees are not implemented). */
468 int rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
470 struct iovec *iov, *end;
472 if (first != 1) /* MTUXXX */
473 osi_Panic("FreeDataBufs 1: first must be 1");
474 iov = &p->wirevec[1];
475 end = iov + (p->niovecs-1);
476 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
477 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
478 for (iov++ ; iov < end ; iov++) {
480 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
481 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
489 int rxi_nBadIovecs = 0;
491 /* rxi_RestoreDataBufs
493 * Restore the correct sizes to the iovecs. Called when reusing a packet
494 * for reading off the wire.
/* Reset all of `p`'s iovec lengths to their full buffer sizes so the
 * packet can be reused for reading off the wire (earlier sends may have
 * truncated them).  Vec 0/1 are restored to the fixed header/localdata
 * layout; vecs 2.. are restored to RX_CBUFFERSIZE. */
496 void rxi_RestoreDataBufs(struct rx_packet *p)
499 struct iovec *iov = &p->wirevec[2];
501 p->wirevec[0].iov_base = (char *) (p->wirehead);
502 p->wirevec[0].iov_len = RX_HEADER_SIZE;
503 p->wirevec[1].iov_base = (char *) (p->localdata);
504 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
506 for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
/* A NULL base here is a corrupt vec; presumably counted via
 * rxi_nBadIovecs above — body not visible in this excerpt. */
507 if (!iov->iov_base) {
512 iov->iov_len = RX_CBUFFERSIZE;
/* Free any continuation buffers of `p` that lie wholly beyond p->length,
 * i.e. buffers holding no message data.  Walks vecs 2.. consuming
 * `length` (data remaining after the localdata vec), then frees the rest
 * under rx_freePktQ_lock.  `first` must be 1. */
516 int rxi_TrimDataBufs(struct rx_packet *p, int first)
519 struct iovec *iov, *end;
523 osi_Panic("TrimDataBufs 1: first must be 1");
525 /* Skip over continuation buffers containing message data */
526 iov = &p->wirevec[2];
527 end = iov + (p->niovecs-2);
528 length = p->length - p->wirevec[1].iov_len;
529 for (; iov < end && length > 0 ; iov++) {
531 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
532 length -= iov->iov_len;
535 /* iov now points to the first empty data buffer. */
540 MUTEX_ENTER(&rx_freePktQ_lock);
542 for (; iov < end ; iov++) {
544 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
545 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
550 MUTEX_EXIT(&rx_freePktQ_lock);
556 /* Free the packet p. P is assumed not to be on any queue, i.e.
557 * remove it yourself first if you call this routine. */
/* Free packet `p` and all of its continuation buffers.  `p` must already
 * be off any queue (see comment above).  Takes rx_freePktQ_lock around
 * the unlocked helpers. */
558 void rxi_FreePacket(struct rx_packet *p)
563 MUTEX_ENTER(&rx_freePktQ_lock);
565 rxi_FreeDataBufsNoLock(p,1);
566 rxi_FreePacketNoLock(p);
567 /* Wakeup anyone waiting for packets */
570 MUTEX_EXIT(&rx_freePktQ_lock);
575 /* rxi_AllocPacket sets up p->length so it reflects the number of
576 * bytes in the packet at this point, **not including** the header.
577 * The header is absolutely necessary, besides, this is the way the
578 * length field is usually used */
/* Allocate a packet from the free queue; caller holds rx_freePktQ_lock.
 * Mirrors allocCBuf's over-quota bookkeeping, then initializes the fixed
 * vec 0 (wire header) / vec 1 (localdata) layout and sets p->length to
 * the data capacity, excluding the header (see comment above).
 * NOTE(review): excerpt truncated — queue_Remove and the final return
 * are not visible here. */
579 struct rx_packet *rxi_AllocPacketNoLock(int class)
581 register struct rx_packet *p;
/* Over-quota: count the failure per class and return NULL. */
584 if (rxi_OverQuota(class)) {
585 rxi_NeedMorePackets = TRUE;
586 MUTEX_ENTER(&rx_stats_mutex);
588 case RX_PACKET_CLASS_RECEIVE:
589 rx_stats.receivePktAllocFailures++;
591 case RX_PACKET_CLASS_SEND:
592 rx_stats.sendPktAllocFailures++;
594 case RX_PACKET_CLASS_SPECIAL:
595 rx_stats.specialPktAllocFailures++;
597 case RX_PACKET_CLASS_RECV_CBUF:
598 rx_stats.receiveCbufPktAllocFailures++;
600 case RX_PACKET_CLASS_SEND_CBUF:
601 rx_stats.sendCbufPktAllocFailures++;
604 MUTEX_EXIT(&rx_stats_mutex);
605 return (struct rx_packet *) 0;
609 MUTEX_ENTER(&rx_stats_mutex);
610 rx_stats.packetRequests++;
611 MUTEX_EXIT(&rx_stats_mutex);
/* One build panics on an empty queue, the other grows the pool —
 * presumably kernel vs. user paths; #ifdefs not visible in excerpt. */
614 if (queue_IsEmpty(&rx_freePacketQueue))
615 osi_Panic("rxi_AllocPacket error");
617 if (queue_IsEmpty(&rx_freePacketQueue))
618 rxi_MorePacketsNoLock(rx_initSendWindow);
622 p = queue_First(&rx_freePacketQueue, rx_packet);
623 if (!(p->flags & RX_PKTFLAG_FREE))
624 osi_Panic("rxi_AllocPacket: packet not free\n");
626 dpf(("Alloc %x, class %d\n", p, class));
629 p->flags = 0; /* clear RX_PKTFLAG_FREE, initialize the rest */
632 /* have to do this here because rx_FlushWrite fiddles with the iovs in
633 * order to truncate outbound packets. In the near future, may need
634 * to allocate bufs from a static pool here, and/or in AllocSendPacket
636 p->wirevec[0].iov_base = (char *) (p->wirehead);
637 p->wirevec[0].iov_len = RX_HEADER_SIZE;
638 p->wirevec[1].iov_base = (char *) (p->localdata);
639 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
641 p->length = RX_FIRSTBUFFERSIZE;
/* Locked wrapper: allocate a packet of the given class, taking
 * rx_freePktQ_lock around rxi_AllocPacketNoLock. */
645 struct rx_packet *rxi_AllocPacket(int class)
647 register struct rx_packet *p;
649 MUTEX_ENTER(&rx_freePktQ_lock);
650 p = rxi_AllocPacketNoLock(class);
651 MUTEX_EXIT(&rx_freePktQ_lock);
655 /* This guy comes up with as many buffers as it {takes,can get} given
656 * the MTU for this call. It also sets the packet length before
657 * returning. caution: this is often called at NETPRI
658 * Called with call locked.
/* Allocate a send packet for `call` sized for `want` bytes of user data,
 * capped by the call's MTU less header/security overhead.  If no packet
 * is available, blocks (CV or osi sleep) until packets are freed, unless
 * the call has an error.  Called with the call lock held, often at
 * NETPRI (see comment above).
 * NOTE(review): excerpt truncated — loop-exit and return not visible. */
660 struct rx_packet *rxi_AllocSendPacket(register struct rx_call *call, int want)
662 register struct rx_packet *p = (struct rx_packet *) 0;
664 register unsigned delta;
/* mud = max user data per packet; delta = security header + trailer. */
667 mud = call->MTU - RX_HEADER_SIZE;
668 delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
669 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
671 while (!(call->error)) {
672 MUTEX_ENTER(&rx_freePktQ_lock);
673 /* if an error occurred, or we get the packet we want, we're done */
674 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
675 MUTEX_EXIT(&rx_freePktQ_lock);
/* Grow the packet toward the requested size, then clamp to MTU. */
678 want = MIN(want, mud);
680 if ((unsigned) want > p->length)
681 (void) rxi_AllocDataBuf(p, (want - p->length),
682 RX_PACKET_CLASS_SEND_CBUF);
684 if ((unsigned) p->length > mud)
687 if (delta >= p->length) {
696 /* no error occurred, and we didn't get a packet, so we sleep.
697 * At this point, we assume that packets will be returned
698 * sooner or later, as packets are acknowledged, and so we
/* Drop the call lock while sleeping so acks can be processed and
 * packets freed; reacquire and clear the wait flag on wakeup. */
701 call->flags |= RX_CALL_WAIT_PACKETS;
702 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
703 MUTEX_EXIT(&call->lock);
704 rx_waitingForPackets = 1;
706 #ifdef RX_ENABLE_LOCKS
707 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
709 osi_rxSleep(&rx_waitingForPackets);
711 MUTEX_EXIT(&rx_freePktQ_lock);
712 MUTEX_ENTER(&call->lock);
713 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
714 call->flags &= ~RX_CALL_WAIT_PACKETS;
723 /* count the number of used FDs */
/* Count how many of the descriptors 0..amax-1 are open, by probing each
 * with fstat (fstat succeeds only on open descriptors).  Used for the
 * debug-stats reply. */
724 static int CountFDs(register int amax)
727 register int i, code;
731 for(i=0;i<amax;i++) {
732 code = fstat(i, &tstat);
733 if (code == 0) count++;
740 #define CountFDs(amax) amax
744 #if !defined(KERNEL) || defined(UKERNEL)
746 /* This function reads a single packet from the interface into the
747 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
748 * (host,port) of the sender are stored in the supplied variables, and
749 * the data length of the packet is stored in the packet structure.
750 * The header is decoded. */
/* Read one UDP datagram from `socket` into packet `p` via scatter-gather
 * recvmsg.  On success decodes the rx header, stores the sender in
 * *host/*port, updates statistics and the peer's byte counters, and trims
 * unused continuation buffers.  Returns 0 for bogus/absent packets (see
 * comment above).  NOTE(review): excerpt truncated — some branch bodies
 * and the final return are not visible. */
751 int rxi_ReadPacket(int socket, register struct rx_packet *p, afs_uint32 *host, u_short *port)
753 struct sockaddr_in from;
756 register afs_int32 tlen, savelen;
758 rx_computelen(p, tlen);
759 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
761 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
762 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
763 * it once in order to avoid races. */
/* Grow the packet so it can hold a full advertised-size datagram. */
766 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
774 /* Extend the last iovec for padding, it's just to make sure that the
775 * read doesn't return more data than we expect, and is done to get around
776 * our problems caused by the lack of a length field in the rx header.
777 * Use the extra buffer that follows the localdata in each packet
779 savelen = p->wirevec[p->niovecs-1].iov_len;
780 p->wirevec[p->niovecs-1].iov_len += RX_EXTRABUFFERSIZE;
782 memset((char *)&msg, 0, sizeof(msg));
783 msg.msg_name = (char *) &from;
784 msg.msg_namelen = sizeof(struct sockaddr_in);
785 msg.msg_iov = p->wirevec;
786 msg.msg_iovlen = p->niovecs;
787 nbytes = rxi_Recvmsg(socket, &msg, 0);
789 /* restore the vec to its correct state */
790 p->wirevec[p->niovecs-1].iov_len = savelen;
792 p->length = (nbytes - RX_HEADER_SIZE);
/* Oversized read or negative length (high bit set) => bogus packet. */
793 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
795 rxi_MorePackets(rx_initSendWindow);
797 else if (nbytes < 0 && errno == EWOULDBLOCK) {
798 MUTEX_ENTER(&rx_stats_mutex);
799 rx_stats.noPacketOnRead++;
800 MUTEX_EXIT(&rx_stats_mutex);
804 MUTEX_ENTER(&rx_stats_mutex);
805 rx_stats.bogusPacketOnRead++;
806 rx_stats.bogusHost = from.sin_addr.s_addr;
807 MUTEX_EXIT(&rx_stats_mutex);
808 dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
809 from.sin_port,nbytes));
814 /* Extract packet header. */
815 rxi_DecodePacketHeader(p);
817 *host = from.sin_addr.s_addr;
818 *port = from.sin_port;
819 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
820 struct rx_peer *peer;
821 MUTEX_ENTER(&rx_stats_mutex);
822 rx_stats.packetsRead[p->header.type-1]++;
823 MUTEX_EXIT(&rx_stats_mutex);
825 * Try to look up this peer structure. If it doesn't exist,
826 * don't create a new one -
827 * we don't keep count of the bytes sent/received if a peer
828 * structure doesn't already exist.
830 * The peer/connection cleanup code assumes that there is 1 peer
831 * per connection. If we actually created a peer structure here
832 * and this packet was an rxdebug packet, the peer structure would
833 * never be cleaned up.
835 peer = rxi_FindPeer(*host, *port, 0, 0);
837 MUTEX_ENTER(&peer->peer_lock);
838 hadd32(peer->bytesReceived, p->length);
839 MUTEX_EXIT(&peer->peer_lock);
843 /* Free any empty packet buffers at the end of this packet */
844 rxi_TrimDataBufs(p, 1);
850 #endif /* !KERNEL || UKERNEL */
852 /* This function splits off the first packet in a jumbo packet.
853 * As of AFS 3.5, jumbograms contain more than one fixed size
854 * packet, and the RX_JUMBO_PACKET flag is set in all but the
855 * last packet header. All packets (except the last) are padded to
856 * fall on RX_CBUFFERSIZE boundaries.
857 * HACK: We store the length of the first n-1 packets in the
858 * last two pad bytes. */
/* Split the first fixed-size packet off a jumbogram `p` (see the header
 * comment above).  The next packet `np` is recovered from p's first
 * continuation buffer; its abbreviated 4-byte jumbo header supplies
 * flags and checksum for np's full header.  `p` is shrunk to
 * RX_JUMBOBUFFERSIZE and np inherits the remaining length and vecs.
 * NOTE(review): excerpt truncated — length-check bodies and the return
 * value are not visible here. */
860 struct rx_packet *rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host,
861 short port, int first)
863 struct rx_packet *np;
864 struct rx_jumboHeader *jp;
870 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
871 * bytes in length. All but the first packet are preceded by
872 * an abbreviated four byte header. The length of the last packet
873 * is calculated from the size of the jumbogram. */
874 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
876 if ((int)p->length < length) {
877 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
/* Need at least one continuation buffer to hold a second packet. */
880 niov = p->niovecs - 2;
882 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
885 iov = &p->wirevec[2];
886 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
888 /* Get a pointer to the abbreviated packet header */
889 jp = (struct rx_jumboHeader *)
890 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
892 /* Set up the iovecs for the next packet */
893 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
894 np->wirevec[0].iov_len = sizeof(struct rx_header);
895 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
896 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
897 np->niovecs = niov+1;
/* Hand the remaining continuation vecs over to np. */
898 for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
899 np->wirevec[i] = *iov;
901 np->length = p->length - length;
902 p->length = RX_JUMBOBUFFERSIZE;
905 /* Convert the jumbo packet header to host byte order */
906 temp = ntohl(*(afs_uint32 *)jp);
907 jp->flags = (u_char)(temp >> 24);
908 jp->cksum = (u_short)(temp);
910 /* Fill in the packet header */
911 np->header = p->header;
912 np->header.serial = p->header.serial + 1;
913 np->header.seq = p->header.seq + 1;
914 np->header.flags = jp->flags;
915 np->header.spare = jp->cksum;
921 /* Send a udp datagram */
/* Send one UDP datagram described by the iovec array `dvec` to `addr`
 * via sendmsg-style scatter-gather I/O.
 * NOTE(review): excerpt truncated — msg_name/msg_iov assignments and the
 * return value are not visible here; the rxi_Sendmsg result appears to
 * be discarded on this path. */
922 int osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
923 int length, int istack)
927 memset(&msg, 0, sizeof(msg));
929 msg.msg_iovlen = nvecs;
931 msg.msg_namelen = sizeof(struct sockaddr_in);
933 rxi_Sendmsg(socket, &msg, 0);
937 #elif !defined(UKERNEL)
939 * message receipt is done in rxk_input or rx_put.
944 * Copy an mblock to the contiguous area pointed to by cp.
945 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
946 * but it doesn't really.
947 * Returns the number of bytes not transferred.
948 * The message is NOT changed.
/* Copy up to `len` bytes from a STREAMS mblk chain into the flat buffer
 * `cp`, walking b_cont links and skipping non-M_DATA blocks.  As noted
 * above, `off` is accepted but not honored, and the chain itself is not
 * modified.  Returns the number of bytes NOT transferred. */
950 static int cpytoc(mblk_t *mp, register int off, register int len, register char *cp)
954 for (;mp && len > 0; mp = mp->b_cont) {
955 if (mp->b_datap->db_type != M_DATA) {
/* Copy whichever is smaller: remaining request or this block's data. */
958 n = MIN(len, (mp->b_wptr - mp->b_rptr));
959 memcpy(cp, (char *)mp->b_rptr, n);
967 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
968 * but it doesn't really.
969 * This sucks, anyway, do it like m_cpy.... below
/* Copy up to `len` bytes from a STREAMS mblk chain into an iovec array,
 * advancing through both structures.  Like cpytoc above, `off` is not
 * actually honored.  NOTE(review): excerpt truncated — the per-iovec
 * bookkeeping (i, o, t updates) is not visible here. */
971 static int cpytoiovec(mblk_t *mp, int off, int len, register struct iovec *iovs, int niovs)
973 register int m,n,o,t,i;
975 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
976 if (mp->b_datap->db_type != M_DATA) {
979 n = MIN(len, (mp->b_wptr - mp->b_rptr));
/* o = offset already filled within iovs[i]; m = bytes for this copy. */
988 memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
997 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
998 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1000 #if !defined(AFS_LINUX20_ENV)
/* BSD-mbuf analogue of cpytoiovec: copy `len` bytes starting `off` bytes
 * into mbuf chain `m` out to the iovec array.  Walks both the mbuf chain
 * and the iovec list, copying min(l1, l2, len) at each step.
 * NOTE(review): excerpt truncated — chain advancement and the return
 * value are not visible here. */
1001 static int m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1004 unsigned int l1, l2, i, t;
1006 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1007 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
/* Skip whole mbufs until `off` falls inside the current one. */
1010 if (m->m_len <= off) {
1020 p1 = mtod(m, caddr_t)+off;
1021 l1 = m->m_len - off;
1023 p2 = iovs[0].iov_base;
1024 l2 = iovs[0].iov_len;
/* Copy the largest run available from both sides, bounded by len. */
1027 t = MIN(l1, MIN(l2, (unsigned int)len));
1036 p1 = mtod(m, caddr_t);
1042 p2 = iovs[i].iov_base;
1043 l2 = iovs[i].iov_len;
1051 #endif /* AFS_SUN5_ENV */
1053 #if !defined(AFS_LINUX20_ENV)
/* Copy `data_len` bytes (skipping `hdr_len`) from mbuf chain `amb` into
 * the iovecs of packet `phandle`.  K&R-style definition; `free` is
 * presumably the chain-release routine — declaration lines for amb/free
 * are not visible in this excerpt. */
1054 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1061 struct rx_packet *phandle;
1062 int hdr_len, data_len;
1066 code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1072 #endif /*KERNEL && !UKERNEL*/
1075 /* send a response to a debug packet */
/* Handle an incoming rxdebug request packet `ap` and send the reply.
 * Dispatches on the request type (stats / connections / peers / rx
 * statistics), serializes the answer into `ap` in network byte order,
 * and transmits it back to (ahost, aport) via rxi_SendDebugPacket.
 * Only client-initiated requests are answered; the client flag is
 * cleared in the reply.  NOTE(review): excerpt is heavily truncated —
 * switch/brace structure, break statements, and the return are not all
 * visible here. */
1077 struct rx_packet *rxi_ReceiveDebugPacket(register struct rx_packet *ap,
1078 osi_socket asocket, afs_int32 ahost, short aport, int istack)
1080 struct rx_debugIn tin;
1082 struct rx_serverQueueEntry *np, *nqe;
1085 * Only respond to client-initiated Rx debug packets,
1086 * and clear the client flag in the response.
1088 if (ap->header.flags & RX_CLIENT_INITIATED) {
1089 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1090 rxi_EncodePacketHeader(ap);
1095 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1096 /* all done with packet, now set length to the truth, so we can
1097 * reuse this packet */
1098 rx_computelen(ap, ap->length);
/* Request fields arrive in network order. */
1100 tin.type = ntohl(tin.type);
1101 tin.index = ntohl(tin.index);
1103 case RX_DEBUGI_GETSTATS: {
1104 struct rx_debugStats tstat;
1106 /* get basic stats */
1107 memset((char *)&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1108 tstat.version = RX_DEBUGI_VERSION;
1109 #ifndef RX_ENABLE_LOCKS
1110 tstat.waitingForPackets = rx_waitingForPackets;
1112 tstat.nFreePackets = htonl(rx_nFreePackets);
1113 tstat.callsExecuted = htonl(rxi_nCalls);
1114 tstat.packetReclaims = htonl(rx_packetReclaims);
1115 tstat.usedFDs = CountFDs(64);
1116 tstat.nWaiting = htonl(rx_nWaiting);
1117 queue_Count( &rx_idleServerQueue, np, nqe,
1118 rx_serverQueueEntry, tstat.idleThreads);
1119 tstat.idleThreads = htonl(tstat.idleThreads);
/* Grow the reply packet if the struct doesn't fit yet. */
1120 tl = sizeof(struct rx_debugStats) - ap->length;
1122 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1125 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1126 ap->length = sizeof(struct rx_debugStats);
1127 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1128 rx_computelen(ap, ap->length);
1133 case RX_DEBUGI_GETALLCONN:
1134 case RX_DEBUGI_GETCONN: {
1136 register struct rx_connection *tc;
1137 struct rx_call *tcall;
1138 struct rx_debugConn tconn;
1139 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1142 tl = sizeof(struct rx_debugConn) - ap->length;
1144 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1148 memset((char *)&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1149 /* get N'th (maybe) "interesting" connection info */
1150 for(i=0;i<rx_hashTableSize;i++) {
1151 #if !defined(KERNEL)
1152 /* the time complexity of the algorithm used here
1153 * increases exponentially with the number of connections.
1155 #ifdef AFS_PTHREAD_ENV
1158 (void) IOMGR_Poll();
1161 MUTEX_ENTER(&rx_connHashTable_lock);
1162 /* We might be slightly out of step since we are not
1163 * locking each call, but this is only debugging output.
1165 for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
/* tin.index selects the N'th matching connection. */
1166 if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1167 tconn.host = tc->peer->host;
1168 tconn.port = tc->peer->port;
1169 tconn.cid = htonl(tc->cid);
1170 tconn.epoch = htonl(tc->epoch);
1171 tconn.serial = htonl(tc->serial);
1172 for(j=0;j<RX_MAXCALLS;j++) {
1173 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1174 if ((tcall=tc->call[j])) {
1175 tconn.callState[j] = tcall->state;
1176 tconn.callMode[j] = tcall->mode;
1177 tconn.callFlags[j] = tcall->flags;
1178 if (queue_IsNotEmpty(&tcall->rq))
1179 tconn.callOther[j] |= RX_OTHER_IN;
1180 if (queue_IsNotEmpty(&tcall->tq))
1181 tconn.callOther[j] |= RX_OTHER_OUT;
1183 else tconn.callState[j] = RX_STATE_NOTINIT;
1186 tconn.natMTU = htonl(tc->peer->natMTU);
1187 tconn.error = htonl(tc->error);
1188 tconn.flags = tc->flags;
1189 tconn.type = tc->type;
1190 tconn.securityIndex = tc->securityIndex;
1191 if (tc->securityObject) {
1192 RXS_GetStats (tc->securityObject, tc,
/* Convert the security stats to network order field by field. */
1194 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1195 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1198 DOHTONL(packetsReceived);
1199 DOHTONL(packetsSent);
1200 DOHTONL(bytesReceived);
1203 i<sizeof(tconn.secStats.spares)/sizeof(short);
1207 i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
1212 MUTEX_EXIT(&rx_connHashTable_lock);
1213 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1215 ap->length = sizeof(struct rx_debugConn);
1216 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1221 MUTEX_EXIT(&rx_connHashTable_lock);
1223 /* if we make it here, there are no more interesting connections */
1224 tconn.cid = htonl(0xffffffff); /* means end */
1225 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1227 ap->length = sizeof(struct rx_debugConn);
1228 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1234 * Pass back all the peer structures we have available
1237 case RX_DEBUGI_GETPEER: {
1239 register struct rx_peer *tp;
1240 struct rx_debugPeer tpeer;
1243 tl = sizeof(struct rx_debugPeer) - ap->length;
1245 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1249 memset((char *)&tpeer, 0, sizeof(tpeer));
1250 for(i=0;i<rx_hashTableSize;i++) {
1251 #if !defined(KERNEL)
1252 /* the time complexity of the algorithm used here
1253 * increases exponentially with the number of peers.
1255 * Yielding after processing each hash table entry
1256 * and dropping rx_peerHashTable_lock.
1257 * also increases the risk that we will miss a new
1258 * entry - but we are willing to live with this
1259 * limitation since this is meant for debugging only
1261 #ifdef AFS_PTHREAD_ENV
1264 (void) IOMGR_Poll();
1267 MUTEX_ENTER(&rx_peerHashTable_lock);
1268 for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
/* tin.index selects the N'th peer. */
1269 if (tin.index-- <= 0) {
1270 tpeer.host = tp->host;
1271 tpeer.port = tp->port;
1272 tpeer.ifMTU = htons(tp->ifMTU);
1273 tpeer.idleWhen = htonl(tp->idleWhen);
1274 tpeer.refCount = htons(tp->refCount);
1275 tpeer.burstSize = tp->burstSize;
1276 tpeer.burst = tp->burst;
1277 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1278 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1279 tpeer.rtt = htonl(tp->rtt);
1280 tpeer.rtt_dev = htonl(tp->rtt_dev);
1281 tpeer.timeout.sec = htonl(tp->timeout.sec);
1282 tpeer.timeout.usec = htonl(tp->timeout.usec);
1283 tpeer.nSent = htonl(tp->nSent);
1284 tpeer.reSends = htonl(tp->reSends);
1285 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1286 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1287 tpeer.rateFlag = htonl(tp->rateFlag);
1288 tpeer.natMTU = htons(tp->natMTU);
1289 tpeer.maxMTU = htons(tp->maxMTU);
1290 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1291 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1292 tpeer.MTU = htons(tp->MTU);
1293 tpeer.cwind = htons(tp->cwind);
1294 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1295 tpeer.congestSeq = htons(tp->congestSeq);
1296 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1297 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1298 tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1299 tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
1301 MUTEX_EXIT(&rx_peerHashTable_lock);
1302 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1304 ap->length = sizeof(struct rx_debugPeer);
1305 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1310 MUTEX_EXIT(&rx_peerHashTable_lock);
1312 /* if we make it here, there are no more peers to report */
1313 tpeer.host = htonl(0xffffffff); /* means end */
1314 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1316 ap->length = sizeof(struct rx_debugPeer);
1317 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1322 case RX_DEBUGI_RXSTATS: {
1326 tl = sizeof(rx_stats) - ap->length;
1328 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1332 /* Since its all int32s convert to network order with a loop. */
1333 MUTEX_ENTER(&rx_stats_mutex);
1334 s = (afs_int32 *)&rx_stats;
1335 for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1336 rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1339 ap->length = sizeof(rx_stats);
1340 MUTEX_EXIT(&rx_stats_mutex);
1341 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1347 /* error response packet */
1348 tin.type = htonl(RX_DEBUGI_BADTYPE);
1349 tin.index = tin.type;
1350 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1352 ap->length = sizeof(struct rx_debugIn);
1353 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* Reply to a version-request debug packet: overwrite the payload with the
 * library version string and bounce the packet back to the sender via
 * rxi_SendDebugPacket.  ahost/aport are stored unconverted into the reply
 * sockaddr by rxi_SendDebugPacket, so callers pass them already in network
 * byte order -- confirm against callers. */
1360 struct rx_packet *rxi_ReceiveVersionPacket(register struct rx_packet *ap,
1361 osi_socket asocket, afs_int32 ahost, short aport, int istack)
1366 * Only respond to client-initiated version requests, and
1367 * clear that flag in the response.
1369 if (ap->header.flags & RX_CLIENT_INITIATED) {
1372 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
/* Re-encode the wire header now that the flags word changed. */
1373 rxi_EncodePacketHeader(ap);
/* buf is zeroed first, so the strncpy below always leaves a
 * NUL-terminated string even when the version string is truncated. */
1374 memset(buf, 0, sizeof(buf));
/* cml_version_number+4 skips a 4-byte prefix -- presumably the "@(#)"
 * sccs ident marker; confirm against cml_version_number's definition. */
1375 strncpy(buf, cml_version_number+4, sizeof(buf)-1);
/* NOTE(review): writes a fixed 65 bytes regardless of sizeof(buf);
 * assumes buf is at least 65 bytes -- confirm buf's declaration. */
1376 rx_packetwrite(ap, 0, 65, buf);
1379 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1387 /* send a debug packet back to the sender */
/* The packet's iovec list is temporarily trimmed so that exactly
 * apacket->length payload bytes (plus the wire header) are handed to
 * osi_NetSend, then restored before returning.  Delivery is best-effort:
 * the send result is deliberately discarded. */
1388 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1389 afs_int32 ahost, short aport, afs_int32 istack)
1391 struct sockaddr_in taddr;
1397 int waslocked = ISAFS_GLOCK();
/* Destination address: aport/ahost are stored unconverted, so they must
 * already be in network byte order. */
1400 taddr.sin_family = AF_INET;
1401 taddr.sin_port = aport;
1402 taddr.sin_addr.s_addr = ahost;
1403 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
1404 taddr.sin_len = sizeof(struct sockaddr_in);
1407 /* We need to trim the niovecs. */
/* Walk the payload iovecs (index 0 is the wire header), counting down
 * the bytes still to send; shorten the iovec where the payload ends and
 * remember the old length/count so the trim can be undone below. */
1408 nbytes = apacket->length;
1409 for (i=1; i < apacket->niovecs; i++) {
1410 if (nbytes <= apacket->wirevec[i].iov_len) {
1411 savelen = apacket->wirevec[i].iov_len;
1412 saven = apacket->niovecs;
1413 apacket->wirevec[i].iov_len = nbytes;
1414 apacket->niovecs = i+1; /* so condition fails because i == niovecs */
1416 else nbytes -= apacket->wirevec[i].iov_len;
/* Drop the AFS global lock (if held) around the network send. */
1420 if (waslocked) AFS_GUNLOCK();
1422 /* debug packets are not reliably delivered, hence the cast below. */
1423 (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1424 apacket->length+RX_HEADER_SIZE, istack);
1426 if (waslocked) AFS_GLOCK();
1429 if (saven) { /* means we truncated the packet above. */
/* After the loop exits, i is one past the trimmed iovec (see the
 * niovecs = i+1 trick above), so i-1 indexes the iovec to restore. */
1430 apacket->wirevec[i-1].iov_len = savelen;
1431 apacket->niovecs = saven;
1436 /* Send the packet to appropriate destination for the specified
1437 * call. The header is first encoded and placed in the packet.
/* Side effects visible here: stamps p->header.serial (under
 * conn_data_lock), may set p->firstSerial, updates retryTime on send
 * failure, and bumps rx_stats / peer byte counters on success. */
1439 void rxi_SendPacket(struct rx_call * call, struct rx_connection * conn,
1440 struct rx_packet *p, int istack)
1446 struct sockaddr_in addr;
1447 register struct rx_peer *peer = conn->peer;
1450 char deliveryType = 'S';
1452 /* The address we're sending the packet to */
/* peer->host/port are copied unconverted, so they are kept in network
 * byte order in the peer structure -- consistent with the sockaddr use. */
1453 memset(&addr, 0, sizeof (addr));
1454 addr.sin_family = AF_INET;
1455 addr.sin_port = peer->port;
1456 addr.sin_addr.s_addr = peer->host;
1458 /* This stuff should be revamped, I think, so that most, if not
1459 * all, of the header stuff is always added here. We could
1460 * probably do away with the encode/decode routines. XXXXX */
1462 /* Stamp each packet with a unique serial number. The serial
1463 * number is maintained on a connection basis because some types
1464 * of security may be based on the serial number of the packet,
1465 * and security is handled on a per authenticated-connection
1467 /* Pre-increment, to guarantee no zero serial number; a zero
1468 * serial number means the packet was never sent. */
1469 MUTEX_ENTER(&conn->conn_data_lock);
1470 p->header.serial = ++conn->serial;
1471 MUTEX_EXIT(&conn->conn_data_lock);
1472 /* This is so we can adjust retransmit time-outs better in the face of
1473 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1475 if (p->firstSerial == 0) {
1476 p->firstSerial = p->header.serial;
1480 /* If an output tracer function is defined, call it with the packet and
1481 * network address. Note this function may modify its arguments. */
1482 if (rx_almostSent) {
1483 int drop = (*rx_almostSent) (p, &addr);
1484 /* drop packet if return value is non-zero? */
1485 if (drop) deliveryType = 'D'; /* Drop the packet */
1489 /* Get network byte order header */
1490 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1491 * touch ALL the fields */
1493 /* Send the packet out on the same socket that related packets are being
/* Clients always use the shared rx_socket; servers use the socket
 * belonging to the service that owns this connection. */
1495 socket = (conn->type == RX_CLIENT_CONNECTION
1496 ? rx_socket : conn->service->socket);
1499 /* Possibly drop this packet, for testing purposes */
1500 if ((deliveryType == 'D') ||
1501 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1502 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1503 deliveryType = 'D'; /* Drop the packet */
1506 deliveryType = 'S'; /* Send the packet */
1507 #endif /* RXDEBUG */
1509 /* Loop until the packet is sent. We'd prefer just to use a
1510 * blocking socket, but unfortunately the interface doesn't
1511 * allow us to have the socket block in send mode, and not
1512 * block in receive mode */
1515 waslocked = ISAFS_GLOCK();
1516 if (waslocked) AFS_GUNLOCK();
1518 if ((code = osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1519 p->length+RX_HEADER_SIZE, istack)) != 0) {
1520 /* send failed, so let's hurry up the resend, eh? */
1521 MUTEX_ENTER(&rx_stats_mutex);
1522 rx_stats.netSendFailures++;
1523 MUTEX_EXIT(&rx_stats_mutex);
/* Retry shortly after the original send time; backoff scales the
 * delay in ~256ms units (backoff << 8 msec). */
1524 p->retryTime = p->timeSent; /* resend it very soon */
1525 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1527 #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
1528 /* Linux is nice -- it can tell us right away that we cannot
1529 * reach this recipient by returning an ENETUNREACH error
1530 * code. So, when this happens let's "down" the host NOW so
1531 * we don't sit around waiting for this host to timeout later.
/* Zeroing lastReceiveTime is what marks the call's host as down. */
1533 if (call && code == -ENETUNREACH)
1534 call->lastReceiveTime = 0;
1538 if (waslocked) AFS_GLOCK();
/* NOTE(review): "%0.3d" in the format below likely intends zero-padded
 * milliseconds, i.e. "%03d" -- confirm dpf's printf semantics. */
1543 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1544 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1545 peer->host, peer->port, p->header.serial, p->header.epoch,
1546 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1547 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1549 MUTEX_ENTER(&rx_stats_mutex);
1550 rx_stats.packetsSent[p->header.type-1]++;
1551 MUTEX_EXIT(&rx_stats_mutex);
1552 MUTEX_ENTER(&peer->peer_lock);
1553 hadd32(peer->bytesSent, p->length);
1554 MUTEX_EXIT(&peer->peer_lock);
1557 /* Send a list of packets to appropriate destination for the specified
1558 * connection. The headers are first encoded and placed in the packets.
/* The len packets are assembled into one AFS 3.5 jumbogram: a single
 * wire header (from list[0]) followed by each packet's data buffer,
 * with a jumbo sub-header packed between fragments.  A contiguous
 * range of serial numbers is reserved up front so each fragment gets
 * its own serial. */
1560 void rxi_SendPacketList(struct rx_call * call, struct rx_connection * conn,
1561 struct rx_packet **list, int len, int istack)
1563 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1566 struct sockaddr_in addr;
1567 register struct rx_peer *peer = conn->peer;
1569 struct rx_packet *p = NULL;
1570 struct iovec wirevec[RX_MAXIOVECS];
1571 int i, length, code;
1574 struct rx_jumboHeader *jp;
1576 char deliveryType = 'S';
1578 /* The address we're sending the packet to */
/* NOTE(review): unlike rxi_SendPacket, addr is not memset to zero here,
 * so sin_zero (and sin_len where present) stay uninitialized -- confirm
 * this is harmless for osi_NetSend on all platforms. */
1579 addr.sin_family = AF_INET;
1580 addr.sin_port = peer->port;
1581 addr.sin_addr.s_addr = peer->host;
/* One iovec for the header plus one per packet must fit. */
1583 if (len+1 > RX_MAXIOVECS) {
1584 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1588 * Stamp the packets in this jumbogram with consecutive serial numbers
/* Reserve len serials atomically; they are handed out below without
 * re-taking the lock. */
1590 MUTEX_ENTER(&conn->conn_data_lock);
1591 serial = conn->serial;
1592 conn->serial += len;
1593 MUTEX_EXIT(&conn->conn_data_lock);
1596 /* This stuff should be revamped, I think, so that most, if not
1597 * all, of the header stuff is always added here. We could
1598 * probably do away with the encode/decode routines. XXXXX */
/* iovec 0 is the (single) wire header taken from the first packet. */
1601 length = RX_HEADER_SIZE;
1602 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1603 wirevec[0].iov_len = RX_HEADER_SIZE;
1604 for (i = 0 ; i < len ; i++) {
1607 /* The whole 3.5 jumbogram scheme relies on packets fitting
1608 * in a single packet buffer. */
1609 if (p->niovecs > 2) {
1610 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1613 /* Set the RX_JUMBO_PACKET flags in all but the last packets
/* Non-final fragments must be exactly RX_JUMBOBUFFERSIZE so the
 * receiver can locate the embedded jumbo headers. */
1616 if (p->length != RX_JUMBOBUFFERSIZE) {
1617 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1619 p->header.flags |= RX_JUMBO_PACKET;
1620 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1621 wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1623 wirevec[i+1].iov_len = p->length;
1624 length += p->length;
1626 wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
1628 /* Convert jumbo packet header to network byte order */
/* Jumbo sub-header word: flags in the top byte, spare in the low
 * bits (mirrors the main header's packed flags word layout). */
1629 temp = (afs_uint32)(p->header.flags) << 24;
1630 temp |= (afs_uint32)(p->header.spare);
1631 *(afs_uint32 *)jp = htonl(temp);
/* jp points at the jumbo header slot just past this packet's data,
 * to be filled in on the next loop iteration. */
1633 jp = (struct rx_jumboHeader *)
1634 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1636 /* Stamp each packet with a unique serial number. The serial
1637 * number is maintained on a connection basis because some types
1638 * of security may be based on the serial number of the packet,
1639 * and security is handled on a per authenticated-connection
1641 /* Pre-increment, to guarantee no zero serial number; a zero
1642 * serial number means the packet was never sent. */
1643 p->header.serial = ++serial;
1644 /* This is so we can adjust retransmit time-outs better in the face of
1645 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1647 if (p->firstSerial == 0) {
1648 p->firstSerial = p->header.serial;
1652 /* If an output tracer function is defined, call it with the packet and
1653 * network address. Note this function may modify its arguments. */
1654 if (rx_almostSent) {
1655 int drop = (*rx_almostSent) (p, &addr);
1656 /* drop packet if return value is non-zero? */
1657 if (drop) deliveryType = 'D'; /* Drop the packet */
1661 /* Get network byte order header */
1662 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1663 * touch ALL the fields */
1666 /* Send the packet out on the same socket that related packets are being
1668 socket = (conn->type == RX_CLIENT_CONNECTION
1669 ? rx_socket : conn->service->socket);
1672 /* Possibly drop this packet, for testing purposes */
1673 if ((deliveryType == 'D') ||
1674 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1675 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1676 deliveryType = 'D'; /* Drop the packet */
1679 deliveryType = 'S'; /* Send the packet */
1680 #endif /* RXDEBUG */
1682 /* Loop until the packet is sent. We'd prefer just to use a
1683 * blocking socket, but unfortunately the interface doesn't
1684 * allow us to have the socket block in send mode, and not
1685 * block in receive mode */
1687 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1688 waslocked = ISAFS_GLOCK();
1689 if (!istack && waslocked) AFS_GUNLOCK();
/* One send for the whole jumbogram: header iovec plus len data iovecs. */
1691 if ((code = osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)) != 0){
1692 /* send failed, so let's hurry up the resend, eh? */
1693 MUTEX_ENTER(&rx_stats_mutex);
1694 rx_stats.netSendFailures++;
1695 MUTEX_EXIT(&rx_stats_mutex);
/* On failure every constituent packet is scheduled for early resend. */
1696 for (i = 0 ; i < len ; i++) {
1698 p->retryTime = p->timeSent; /* resend it very soon */
1699 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1701 #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
1702 /* Linux is nice -- it can tell us right away that we cannot
1703 * reach this recipient by returning an ENETUNREACH error
1704 * code. So, when this happens let's "down" the host NOW so
1705 * we don't sit around waiting for this host to timeout later.
1707 if (call && code == -ENETUNREACH)
1708 call->lastReceiveTime = 0;
1711 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1712 if (!istack && waslocked) AFS_GLOCK();
/* NOTE(review): "%0.3d" below likely intends "%03d" -- confirm. */
1717 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1718 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1719 peer->host, peer->port, p->header.serial, p->header.epoch,
1720 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1721 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1723 MUTEX_ENTER(&rx_stats_mutex);
1724 rx_stats.packetsSent[p->header.type-1]++;
1725 MUTEX_EXIT(&rx_stats_mutex);
1726 MUTEX_ENTER(&peer->peer_lock);
1727 hadd32(peer->bytesSent, p->length);
1728 MUTEX_EXIT(&peer->peer_lock);
1732 /* Send a "special" packet to the peer connection. If call is
1733 * specified, then the packet is directed to a specific call channel
1734 * associated with the connection, otherwise it is directed to the
1735 * connection only. Uses optionalPacket if it is supplied, rather than
1736 * allocating a new packet buffer. Nbytes is the length of the data
1737 * portion of the packet. If data is non-null, nbytes of data are
1738 * copied into the packet. Type is the type of the packet, as defined
1739 * in rx.h. Bug: there's a lot of duplication between this and other
1740 * routines. This needs to be cleaned up. */
/* Returns optionalPacket (possibly NULL); a freshly allocated packet is
 * freed before returning, so ownership of optionalPacket stays with the
 * caller. */
1741 struct rx_packet *rxi_SendSpecial(register struct rx_call *call,
1742 register struct rx_connection *conn, struct rx_packet *optionalPacket,
1743 int type, char *data, int nbytes, int istack)
1745 /* Some of the following stuff should be common code for all
1746 * packet sends (it's repeated elsewhere) */
1747 register struct rx_packet *p;
1749 int savelen = 0, saven = 0;
1750 int channel, callNumber;
1752 channel = call->channel;
1753 callNumber = *call->callNumber;
1754 /* BUSY packets refer to the next call on this connection */
1755 if (type == RX_PACKET_TYPE_BUSY) {
/* Allocation failure here is fatal by design. */
1764 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1765 if (!p) osi_Panic("rxi_SendSpecial failure");
/* Fill in the header from connection/call state. */
1772 p->header.serviceId = conn->serviceId;
1773 p->header.securityIndex = conn->securityIndex;
1774 p->header.cid = (conn->cid | channel);
1775 p->header.callNumber = callNumber;
1777 p->header.epoch = conn->epoch;
1778 p->header.type = type;
1779 p->header.flags = 0;
1780 if (conn->type == RX_CLIENT_CONNECTION)
1781 p->header.flags |= RX_CLIENT_INITIATED;
1783 rx_packetwrite(p, 0, nbytes, data);
/* Trim the iovec list to exactly nbytes of payload before sending
 * (same technique as rxi_SendDebugPacket). */
1785 for (i=1; i < p->niovecs; i++) {
1786 if (nbytes <= p->wirevec[i].iov_len) {
1787 savelen = p->wirevec[i].iov_len;
1789 p->wirevec[i].iov_len = nbytes;
1790 p->niovecs = i+1; /* so condition fails because i == niovecs */
1792 else nbytes -= p->wirevec[i].iov_len;
/* With a call, go through rxi_Send (per-call path); otherwise send
 * directly on the connection. */
1795 if (call) rxi_Send(call, p, istack);
1796 else rxi_SendPacket((struct rx_call *)0, conn, p, istack);
1797 if (saven) { /* means we truncated the packet above. We probably don't */
1798 /* really need to do this, but it seems safer this way, given that */
1799 /* sneaky optionalPacket... */
1800 p->wirevec[i-1].iov_len = savelen;
1803 if (!optionalPacket) rxi_FreePacket(p);
1804 return optionalPacket;
1808 /* Encode the packet's header (from the struct header in the packet to
1809 * the net byte order representation in the wire representation of the
1810 * packet, which is what is actually sent out on the wire) */
/* The wire header lives in wirevec[0]; it is zeroed and then written as
 * seven 32-bit big-endian words. */
1811 void rxi_EncodePacketHeader(register struct rx_packet *p)
1813 register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */
1815 memset((char *)buf, 0, RX_HEADER_SIZE);
1816 *buf++ = htonl(p->header.epoch);
1817 *buf++ = htonl(p->header.cid);
1818 *buf++ = htonl(p->header.callNumber);
1819 *buf++ = htonl(p->header.seq);
1820 *buf++ = htonl(p->header.serial);
/* Packed word: type | flags | userStatus | securityIndex, one byte each,
 * type in the most significant byte. */
1821 *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
1822 | (((afs_uint32)p->header.flags)<<16)
1823 | (p->header.userStatus<<8) | p->header.securityIndex);
1824 /* Note: top 16 bits of this next word were reserved */
/* Final word: spare in the high half, serviceId in the low half. */
1825 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1828 /* Decode the packet's header (from net byte order to a struct header) */
/* Exact inverse of rxi_EncodePacketHeader: reads the seven big-endian
 * 32-bit words out of wirevec[0] into p->header. */
1829 void rxi_DecodePacketHeader(register struct rx_packet *p)
1831 register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base); /* MTUXXX */
1834 p->header.epoch = ntohl(*buf);
1836 p->header.cid = ntohl(*buf);
1838 p->header.callNumber = ntohl(*buf);
1840 p->header.seq = ntohl(*buf);
1842 p->header.serial = ntohl(*buf);
/* temp holds the host-order packed type/flags/userStatus/securityIndex
 * word (layout per rxi_EncodePacketHeader). */
1848 /* C will truncate byte fields to bytes for me */
1849 p->header.type = temp>>24;
1850 p->header.flags = temp>>16;
1851 p->header.userStatus = temp>>8;
1852 p->header.securityIndex = temp>>0;
/* Final word: low half is serviceId, high half is spare. */
1857 p->header.serviceId = (temp&0xffff);
1858 p->header.spare = temp>>16;
1859 /* Note: top 16 bits of this last word are the security checksum */
/* Prepare a DATA packet for its first transmission on the given call:
 * fill the header from call/connection state, assign the next sequence
 * number, reconcile the iovec lengths with p->length (freeing surplus
 * cluster buffers), and let the security object do its per-packet work. */
1862 void rxi_PrepareSendPacket(register struct rx_call *call, register struct rx_packet *p,
1865 register struct rx_connection *conn = call->conn;
1867 ssize_t len; /* len must be a signed type; it can go negative */
1869 p->flags &= ~RX_PKTFLAG_ACKED;
1870 p->header.cid = (conn->cid | call->channel);
1871 p->header.serviceId = conn->serviceId;
1872 p->header.securityIndex = conn->securityIndex;
1873 p->header.callNumber = *call->callNumber;
/* Sequence numbers are allocated here, one per prepared packet. */
1874 p->header.seq = call->tnext++;
1875 p->header.epoch = conn->epoch;
1876 p->header.type = RX_PACKET_TYPE_DATA;
1877 p->header.flags = 0;
1878 p->header.spare = 0;
1879 if (conn->type == RX_CLIENT_CONNECTION)
1880 p->header.flags |= RX_CLIENT_INITIATED;
1883 p->header.flags |= RX_LAST_PACKET;
1885 clock_Zero(&p->retryTime); /* Never yet transmitted */
1886 clock_Zero(&p->firstSent); /* Never yet transmitted */
1887 p->header.serial = 0; /* Another way of saying never transmitted... */
1890 /* Now that we're sure this is the last data on the call, make sure
1891 * that the "length" and the sum of the iov_lens matches. */
/* Walk the payload iovecs until the (security-padded) length is used
 * up; afterwards len holds the (non-positive) remainder. */
1892 len = p->length + call->conn->securityHeaderSize;
1894 for (i=1; i < p->niovecs && len > 0; i++) {
1895 len -= p->wirevec[i].iov_len;
1898 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1901 /* Free any extra elements in the wirevec */
/* Iovecs 0 (header) and 1 are part of the packet itself; anything from
 * index MAX(2,i) on is a detachable cluster buffer to be returned. */
1902 for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1903 rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
/* Shorten the last used iovec by the overshoot (len <= 0 here). */
1906 p->wirevec[i-1].iov_len += len;
1908 RXS_PreparePacket(conn->securityObject, call, p);
1911 /* Given an interface MTU size, calculate an adjusted MTU size that
1912 * will make efficient use of the RX buffers when the peer is sending
1913 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
/* Rounds the MTU down to a whole number of jumbo fragments on top of
 * the base (header + one jumbo buffer + jumbo header) size. */
1914 int rxi_AdjustIfMTU(int mtu)
1919 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
/* MTUs at or below the base size need no fragment rounding. */
1920 if (mtu <= adjMTU) {
1927 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1928 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1931 /* Given an interface MTU size, and the peer's advertised max receive
1932 * size, calculate an adjusted maxMTU size that makes efficient use
1933 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
/* Upper-bounds (per-frame MTU * our max send fragments) by what the
 * peer says it can receive, then rounds via rxi_AdjustIfMTU. */
1934 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1936 int maxMTU = mtu * rxi_nSendFrags;
1937 maxMTU = MIN(maxMTU, peerMaxMTU);
1938 return rxi_AdjustIfMTU(maxMTU);
1941 /* Given a packet size, figure out how many datagram packets will fit.
1942 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1943 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1944 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
/* frags is the fragment count, mtu the per-fragment payload size.
 * IPv6_FRAG_HDR_SIZE is added to the minimum-size check so the result
 * is safe even when IPv6 fragmentation headers consume part of the MTU. */
1945 int rxi_AdjustDgramPackets(int frags, int mtu)
1948 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
/* Total datagram capacity across all fragments, minus the UDP header
 * that is only carried once. */
1951 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1952 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1953 /* subtract the size of the first and last packets */
1954 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
/* 2 accounts for the first and last packets; the remainder holds the
 * middle (jumbo buffer + jumbo header) packets. */
1958 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));