 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include "afs/param.h"
#include <afs/param.h>
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#endif
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#endif
#if defined(AFS_OBSD_ENV)
#endif
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#endif
#endif
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
struct rx_packet *rx_mallocedP = 0;

extern char cml_version_number[];
extern int (*rx_almostSent) ();
static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport,
				afs_int32 istack);
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */

/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of
 *        the word size.
 *        offset is an integral multiple of the word size.
 */
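/* Illustrative sketch (an added note, not from the original source): a
 * freshly allocated packet's iovec array is laid out as
 *
 *   wirevec[0] -> p->wirehead,  RX_HEADER_SIZE bytes    (the wire header)
 *   wirevec[1] -> p->localdata, RX_FIRSTBUFFERSIZE bytes of user data
 *   wirevec[2..niovecs-1] -> continuation buffers of RX_CBUFFERSIZE each
 *
 * which is why the offset walks below start at index 1: byte offsets are
 * relative to the data area and never include the header iovec.
 */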
afs_int32
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    return
		*((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
				 (offset - l)));
	}
	l += packet->wirevec[i].iov_len;
    }

    return 0;
}
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the word size.
 *        offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	    return 0;
	}
	l += packet->wirevec[i].iov_len;
    }

    return 0;
}
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the
 *        word size.
 *        offset is an integral multiple of the word size.
 *        all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
unsigned int
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
		  char *out)
{
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((resid > 0) && (i < packet->niovecs)) {
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	resid -= j;
	out += j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (resid ? (r - resid) : r);
}
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the
 *        word size.
 *        offset is an integral multiple of the word size.
 */
unsigned int
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
{
    unsigned int i, j, l, r;
    char *b;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((resid > 0) && (i < RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
		break;

	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	memcpy(b, in, j);
	resid -= j;
	in += j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (resid ? (r - resid) : r);
}
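/* Minimal usage sketch (an added illustration, not original code).  The
 * rx_packetread()/rx_packetwrite() wrappers normally take a fast
 * contiguous path and fall back to the Slow routines above when a range
 * spans iovec boundaries; calling the Slow routines directly always works:
 *
 *	char buf[16], check[16];
 *	memcpy(buf, "0123456789abcdef", sizeof(buf));
 *	if (rx_SlowWritePacket(p, 0, sizeof(buf), buf) == sizeof(buf))
 *	    (void)rx_SlowReadPacket(p, 0, sizeof(check), check);
 *
 * Both return the number of bytes actually transferred, which is short
 * only when the packet cannot be grown to cover the request.
 */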
static struct rx_packet *
allocCBuf(int class)
{
    struct rx_packet *c;

    MUTEX_ENTER(&rx_freePktQ_lock);

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
	c = NULL;
	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	switch (class) {
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	    break;
	}
	MUTEX_EXIT(&rx_stats_mutex);
	goto done;
    }

    if (queue_IsEmpty(&rx_freePacketQueue)) {
	c = NULL;
	rxi_NeedMorePackets = TRUE;
	goto done;
    }
#else /* KERNEL */
    if (queue_IsEmpty(&rx_freePacketQueue)) {
	rxi_MorePacketsNoLock(rx_initSendWindow);
    }
#endif /* KERNEL */

    rx_nFreePackets--;
    c = queue_First(&rx_freePacketQueue, rx_packet);
    queue_Remove(c);
    if (!(c->flags & RX_PKTFLAG_FREE))
	osi_Panic("rxi_AllocPacket: packet not free\n");
    c->flags = 0;		/* clear RX_PKTFLAG_FREE, initialize the rest */

#ifdef KERNEL
  done:
#endif
    MUTEX_EXIT(&rx_freePktQ_lock);
    return c;
}
/*
 * Free a packet currently used as a continuation buffer
 */
void
rxi_freeCBuf(struct rx_packet *c)
{
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreePacketNoLock(c);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
void
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
{
    int i;

    i = p->niovecs - 1;
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	}
    } else {
	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	}
    }
}
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by allocCBuf */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
    int i;

    for (i = p->niovecs; nb > 0 && i < RX_MAXWVECS; i++) {
	register struct rx_packet *cb;
	if ((cb = allocCBuf(class))) {
	    p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	    p->wirevec[i].iov_len = RX_CBUFFERSIZE;
	    nb -= RX_CBUFFERSIZE;
	    p->length += RX_CBUFFERSIZE;
	    p->niovecs++;
	} else
	    break;
    }

    return nb;
}
/* Add more packet buffers */
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
	p->wirevec[0].iov_base = (char *)(p->wirehead);
	p->wirevec[0].iov_len = RX_HEADER_SIZE;
	p->wirevec[1].iov_base = (char *)(p->localdata);
	p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
	p->flags |= RX_PKTFLAG_FREE;
	p->niovecs = 2;

	queue_Append(&rx_freePacketQueue, p);
    }
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
/* Add more packet buffers */
void
rxi_MorePacketsNoLock(int apackets)
{
    struct rx_packet *p, *e;
    int getme;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    memset((char *)p, 0, getme);

    for (e = p + apackets; p < e; p++) {
	p->wirevec[0].iov_base = (char *)(p->wirehead);
	p->wirevec[0].iov_len = RX_HEADER_SIZE;
	p->wirevec[1].iov_base = (char *)(p->localdata);
	p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
	p->flags |= RX_PKTFLAG_FREE;
	p->niovecs = 2;

	queue_Append(&rx_freePacketQueue, p);
    }
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
}
void
rxi_FreeAllPackets(void)
{
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
}
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
void
rx_CheckPackets(void)
{
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_initSendWindow);
    }
}
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    dpf(("Free %lx\n", (unsigned long)p));

    if (p->flags & RX_PKTFLAG_FREE)
	osi_Panic("rxi_FreePacketNoLock: packet already free\n");

    rx_nFreePackets++;
    p->flags |= RX_PKTFLAG_FREE;
    queue_Append(&rx_freePacketQueue, p);
}
int
rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
{
    struct iovec *iov, *end;

    if (first != 1)		/* MTUXXX */
	osi_Panic("FreeDataBufs 1: first must be 1");
    iov = &p->wirevec[1];
    end = iov + (p->niovecs - 1);
    if (iov->iov_base != (caddr_t) p->localdata)	/* MTUXXX */
	osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
    for (iov++; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }
    p->length = 0;
    p->niovecs = 0;

    return 0;
}
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
void
rxi_RestoreDataBufs(struct rx_packet *p)
{
    int i;
    struct iovec *iov = &p->wirevec[2];

    p->wirevec[0].iov_base = (char *)(p->wirehead);
    p->wirevec[0].iov_len = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *)(p->localdata);
    p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	    rxi_nBadIovecs++;
	    p->niovecs = i;
	    break;
	}
	iov->iov_len = RX_CBUFFERSIZE;
    }
}
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov >= end)
	return;

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
	p->niovecs--;
    }

    MUTEX_EXIT(&rx_freePktQ_lock);
}
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
void
rxi_FreePacket(struct rx_packet *p)
{
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 1);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
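/* For example (an added note, not from the original source): immediately
 * after rxi_AllocPacketNoLock() below returns, p->length is
 * RX_FIRSTBUFFERSIZE, while the datagram that osi_NetSend() eventually
 * puts on the wire is p->length + RX_HEADER_SIZE bytes long. */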
struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    register struct rx_packet *p;

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	switch (class) {
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	    break;
	}
	MUTEX_EXIT(&rx_stats_mutex);
	return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

#ifdef KERNEL
    if (queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
    if (queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(rx_initSendWindow);
#endif /* KERNEL */

    rx_nFreePackets--;
    p = queue_First(&rx_freePacketQueue, rx_packet);
    if (!(p->flags & RX_PKTFLAG_FREE))
	osi_Panic("rxi_AllocPacket: packet not free\n");

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    queue_Remove(p);
    p->flags = 0;		/* clear RX_PKTFLAG_FREE, initialize the rest */

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    p->wirevec[0].iov_base = (char *)(p->wirehead);
    p->wirevec[0].iov_len = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *)(p->localdata);
    p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
    p->niovecs = 2;
    p->length = RX_FIRSTBUFFERSIZE;
    return p;
}
struct rx_packet *
rxi_AllocPacket(int class)
{
    register struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
    return p;
}
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
struct rx_packet *
rxi_AllocSendPacket(register struct rx_call *call, int want)
{
    register struct rx_packet *p = (struct rx_packet *)0;
    register int mud;
    register unsigned delta;

    mud = call->MTU - RX_HEADER_SIZE;
    delta =
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);

	    want += delta;
	    want = MIN(want, mud);

	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);

	    if ((unsigned)p->length > mud)
		p->length = mud;

	    if (delta >= p->length) {
		rxi_FreePacket(p);
		p = NULL;
	    } else {
		p->length -= delta;
	    }
	    break;
	}

	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we
	 * keep waiting. */
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;
#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
	osi_rxSleep(&rx_waitingForPackets);
#endif
	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
    }

    return p;
}
#ifndef KERNEL
/* count the number of used FDs */
static int
CountFDs(register int amax)
{
    struct stat tstat;
    register int i, code;
    int count;

    count = 0;
    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
	if (code == 0)
	    count++;
    }
    return count;
}
#else /* KERNEL */
#define CountFDs(amax) amax
#endif /* KERNEL */
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
int
rxi_ReadPacket(int socket, register struct rx_packet *p, afs_uint32 * host,
	       u_short * port)
{
    struct sockaddr_in from;
    int nbytes;
    afs_int32 rlen;
    register afs_int32 tlen, savelen;
    struct msghdr msg;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
    tlen = rlen - tlen;
    if (tlen > 0) {
	tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
	if (tlen > 0) {
	    tlen = rlen - tlen;
	} else
	    tlen = rlen;
    } else
	tlen = rlen;

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes > 0)
	    rxi_MorePackets(rx_initSendWindow);
	else if (nbytes < 0 && errno == EWOULDBLOCK) {
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.noPacketOnRead++;
	    MUTEX_EXIT(&rx_stats_mutex);
	} else {
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.bogusPacketOnRead++;
	    rx_stats.bogusHost = from.sin_addr.s_addr;
	    MUTEX_EXIT(&rx_stats_mutex);
	    dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
		 from.sin_port, nbytes));
	}
	return 0;
    } else {
	/* Extract packet header. */
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
	    struct rx_peer *peer;
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.packetsRead[p->header.type - 1]++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    /*
	     * Try to look up this peer structure.  If it doesn't exist,
	     * don't create a new one -
	     * we don't keep count of the bytes sent/received if a peer
	     * structure doesn't already exist.
	     *
	     * The peer/connection cleanup code assumes that there is 1 peer
	     * per connection.  If we actually created a peer structure here
	     * and this packet was an rxdebug packet, the peer structure would
	     * never be cleaned up.
	     */
	    peer = rxi_FindPeer(*host, *port, 0, 0);
	    /* Since this may not be associated with a connection,
	     * it may have no refCount, meaning we could race with
	     * ReapConnections
	     */
	    if (peer && (peer->refCount > 0)) {
		MUTEX_ENTER(&peer->peer_lock);
		hadd32(peer->bytesReceived, p->length);
		MUTEX_EXIT(&peer->peer_lock);
	    }
	}

	/* Free any empty packet buffers at the end of this packet */
	rxi_TrimDataBufs(p, 1);

	return 1;
    }
}
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
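/* Illustrative datagram layout (an added sketch, not original source):
 * a two-packet jumbogram occupies one UDP datagram as
 *
 *   | rx header | RX_JUMBOBUFFERSIZE data | 4-byte jumbo hdr | trailing data |
 *
 * where the 4-byte rx_jumboHeader holds the flags and header checksum
 * ("spare") of the packet that follows it, as decoded below. */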
struct rx_packet *
rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
		     int first)
{
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    int niov, i;
    struct iovec *iov;
    int length;
    afs_uint32 temp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
	return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
	return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    p->niovecs = 2;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

    return np;
}
#if !defined(KERNEL) || defined(UKERNEL)
/* Send a udp datagram */
int
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
{
    struct msghdr msg;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    rxi_Sendmsg(socket, &msg, 0);

    return 0;
}
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
static int
cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
{
    register int n;

    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return len;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);
	cp += n;
	len -= n;
    }
    return len;
}
/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
static int
cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
	   int niovs)
{
    register int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return len;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	o = 0;
	while (n) {
	    if (!t) {
		o = 0;
		i++;
		t = iovs[i].iov_len;
	    }
	    m = MIN(n, t);
	    memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
	    mp->b_rptr += m;
	    o += m;
	    t -= m;
	    n -= m;
	    len -= m;
	}
    }
    return len;
}
#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#else /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV)
static int
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
{
    caddr_t p1, p2;
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

    while (off && m)
	if (m->m_len <= off) {
	    off -= m->m_len;
	    m = m->m_next;
	    continue;
	} else
	    break;

    if (m == NULL)
	return len;

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    i = 0;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    while (len) {
	t = MIN(l1, MIN(l2, (unsigned int)len));
	memcpy(p2, p1, t);
	p1 += t;
	p2 += t;
	l1 -= t;
	l2 -= t;
	len -= t;
	if (!l1) {
	    m = m->m_next;
	    if (!m)
		break;
	    p1 = mtod(m, caddr_t);
	    l1 = m->m_len;
	}
	if (!l2) {
	    if (++i >= niovs)
		break;
	    p2 = iovs[i].iov_base;
	    l2 = iovs[i].iov_len;
	}
    }

    return len;
}
#endif /* AFS_LINUX20_ENV */
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV)
int
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     mblk_t *amb;
#else
     struct mbuf *amb;
#endif
     void (*free) ();
     struct rx_packet *phandle;
     int hdr_len, data_len;
{
    int code;

    code =
	m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
		     phandle->niovecs);
    (*free) (amb);

    return code;
}
#endif /* AFS_LINUX20_ENV */
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */

struct rx_packet *
rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
		       afs_int32 ahost, short aport, int istack)
{
    struct rx_debugIn tin;
    afs_int32 tl;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
    } else {
	return ap;
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;

	    /* get basic stats */
	    memset((char *)&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
#endif
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_nWaiting);
	    tstat.nWaited = htonl(rx_nWaited);
	    queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
			tstat.idleThreads);
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    if (tl <= 0) {
		rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
			       (char *)&tstat);
		ap->length = sizeof(struct rx_debugStats);
		rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
		rx_computelen(ap, ap->length);
	    }
	    break;
	}
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    int i, j;
	    register struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);

	    tl = sizeof(struct rx_debugConn) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    memset((char *)&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of connections.
		 */
#ifdef AFS_PTHREAD_ENV
		sleep(1);
#else
		IOMGR_Sleep(1);
#endif
#endif
		MUTEX_ENTER(&rx_connHashTable_lock);
		/* We might be slightly out of step since we are not
		 * locking each call, but this is only debugging output.
		 */
		for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		    if ((all || rxi_IsConnInteresting(tc))
			&& tin.index-- <= 0) {
			tconn.host = tc->peer->host;
			tconn.port = tc->peer->port;
			tconn.cid = htonl(tc->cid);
			tconn.epoch = htonl(tc->epoch);
			tconn.serial = htonl(tc->serial);
			for (j = 0; j < RX_MAXCALLS; j++) {
			    tconn.callNumber[j] = htonl(tc->callNumber[j]);
			    if ((tcall = tc->call[j])) {
				tconn.callState[j] = tcall->state;
				tconn.callMode[j] = tcall->mode;
				tconn.callFlags[j] = tcall->flags;
				if (queue_IsNotEmpty(&tcall->rq))
				    tconn.callOther[j] |= RX_OTHER_IN;
				if (queue_IsNotEmpty(&tcall->tq))
				    tconn.callOther[j] |= RX_OTHER_OUT;
			    } else
				tconn.callState[j] = RX_STATE_NOTINIT;
			}

			tconn.natMTU = htonl(tc->peer->natMTU);
			tconn.error = htonl(tc->error);
			tconn.flags = tc->flags;
			tconn.type = tc->type;
			tconn.securityIndex = tc->securityIndex;
			if (tc->securityObject) {
			    RXS_GetStats(tc->securityObject, tc,
					 &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			    DOHTONL(flags);
			    DOHTONL(expires);
			    DOHTONL(packetsReceived);
			    DOHTONL(packetsSent);
			    DOHTONL(bytesReceived);
			    DOHTONL(bytesSent);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.spares) /
				 sizeof(short); i++)
				DOHTONS(spares[i]);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)
				DOHTONL(sparel[i]);
			}

			MUTEX_EXIT(&rx_connHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
				       (char *)&tconn);
			ap->length = sizeof(struct rx_debugConn);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			return ap;
		    }
		}
		MUTEX_EXIT(&rx_connHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
			   (char *)&tconn);
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}
	/*
	 * Pass back all the peer structures we have available
	 */
    case RX_DEBUGI_GETPEER:{
	    int i;
	    register struct rx_peer *tp;
	    struct rx_debugPeer tpeer;

	    tl = sizeof(struct rx_debugPeer) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    memset((char *)&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of peers.
		 *
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock
		 * also increases the risk that we will miss a new
		 * entry - but we are willing to live with this
		 * limitation since this is meant for debugging only
		 */
#ifdef AFS_PTHREAD_ENV
		sleep(1);
#else
		IOMGR_Sleep(1);
#endif
#endif
		MUTEX_ENTER(&rx_peerHashTable_lock);
		for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		    if (tin.index-- <= 0) {
			tpeer.host = tp->host;
			tpeer.port = tp->port;
			tpeer.ifMTU = htons(tp->ifMTU);
			tpeer.idleWhen = htonl(tp->idleWhen);
			tpeer.refCount = htons(tp->refCount);
			tpeer.burstSize = tp->burstSize;
			tpeer.burst = tp->burst;
			tpeer.burstWait.sec = htonl(tp->burstWait.sec);
			tpeer.burstWait.usec = htonl(tp->burstWait.usec);
			tpeer.rtt = htonl(tp->rtt);
			tpeer.rtt_dev = htonl(tp->rtt_dev);
			tpeer.timeout.sec = htonl(tp->timeout.sec);
			tpeer.timeout.usec = htonl(tp->timeout.usec);
			tpeer.nSent = htonl(tp->nSent);
			tpeer.reSends = htonl(tp->reSends);
			tpeer.inPacketSkew = htonl(tp->inPacketSkew);
			tpeer.outPacketSkew = htonl(tp->outPacketSkew);
			tpeer.rateFlag = htonl(tp->rateFlag);
			tpeer.natMTU = htons(tp->natMTU);
			tpeer.maxMTU = htons(tp->maxMTU);
			tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
			tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
			tpeer.MTU = htons(tp->MTU);
			tpeer.cwind = htons(tp->cwind);
			tpeer.nDgramPackets = htons(tp->nDgramPackets);
			tpeer.congestSeq = htons(tp->congestSeq);
			tpeer.bytesSent.high = htonl(tp->bytesSent.high);
			tpeer.bytesSent.low = htonl(tp->bytesSent.low);
			tpeer.bytesReceived.high =
			    htonl(tp->bytesReceived.high);
			tpeer.bytesReceived.low =
			    htonl(tp->bytesReceived.low);

			MUTEX_EXIT(&rx_peerHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
				       (char *)&tpeer);
			ap->length = sizeof(struct rx_debugPeer);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			return ap;
		    }
		}
		MUTEX_EXIT(&rx_peerHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
			   (char *)&tpeer);
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}
    case RX_DEBUGI_RXSTATS:{
	    int i;
	    register afs_int32 *s;

	    tl = sizeof(rx_stats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    /* Since it's all int32s convert to network order with a loop. */
	    MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) & rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

	    ap->length = sizeof(rx_stats);
	    MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}

    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	break;
    }
    return ap;
}
struct rx_packet *
rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
			 afs_int32 ahost, short aport, int istack)
{
    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	char buf[66];

	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);
	ap->length = 65;
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    }

    return ap;
}
/* send a debug packet back to the sender */
static void
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_int32 ahost, short aport, afs_int32 istack)
{
    struct sockaddr_in taddr;
    int i;
    int nbytes;
    int saven = 0;
    size_t savelen = 0;
#ifdef KERNEL
    int waslocked = ISAFS_GLOCK();
#endif

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
#endif

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= apacket->wirevec[i].iov_len;
    }
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	AFS_GLOCK();
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");
	AFS_GUNLOCK();
    }
#else
    if (waslocked)
	AFS_GUNLOCK();
#endif
#endif
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	AFS_GLOCK();
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");
	AFS_GUNLOCK();
    }
#else
    if (waslocked)
	AFS_GLOCK();
#endif
#endif
    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
    }
}
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
void
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
{
#if defined(KERNEL)
    int waslocked;
#endif
    int code;
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    osi_socket socket;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;
    }
#ifdef RXDEBUG
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	if (drop)
	    deliveryType = 'D';	/* Drop the packet */
    }
#endif

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
	(conn->type ==
	 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#ifdef KERNEL
	waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    AFS_GLOCK();
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "before osi_NetSend()");
	    AFS_GUNLOCK();
	}
#else
	if (waslocked)
	    AFS_GUNLOCK();
#endif
#endif
	if ((code =
	     osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
			 p->length + RX_HEADER_SIZE, istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.netSendFailures++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    p->retryTime = p->timeSent;	/* resend it very soon */
	    clock_Addmsec(&(p->retryTime),
			  10 + (((afs_uint32) p->backoff) << 8));

#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
	    /* Linux is nice -- it can tell us right away that we cannot
	     * reach this recipient by returning an ENETUNREACH error
	     * code.  So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call && code == -ENETUNREACH)
		call->lastReceiveTime = 0;
#endif
	}
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    AFS_GLOCK();
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "after osi_NetSend()");
	    AFS_GUNLOCK();
	}
#else
	if (waslocked)
	    AFS_GLOCK();
#endif
#endif
#ifdef RXDEBUG
    }
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
/* Send a list of packets to appropriate destination for the specified
 * connection.  The headers are first encoded and placed in the packets.
 */
void
rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
		   struct rx_packet **list, int len, int istack)
{
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    int waslocked;
#endif
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    osi_socket socket;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length, code;
    afs_uint32 serial;
    afs_uint32 temp;
    struct rx_jumboHeader *jp;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    if (len + 1 > RX_MAXIOVECS) {
	osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
    }

    /*
     * Stamp the packets in this jumbogram with consecutive serial numbers
     */
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    MUTEX_EXIT(&conn->conn_data_lock);

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    jp = NULL;
    length = RX_HEADER_SIZE;
    wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0; i < len; i++) {
	p = list[i];

	/* The whole 3.5 jumbogram scheme relies on packets fitting
	 * in a single packet buffer. */
	if (p->niovecs > 2) {
	    osi_Panic("rxi_SendPacketList, niovecs > 2\n");
	}

	/* Set the RX_JUMBO_PACKET flags in all but the last packets
	 * in this chunk of the jumbogram */
	if (i < len - 1) {
	    if (p->length != RX_JUMBOBUFFERSIZE) {
		osi_Panic("rxi_SendPacketList, length != jumbo size\n");
	    }
	    p->header.flags |= RX_JUMBO_PACKET;
	    length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	    wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	} else {
	    wirevec[i + 1].iov_len = p->length;
	    length += p->length;
	}
	wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
	if (i > 0) {
	    /* Convert jumbo packet header to network byte order */
	    temp = (afs_uint32) (p->header.flags) << 24;
	    temp |= (afs_uint32) (p->header.spare);
	    *(afs_uint32 *) jp = htonl(temp);
	}
	jp = (struct rx_jumboHeader *)
	    ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);

	/* Stamp each packet with a unique serial number.  The serial
	 * number is maintained on a connection basis because some types
	 * of security may be based on the serial number of the packet,
	 * and security is handled on a per authenticated-connection
	 * basis. */
	/* Pre-increment, to guarantee no zero serial number; a zero
	 * serial number means the packet was never sent. */
	p->header.serial = ++serial;
	/* This is so we can adjust retransmit time-outs better in the face of
	 * rapidly changing round-trip times.  RTO estimation is not a la Karn.
	 */
	if (p->firstSerial == 0) {
	    p->firstSerial = p->header.serial;
	}
#ifdef RXDEBUG
	/* If an output tracer function is defined, call it with the packet and
	 * network address.  Note this function may modify its arguments. */
	if (rx_almostSent) {
	    int drop = (*rx_almostSent) (p, &addr);
	    /* drop packet if return value is non-zero? */
	    if (drop)
		deliveryType = 'D';	/* Drop the packet */
	}
#endif

	/* Get network byte order header */
	rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
					 * touch ALL the fields */
    }

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
	(conn->type ==
	 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	waslocked = ISAFS_GLOCK();
	if (!istack && waslocked)
	    AFS_GUNLOCK();
#endif
	if ((code =
	     osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
			 istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.netSendFailures++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    for (i = 0; i < len; i++) {
		p = list[i];
		p->retryTime = p->timeSent;	/* resend it very soon */
		clock_Addmsec(&(p->retryTime),
			      10 + (((afs_uint32) p->backoff) << 8));
	    }
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
	    /* Linux is nice -- it can tell us right away that we cannot
	     * reach this recipient by returning an ENETUNREACH error
	     * code.  So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call && code == -ENETUNREACH)
		call->lastReceiveTime = 0;
#endif
	}
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	if (!istack && waslocked)
	    AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(register struct rx_call *call,
		register struct rx_connection *conn,
		struct rx_packet *optionalPacket, int type, char *data,
		int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
	channel = call->channel;
	callNumber = *call->callNumber;
	/* BUSY packets refer to the next call on this connection */
	if (type == RX_PACKET_TYPE_BUSY) {
	    callNumber++;
	}
    } else {
	channel = 0;
	callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	if (!p)
	    osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
	p->length = nbytes;
    else
	nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
	rx_packetwrite(p, 0, nbytes, data);

    for (i = 1; i < p->niovecs; i++) {
	if (nbytes <= p->wirevec[i].iov_len) {
	    savelen = p->wirevec[i].iov_len;
	    saven = p->niovecs;
	    p->wirevec[i].iov_len = nbytes;
	    p->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
	rxi_Send(call, p, istack);
    else
	rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {		/* means we truncated the packet above.  We probably don't  */
	/* really need to do this, but it seems safer this way, given that  */
	/* sneaky optionalPacket... */
	p->wirevec[i - 1].iov_len = savelen;
	p->niovecs = saven;
    }
    if (!optionalPacket)
	rxi_FreePacket(p);
    return optionalPacket;
}
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */

    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
		   | (((afs_uint32) p->header.flags) << 16)
		   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
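/* Resulting wire header, for reference (an added sketch, not original
 * source): seven 32-bit words in network byte order --
 *
 *   epoch | cid | callNumber | seq | serial |
 *   type:8 flags:8 userStatus:8 securityIndex:8 |
 *   spare:16 serviceId:16
 *
 * rxi_DecodePacketHeader() below is the exact inverse, except that on
 * received packets the top 16 bits of the last word also carry the
 * security checksum. */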
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;

    temp = ntohl(*buf);
    buf++;

    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf);
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
void
rxi_PrepareSendPacket(register struct rx_call *call,
		      register struct rx_packet *p, register int last)
{
    register struct rx_connection *conn = call->conn;
    int i, j;
    ssize_t len;		/* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
	p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);	/* Never yet transmitted */
    clock_Zero(&p->firstSent);	/* Never yet transmitted */
    p->header.serial = 0;	/* Another way of saying never transmitted... */

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
	len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
	osi_Panic("PrepareSendPacket 1\n");	/* MTUXXX */
    } else {
	/* Free any extra elements in the wirevec */
	for (j = MAX(2, i); j < p->niovecs; j++) {
	    rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
	}
	p->niovecs = i;
	p->wirevec[i - 1].iov_len += len;
    }
    RXS_PreparePacket(conn->securityObject, call, p);
}
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
	return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
	return adjMTU;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
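/* Worked example (added for illustration; assumes the usual constants
 * RX_HEADER_SIZE = 28, RX_JUMBOBUFFERSIZE = 1412, RX_JUMBOHEADERSIZE = 4):
 * adjMTU = 28 + 1412 + 4 = 1444.  For an Ethernet-sized mtu of 1500, the
 * remainder 1500 - 1444 = 56 cannot hold another whole 1416-byte fragment,
 * so frags = 0 and the adjusted MTU is 1444; the extra 56 bytes are given
 * up because they could never hold a full jumbogram buffer. */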
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
	return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
	return 2;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
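/* Worked example (added for illustration; constants assumed as in the
 * rxi_AdjustIfMTU note above, with UDP_HDR_SIZE = 28 for an IP + UDP
 * header): with frags = 4 and an adjusted mtu of 1444,
 * maxMTU = 4 * (1444 + 28) - 28 = 5860; subtracting
 * 28 + 2 * 1412 + 4 = 2856 for the first and last packets leaves 3004,
 * and 3004 / (1412 + 4) = 2 middle packets, so the jumbogram can carry
 * 2 + 2 = 4 packets. */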