src/rx/rx_packet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #ifdef KERNEL
  12 #include "afs/param.h"
  13 #else
  14 #include <afs/param.h>
  15 #endif
  16
  17 RCSID
  18     ("$Header$");
  19
  20 #ifdef KERNEL
  21 #if defined(UKERNEL)
  22 #include "afs/sysincludes.h"
  23 #include "afsincludes.h"
  24 #include "rx/rx_kcommon.h"
  25 #include "rx/rx_clock.h"
  26 #include "rx/rx_queue.h"
  27 #include "rx/rx_packet.h"
  28 #else /* defined(UKERNEL) */
  29 #ifdef RX_KERNEL_TRACE
  30 #include "../rx/rx_kcommon.h"
  31 #endif
  32 #include "h/types.h"
  33 #ifndef AFS_LINUX20_ENV
  34 #include "h/systm.h"
  35 #endif
  36 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
  37 #include "afs/sysincludes.h"
  38 #endif
  39 #if defined(AFS_OBSD_ENV)
  40 #include "h/proc.h"
  41 #endif
  42 #include "h/socket.h"
  43 #if !defined(AFS_SUN5_ENV) &&  !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
  44 #if     !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
  45 #include "sys/mount.h"          /* it gets pulled in by something later anyway */
  46 #endif
  47 #include "h/mbuf.h"
  48 #endif
  49 #include "netinet/in.h"
  50 #include "afs/afs_osi.h"
  51 #include "rx_kmutex.h"
  52 #include "rx/rx_clock.h"
  53 #include "rx/rx_queue.h"
  54 #ifdef  AFS_SUN5_ENV
  55 #include <sys/sysmacros.h>
  56 #endif
  57 #include "rx/rx_packet.h"
  58 #endif /* defined(UKERNEL) */
  59 #include "rx/rx_globals.h"
  60 #else /* KERNEL */
  61 #include "sys/types.h"
  62 #include <sys/stat.h>
  63 #include <errno.h>
  64 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
  65 #ifdef AFS_NT40_ENV
  66 #include <winsock2.h>
  67 #ifndef EWOULDBLOCK
  68 #define EWOULDBLOCK WSAEWOULDBLOCK
  69 #endif
  70 #else
  71 #include <sys/socket.h>
  72 #include <netinet/in.h>
  73 #endif /* AFS_NT40_ENV */
  74 #include "rx_xmit_nt.h"
  75 #include <stdlib.h>
  76 #else
  77 #include <sys/socket.h>
  78 #include <netinet/in.h>
  79 #endif
  80 #include "rx_clock.h"
  81 #include "rx.h"
  82 #include "rx_queue.h"
  83 #ifdef  AFS_SUN5_ENV
  84 #include <sys/sysmacros.h>
  85 #endif
  86 #include "rx_packet.h"
  87 #include "rx_globals.h"
  88 #include <lwp.h>
  89 #include <assert.h>
  90 #ifdef HAVE_STRING_H
  91 #include <string.h>
  92 #else
  93 #ifdef HAVE_STRINGS_H
  94 #include <strings.h>
  95 #endif
  96 #endif
  97 #ifdef HAVE_UNISTD_H
  98 #include <unistd.h>
  99 #endif
 100 #endif /* KERNEL */
 101
 102 #ifdef RX_LOCKS_DB
 103 /* rxdb_fileID is used to identify the lock location, along with line#. */
 104 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
 105 #endif /* RX_LOCKS_DB */
 106 struct rx_packet *rx_mallocedP = 0;
 107
 108 extern char cml_version_number[];
 109 extern int (*rx_almostSent) ();
 110
 111 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
 112                                 afs_int32 ahost, short aport,
 113                                 afs_int32 istack);
 114
 115 /* some rules about packets:
 116  * 1.  When a packet is allocated, the final iov_buf contains room for
 117  * a security trailer, but iov_len masks that fact.  If the security
 118  * package wants to add the trailer, it may do so, and then extend
 119  * iov_len appropriately.  For this reason, packet's niovecs and
 120  * iov_len fields should be accurate before calling PreparePacket.
 121 */
 122
 123 /* Preconditions:
 124  *        all packet buffers (iov_base) are integral multiples of
 125  *        the word size.
 126  *        offset is an integral multiple of the word size.
 127  */
 128 afs_int32
 129 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
 130 {
 131     unsigned int i;
 132     size_t l;
 133     for (l = 0, i = 1; i < packet->niovecs; i++) {
 134         if (l + packet->wirevec[i].iov_len > offset) {
 135             return
 136                 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
 137                                  (offset - l)));
 138         }
 139         l += packet->wirevec[i].iov_len;
 140     }
 141
 142     return 0;
 143 }
 144
 145 /* Preconditions:
 146  *        all packet buffers (iov_base) are integral multiples of the word size.
 147  *        offset is an integral multiple of the word size.
 148  */
 149 afs_int32
 150 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
 151 {
 152     unsigned int i;
 153     size_t l;
 154     for (l = 0, i = 1; i < packet->niovecs; i++) {
 155         if (l + packet->wirevec[i].iov_len > offset) {
 156             *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
 157                              (offset - l))) = data;
 158             return 0;
 159         }
 160         l += packet->wirevec[i].iov_len;
 161     }
 162
 163     return 0;
 164 }
 165
 166 /* Preconditions:
 167  *        all packet buffers (iov_base) are integral multiples of the
 168  *        word size.
 169  *        offset is an integral multiple of the word size.
 170  * Packet Invariants:
 171  *         all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 172  */
 173 afs_int32
 174 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
 175                   char *out)
 176 {
 177     unsigned int i, j, l, r;
 178     for (l = 0, i = 1; i < packet->niovecs; i++) {
 179         if (l + packet->wirevec[i].iov_len > offset) {
 180             break;
 181         }
 182         l += packet->wirevec[i].iov_len;
 183     }
 184
 185     /* i is the iovec which contains the first little bit of data in which we
 186      * are interested.  l is the total length of everything prior to this iovec.
 187      * j is the number of bytes we can safely copy out of this iovec.
 188      */
 189     r = resid;
 190     while ((resid > 0) && (i < packet->niovecs)) {
 191         j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
 192         memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
 193         resid -= j;
 194         l += packet->wirevec[i].iov_len;
 195         i++;
 196     }
 197
 198     return (resid ? (r - resid) : r);
 199 }
 200
 201
 202 /* Preconditions:
 203  *        all packet buffers (iov_base) are integral multiples of the
 204  *        word size.
 205  *        offset is an integral multiple of the word size.
 206  */
 207 afs_int32
 208 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
 209 {
 210     int i, j, l, r;
 211     char *b;
 212
 213     for (l = 0, i = 1; i < packet->niovecs; i++) {
 214         if (l + packet->wirevec[i].iov_len > offset) {
 215             break;
 216         }
 217         l += packet->wirevec[i].iov_len;
 218     }
 219
 220     /* i is the iovec which contains the first little bit of data in which we
 221      * are interested.  l is the total length of everything prior to this iovec.
 222      * j is the number of bytes we can safely copy out of this iovec.
 223      */
 224     r = resid;
 225     while ((resid > 0) && (i < RX_MAXWVECS)) {
 226         if (i >= packet->niovecs)
 227             if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
 228                 break;
 229
 230         b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
 231         j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
 232         memcpy(b, in, j);
 233         resid -= j;
 234         l += packet->wirevec[i].iov_len;
 235         i++;
 236     }
 237
 238     return (resid ? (r - resid) : r);
 239 }
 240
 241 static struct rx_packet *
 242 allocCBuf(int class)
 243 {
 244     struct rx_packet *c;
 245     SPLVAR;
 246
 247     NETPRI;
 248     MUTEX_ENTER(&rx_freePktQ_lock);
 249
 250 #ifdef KERNEL
 251     if (rxi_OverQuota(class)) {
 252         c = NULL;
 253         rxi_NeedMorePackets = TRUE;
 254         MUTEX_ENTER(&rx_stats_mutex);
 255         switch (class) {
 256         case RX_PACKET_CLASS_RECEIVE:
 257             rx_stats.receivePktAllocFailures++;
 258             break;
 259         case RX_PACKET_CLASS_SEND:
 260             rx_stats.sendPktAllocFailures++;
 261             break;
 262         case RX_PACKET_CLASS_SPECIAL:
 263             rx_stats.specialPktAllocFailures++;
 264             break;
 265         case RX_PACKET_CLASS_RECV_CBUF:
 266             rx_stats.receiveCbufPktAllocFailures++;
 267             break;
 268         case RX_PACKET_CLASS_SEND_CBUF:
 269             rx_stats.sendCbufPktAllocFailures++;
 270             break;
 271         }
 272         MUTEX_EXIT(&rx_stats_mutex);
 273         goto done;
 274     }
 275
 276     if (queue_IsEmpty(&rx_freePacketQueue)) {
 277         c = NULL;
 278         rxi_NeedMorePackets = TRUE;
 279         goto done;
 280     }
 281 #else /* KERNEL */
 282     if (queue_IsEmpty(&rx_freePacketQueue)) {
 283         rxi_MorePacketsNoLock(rx_initSendWindow);
 284     }
 285 #endif /* KERNEL */
 286
 287     rx_nFreePackets--;
 288     c = queue_First(&rx_freePacketQueue, rx_packet);
 289     queue_Remove(c);
 290     if (!(c->flags & RX_PKTFLAG_FREE))
 291         osi_Panic("rxi_AllocPacket: packet not free\n");
 292     c->flags = 0;               /* clear RX_PKTFLAG_FREE, initialize the rest */
 293     c->header.flags = 0;
 294
 295 #ifdef KERNEL
 296   done:
 297 #endif
 298     MUTEX_EXIT(&rx_freePktQ_lock);
 299
 300     USERPRI;
 301     return c;
 302 }
 303
 304 /*
 305  * Free a packet currently used as a continuation buffer
 306  */
 307 void
 308 rxi_freeCBuf(struct rx_packet *c)
 309 {
 310     SPLVAR;
 311
 312     NETPRI;
 313     MUTEX_ENTER(&rx_freePktQ_lock);
 314
 315     rxi_FreePacketNoLock(c);
 316     /* Wakeup anyone waiting for packets */
 317     rxi_PacketsUnWait();
 318
 319     MUTEX_EXIT(&rx_freePktQ_lock);
 320     USERPRI;
 321 }
 322
 323 /* this one is kind of awful.
 324  * In rxkad, the packet has been all shortened, and everything, ready for
 325  * sending.  All of a sudden, we discover we need some of that space back.
 326  * This isn't terribly general, because it knows that the packets are only
 327  * rounded up to the EBS (userdata + security header).
 328  */
 329 int
 330 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
 331 {
 332     int i;
 333     i = p->niovecs - 1;
 334     if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
 335         if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
 336             p->wirevec[i].iov_len += nb;
 337             return 0;
 338         }
 339     } else {
 340         if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
 341             p->wirevec[i].iov_len += nb;
 342             return 0;
 343         }
 344     }
 345
 346     return 0;
 347 }
 348
 349 /* get sufficient space to store nb bytes of data (or more), and hook
 350  * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 351  * returns the number of bytes >0 which it failed to come up with.
 352  * Don't need to worry about locking on packet, since only
 353  * one thread can manipulate one at a time. Locking on continution
 354  * packets is handled by allocCBuf */
 355 /* MTUXXX don't need to go throught the for loop if we can trust niovecs */
 356 int
 357 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
 358 {
 359     int i;
 360
 361     for (i = p->niovecs; nb > 0 && i < RX_MAXWVECS; i++) {
 362         register struct rx_packet *cb;
 363         if ((cb = allocCBuf(class))) {
 364             p->wirevec[i].iov_base = (caddr_t) cb->localdata;
 365             p->wirevec[i].iov_len = RX_CBUFFERSIZE;
 366             nb -= RX_CBUFFERSIZE;
 367             p->length += RX_CBUFFERSIZE;
 368             p->niovecs++;
 369         } else
 370             break;
 371     }
 372
 373     return nb;
 374 }
 375
 376 /* Add more packet buffers */
 377 void
 378 rxi_MorePackets(int apackets)
 379 {
 380     struct rx_packet *p, *e;
 381     int getme;
 382     SPLVAR;
 383
 384     getme = apackets * sizeof(struct rx_packet);
 385     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 386
 387     PIN(p, getme);              /* XXXXX */
 388     memset((char *)p, 0, getme);
 389     NETPRI;
 390     AFS_RXGLOCK();
 391     MUTEX_ENTER(&rx_freePktQ_lock);
 392
 393     for (e = p + apackets; p < e; p++) {
 394         p->wirevec[0].iov_base = (char *)(p->wirehead);
 395         p->wirevec[0].iov_len = RX_HEADER_SIZE;
 396         p->wirevec[1].iov_base = (char *)(p->localdata);
 397         p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
 398         p->flags |= RX_PKTFLAG_FREE;
 399         p->niovecs = 2;
 400
 401         queue_Append(&rx_freePacketQueue, p);
 402     }
 403     rx_nFreePackets += apackets;
 404     rxi_NeedMorePackets = FALSE;
 405     rxi_PacketsUnWait();
 406
 407     AFS_RXGUNLOCK();
 408     MUTEX_EXIT(&rx_freePktQ_lock);
 409     USERPRI;
 410 }
 411
 412 #ifndef KERNEL
 413 /* Add more packet buffers */
 414 void
 415 rxi_MorePacketsNoLock(int apackets)
 416 {
 417     struct rx_packet *p, *e;
 418     int getme;
 419
 420     /* allocate enough packets that 1/4 of the packets will be able
 421      * to hold maximal amounts of data */
 422     apackets += (apackets / 4)
 423         * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
 424     getme = apackets * sizeof(struct rx_packet);
 425     p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
 426
 427     memset((char *)p, 0, getme);
 428
 429     for (e = p + apackets; p < e; p++) {
 430         p->wirevec[0].iov_base = (char *)(p->wirehead);
 431         p->wirevec[0].iov_len = RX_HEADER_SIZE;
 432         p->wirevec[1].iov_base = (char *)(p->localdata);
 433         p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
 434         p->flags |= RX_PKTFLAG_FREE;
 435         p->niovecs = 2;
 436
 437         queue_Append(&rx_freePacketQueue, p);
 438     }
 439     rx_nFreePackets += apackets;
 440     rxi_NeedMorePackets = FALSE;
 441     rxi_PacketsUnWait();
 442 }
 443 #endif /* !KERNEL */
 444
 445 void
 446 rxi_FreeAllPackets(void)
 447 {
 448     /* must be called at proper interrupt level, etcetera */
 449     /* MTUXXX need to free all Packets */
 450     osi_Free(rx_mallocedP,
 451              (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
 452     UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
 453 }
 454
 455 /* Allocate more packets iff we need more continuation buffers */
 456 /* In kernel, can't page in memory with interrupts disabled, so we
 457  * don't use the event mechanism. */
 458 void
 459 rx_CheckPackets(void)
 460 {
 461     if (rxi_NeedMorePackets) {
 462         rxi_MorePackets(rx_initSendWindow);
 463     }
 464 }
 465
 466 /* In the packet freeing routine below, the assumption is that
 467    we want all of the packets to be used equally frequently, so that we
 468    don't get packet buffers paging out.  It would be just as valid to
 469    assume that we DO want them to page out if not many are being used.
 470    In any event, we assume the former, and append the packets to the end
 471    of the free list.  */
 472 /* This explanation is bogus.  The free list doesn't remain in any kind of
 473    useful order for afs_int32: the packets in use get pretty much randomly scattered
 474    across all the pages.  In order to permit unused {packets,bufs} to page out, they
 475    must be stored so that packets which are adjacent in memory are adjacent in the
 476    free list.  An array springs rapidly to mind.
 477    */
 478
 479 /* Actually free the packet p. */
 480 void
 481 rxi_FreePacketNoLock(struct rx_packet *p)
 482 {
 483     dpf(("Free %x\n", (int)p));
 484
 485     if (p->flags & RX_PKTFLAG_FREE)
 486         osi_Panic("rxi_FreePacketNoLock: packet already free\n");
 487     rx_nFreePackets++;
 488     p->flags |= RX_PKTFLAG_FREE;
 489     queue_Append(&rx_freePacketQueue, p);
 490 }
 491
 492 int
 493 rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
 494 {
 495     struct iovec *iov, *end;
 496
 497     if (first != 1)             /* MTUXXX */
 498         osi_Panic("FreeDataBufs 1: first must be 1");
 499     iov = &p->wirevec[1];
 500     end = iov + (p->niovecs - 1);
 501     if (iov->iov_base != (caddr_t) p->localdata)        /* MTUXXX */
 502         osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
 503     for (iov++; iov < end; iov++) {
 504         if (!iov->iov_base)
 505             osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
 506         rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 507     }
 508     p->length = 0;
 509     p->niovecs = 0;
 510
 511     return 0;
 512 }
 513
 514 int rxi_nBadIovecs = 0;
 515
 516 /* rxi_RestoreDataBufs
 517  *
 518  * Restore the correct sizes to the iovecs. Called when reusing a packet
 519  * for reading off the wire.
 520  */
 521 void
 522 rxi_RestoreDataBufs(struct rx_packet *p)
 523 {
 524     int i;
 525     struct iovec *iov = &p->wirevec[2];
 526
 527     p->wirevec[0].iov_base = (char *)(p->wirehead);
 528     p->wirevec[0].iov_len = RX_HEADER_SIZE;
 529     p->wirevec[1].iov_base = (char *)(p->localdata);
 530     p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
 531
 532     for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
 533         if (!iov->iov_base) {
 534             rxi_nBadIovecs++;
 535             p->niovecs = i;
 536             break;
 537         }
 538         iov->iov_len = RX_CBUFFERSIZE;
 539     }
 540 }
 541
 542 int
 543 rxi_TrimDataBufs(struct rx_packet *p, int first)
 544 {
 545     int length;
 546     struct iovec *iov, *end;
 547     SPLVAR;
 548
 549     if (first != 1)
 550         osi_Panic("TrimDataBufs 1: first must be 1");
 551
 552     /* Skip over continuation buffers containing message data */
 553     iov = &p->wirevec[2];
 554     end = iov + (p->niovecs - 2);
 555     length = p->length - p->wirevec[1].iov_len;
 556     for (; iov < end && length > 0; iov++) {
 557         if (!iov->iov_base)
 558             osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
 559         length -= iov->iov_len;
 560     }
 561
 562     /* iov now points to the first empty data buffer. */
 563     if (iov >= end)
 564         return 0;
 565
 566     NETPRI;
 567     MUTEX_ENTER(&rx_freePktQ_lock);
 568
 569     for (; iov < end; iov++) {
 570         if (!iov->iov_base)
 571             osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
 572         rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 573         p->niovecs--;
 574     }
 575     rxi_PacketsUnWait();
 576
 577     MUTEX_EXIT(&rx_freePktQ_lock);
 578     USERPRI;
 579
 580     return 0;
 581 }
 582
 583 /* Free the packet p.  P is assumed not to be on any queue, i.e.
 584  * remove it yourself first if you call this routine. */
 585 void
 586 rxi_FreePacket(struct rx_packet *p)
 587 {
 588     SPLVAR;
 589
 590     NETPRI;
 591     MUTEX_ENTER(&rx_freePktQ_lock);
 592
 593     rxi_FreeDataBufsNoLock(p, 1);
 594     rxi_FreePacketNoLock(p);
 595     /* Wakeup anyone waiting for packets */
 596     rxi_PacketsUnWait();
 597
 598     MUTEX_EXIT(&rx_freePktQ_lock);
 599     USERPRI;
 600 }
 601
 602
 603 /* rxi_AllocPacket sets up p->length so it reflects the number of
 604  * bytes in the packet at this point, **not including** the header.
 605  * The header is absolutely necessary, besides, this is the way the
 606  * length field is usually used */
 607 struct rx_packet *
 608 rxi_AllocPacketNoLock(int class)
 609 {
 610     register struct rx_packet *p;
 611
 612 #ifdef KERNEL
 613     if (rxi_OverQuota(class)) {
 614         rxi_NeedMorePackets = TRUE;
 615         MUTEX_ENTER(&rx_stats_mutex);
 616         switch (class) {
 617         case RX_PACKET_CLASS_RECEIVE:
 618             rx_stats.receivePktAllocFailures++;
 619             break;
 620         case RX_PACKET_CLASS_SEND:
 621             rx_stats.sendPktAllocFailures++;
 622             break;
 623         case RX_PACKET_CLASS_SPECIAL:
 624             rx_stats.specialPktAllocFailures++;
 625             break;
 626         case RX_PACKET_CLASS_RECV_CBUF:
 627             rx_stats.receiveCbufPktAllocFailures++;
 628             break;
 629         case RX_PACKET_CLASS_SEND_CBUF:
 630             rx_stats.sendCbufPktAllocFailures++;
 631             break;
 632         }
 633         MUTEX_EXIT(&rx_stats_mutex);
 634         return (struct rx_packet *)0;
 635     }
 636 #endif /* KERNEL */
 637
 638     MUTEX_ENTER(&rx_stats_mutex);
 639     rx_stats.packetRequests++;
 640     MUTEX_EXIT(&rx_stats_mutex);
 641
 642 #ifdef KERNEL
 643     if (queue_IsEmpty(&rx_freePacketQueue))
 644         osi_Panic("rxi_AllocPacket error");
 645 #else /* KERNEL */
 646     if (queue_IsEmpty(&rx_freePacketQueue))
 647         rxi_MorePacketsNoLock(rx_initSendWindow);
 648 #endif /* KERNEL */
 649
 650     rx_nFreePackets--;
 651     p = queue_First(&rx_freePacketQueue, rx_packet);
 652     if (!(p->flags & RX_PKTFLAG_FREE))
 653         osi_Panic("rxi_AllocPacket: packet not free\n");
 654
 655     dpf(("Alloc %x, class %d\n", (int)p, class));
 656
 657     queue_Remove(p);
 658     p->flags = 0;               /* clear RX_PKTFLAG_FREE, initialize the rest */
 659     p->header.flags = 0;
 660
 661     /* have to do this here because rx_FlushWrite fiddles with the iovs in
 662      * order to truncate outbound packets.  In the near future, may need
 663      * to allocate bufs from a static pool here, and/or in AllocSendPacket
 664      */
 665     p->wirevec[0].iov_base = (char *)(p->wirehead);
 666     p->wirevec[0].iov_len = RX_HEADER_SIZE;
 667     p->wirevec[1].iov_base = (char *)(p->localdata);
 668     p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
 669     p->niovecs = 2;
 670     p->length = RX_FIRSTBUFFERSIZE;
 671     return p;
 672 }
 673
 674 struct rx_packet *
 675 rxi_AllocPacket(int class)
 676 {
 677     register struct rx_packet *p;
 678
 679     MUTEX_ENTER(&rx_freePktQ_lock);
 680     p = rxi_AllocPacketNoLock(class);
 681     MUTEX_EXIT(&rx_freePktQ_lock);
 682     return p;
 683 }
 684
 685 /* This guy comes up with as many buffers as it {takes,can get} given
 686  * the MTU for this call. It also sets the packet length before
 687  * returning.  caution: this is often called at NETPRI
 688  * Called with call locked.
 689  */
 690 struct rx_packet *
 691 rxi_AllocSendPacket(register struct rx_call *call, int want)
 692 {
 693     register struct rx_packet *p = (struct rx_packet *)0;
 694     register int mud;
 695     register unsigned delta;
 696
 697     SPLVAR;
 698     mud = call->MTU - RX_HEADER_SIZE;
 699     delta =
 700         rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
 701         rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
 702
 703     while (!(call->error)) {
 704         MUTEX_ENTER(&rx_freePktQ_lock);
 705         /* if an error occurred, or we get the packet we want, we're done */
 706         if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
 707             MUTEX_EXIT(&rx_freePktQ_lock);
 708
 709             want += delta;
 710             want = MIN(want, mud);
 711
 712             if ((unsigned)want > p->length)
 713                 (void)rxi_AllocDataBuf(p, (want - p->length),
 714                                        RX_PACKET_CLASS_SEND_CBUF);
 715
 716             if ((unsigned)p->length > mud)
 717                 p->length = mud;
 718
 719             if (delta >= p->length) {
 720                 rxi_FreePacket(p);
 721                 p = NULL;
 722             } else {
 723                 p->length -= delta;
 724             }
 725             break;
 726         }
 727
 728         /* no error occurred, and we didn't get a packet, so we sleep.
 729          * At this point, we assume that packets will be returned
 730          * sooner or later, as packets are acknowledged, and so we
 731          * just wait.  */
 732         NETPRI;
 733         call->flags |= RX_CALL_WAIT_PACKETS;
 734         CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
 735         MUTEX_EXIT(&call->lock);
 736         rx_waitingForPackets = 1;
 737
 738 #ifdef  RX_ENABLE_LOCKS
 739         CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
 740 #else
 741         osi_rxSleep(&rx_waitingForPackets);
 742 #endif
 743         MUTEX_EXIT(&rx_freePktQ_lock);
 744         MUTEX_ENTER(&call->lock);
 745         CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
 746         call->flags &= ~RX_CALL_WAIT_PACKETS;
 747         USERPRI;
 748     }
 749
 750     return p;
 751 }
 752
 753 #ifndef KERNEL
 754
 755 /* count the number of used FDs */
 756 static int
 757 CountFDs(register int amax)
 758 {
 759     struct stat tstat;
 760     register int i, code;
 761     register int count;
 762
 763     count = 0;
 764     for (i = 0; i < amax; i++) {
 765         code = fstat(i, &tstat);
 766         if (code == 0)
 767             count++;
 768     }
 769     return count;
 770 }
 771
 772 #else /* KERNEL */
 773
 774 #define CountFDs(amax) amax
 775
 776 #endif /* KERNEL */
 777
 778 #if !defined(KERNEL) || defined(UKERNEL)
 779
 780 /* This function reads a single packet from the interface into the
 781  * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 782  * (host,port) of the sender are stored in the supplied variables, and
 783  * the data length of the packet is stored in the packet structure.
 784  * The header is decoded. */
 785 int
 786 rxi_ReadPacket(int socket, register struct rx_packet *p, afs_uint32 * host,
 787                u_short * port)
 788 {
 789     struct sockaddr_in from;
 790     int nbytes;
 791     afs_int32 rlen;
 792     register afs_int32 tlen, savelen;
 793     struct msghdr msg;
 794     rx_computelen(p, tlen);
 795     rx_SetDataSize(p, tlen);    /* this is the size of the user data area */
 796
 797     tlen += RX_HEADER_SIZE;     /* now this is the size of the entire packet */
 798     rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
 799                                  * it once in order to avoid races.  */
 800     tlen = rlen - tlen;
 801     if (tlen > 0) {
 802         tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
 803         if (tlen > 0) {
 804             tlen = rlen - tlen;
 805         } else
 806             tlen = rlen;
 807     } else
 808         tlen = rlen;
 809
 810     /* Extend the last iovec for padding, it's just to make sure that the
 811      * read doesn't return more data than we expect, and is done to get around
 812      * our problems caused by the lack of a length field in the rx header.
 813      * Use the extra buffer that follows the localdata in each packet
 814      * structure. */
 815     savelen = p->wirevec[p->niovecs - 1].iov_len;
 816     p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
 817
 818     memset((char *)&msg, 0, sizeof(msg));
 819     msg.msg_name = (char *)&from;
 820     msg.msg_namelen = sizeof(struct sockaddr_in);
 821     msg.msg_iov = p->wirevec;
 822     msg.msg_iovlen = p->niovecs;
 823     nbytes = rxi_Recvmsg(socket, &msg, 0);
 824
 825     /* restore the vec to its correct state */
 826     p->wirevec[p->niovecs - 1].iov_len = savelen;
 827
 828     p->length = (nbytes - RX_HEADER_SIZE);
 829     if ((nbytes > tlen) || (p->length & 0x8000)) {      /* Bogus packet */
 830         if (nbytes > 0)
 831             rxi_MorePackets(rx_initSendWindow);
 832         else if (nbytes < 0 && errno == EWOULDBLOCK) {
 833             MUTEX_ENTER(&rx_stats_mutex);
 834             rx_stats.noPacketOnRead++;
 835             MUTEX_EXIT(&rx_stats_mutex);
 836         } else {
 837             MUTEX_ENTER(&rx_stats_mutex);
 838             rx_stats.bogusPacketOnRead++;
 839             rx_stats.bogusHost = from.sin_addr.s_addr;
 840             MUTEX_EXIT(&rx_stats_mutex);
 841             dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
 842                  from.sin_port, nbytes));
 843         }
 844         return 0;
 845     } else {
 846         /* Extract packet header. */
 847         rxi_DecodePacketHeader(p);
 848
 849         *host = from.sin_addr.s_addr;
 850         *port = from.sin_port;
 851         if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
 852             struct rx_peer *peer;
 853             MUTEX_ENTER(&rx_stats_mutex);
 854             rx_stats.packetsRead[p->header.type - 1]++;
 855             MUTEX_EXIT(&rx_stats_mutex);
 856             /*
 857              * Try to look up this peer structure.  If it doesn't exist,
 858              * don't create a new one -
 859              * we don't keep count of the bytes sent/received if a peer
 860              * structure doesn't already exist.
 861              *
 862              * The peer/connection cleanup code assumes that there is 1 peer
 863              * per connection.  If we actually created a peer structure here
 864              * and this packet was an rxdebug packet, the peer structure would
 865              * never be cleaned up.
 866              */
 867             peer = rxi_FindPeer(*host, *port, 0, 0);
 868             /* Since this may not be associated with a connection,
 869              * it may have no refCount, meaning we could race with
 870              * ReapConnections
 871              */
 872             if (peer && (peer->refCount > 0)) {
 873                 MUTEX_ENTER(&peer->peer_lock);
 874                 hadd32(peer->bytesReceived, p->length);
 875                 MUTEX_EXIT(&peer->peer_lock);
 876             }
 877         }
 878
 879         /* Free any empty packet buffers at the end of this packet */
 880         rxi_TrimDataBufs(p, 1);
 881
 882         return 1;
 883     }
 884 }
 885
 886 #endif /* !KERNEL || UKERNEL */
 887
 888 /* This function splits off the first packet in a jumbo packet.
 889  * As of AFS 3.5, jumbograms contain more than one fixed size
 890  * packet, and the RX_JUMBO_PACKET flag is set in all but the
 891  * last packet header. All packets (except the last) are padded to
 892  * fall on RX_CBUFFERSIZE boundaries.
 893  * HACK: We store the length of the first n-1 packets in the
 894  * last two pad bytes. */
 895
 896 struct rx_packet *
 897 rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
 898                      int first)
 899 {
 900     struct rx_packet *np;
 901     struct rx_jumboHeader *jp;
 902     int niov, i;
 903     struct iovec *iov;
 904     int length;
 905     afs_uint32 temp;
 906
 907     /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
 908      * bytes in length. All but the first packet are preceded by
 909      * an abbreviated four byte header. The length of the last packet
 910      * is calculated from the size of the jumbogram. */
 911     length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
 912
 913     if ((int)p->length < length) {
 914         dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
 915         return NULL;
 916     }
 917     niov = p->niovecs - 2;
 918     if (niov < 1) {
 919         dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
 920         return NULL;
 921     }
 922     iov = &p->wirevec[2];
 923     np = RX_CBUF_TO_PACKET(iov->iov_base, p);
 924
 925     /* Get a pointer to the abbreviated packet header */
 926     jp = (struct rx_jumboHeader *)
 927         ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
 928
 929     /* Set up the iovecs for the next packet */
 930     np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
 931     np->wirevec[0].iov_len = sizeof(struct rx_header);
 932     np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
 933     np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
 934     np->niovecs = niov + 1;
 935     for (i = 2, iov++; i <= niov; i++, iov++) {
 936         np->wirevec[i] = *iov;
 937     }
 938     np->length = p->length - length;
 939     p->length = RX_JUMBOBUFFERSIZE;
 940     p->niovecs = 2;
 941
 942     /* Convert the jumbo packet header to host byte order */
 943     temp = ntohl(*(afs_uint32 *) jp);
 944     jp->flags = (u_char) (temp >> 24);
 945     jp->cksum = (u_short) (temp);
 946
 947     /* Fill in the packet header */
 948     np->header = p->header;
 949     np->header.serial = p->header.serial + 1;
 950     np->header.seq = p->header.seq + 1;
 951     np->header.flags = jp->flags;
 952     np->header.spare = jp->cksum;
 953
 954     return np;
 955 }
 956
 957 #ifndef KERNEL
 958 /* Send a udp datagram */
 959 int
 960 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
 961             int length, int istack)
 962 {
 963     struct msghdr msg;
 964
 965     memset(&msg, 0, sizeof(msg));
 966     msg.msg_iov = dvec;
 967     msg.msg_iovlen = nvecs;
 968     msg.msg_name = addr;
 969     msg.msg_namelen = sizeof(struct sockaddr_in);
 970
 971     rxi_Sendmsg(socket, &msg, 0);
 972
 973     return 0;
 974 }
 975 #elif !defined(UKERNEL)
 976 /*
 977  * message receipt is done in rxk_input or rx_put.
 978  */
 979
 980 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
 981 /*
 982  * Copy an mblock to the contiguous area pointed to by cp.
 983  * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 984  * but it doesn't really.
 985  * Returns the number of bytes not transferred.
 986  * The message is NOT changed.
 987  */
 988 static int
 989 cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
 990 {
 991     register int n;
 992
 993     for (; mp && len > 0; mp = mp->b_cont) {
 994         if (mp->b_datap->db_type != M_DATA) {
 995             return -1;
 996         }
 997         n = MIN(len, (mp->b_wptr - mp->b_rptr));
 998         memcpy(cp, (char *)mp->b_rptr, n);
 999         cp += n;
1000         len -= n;
1001         mp->b_rptr += n;
1002     }
1003     return (len);
1004 }
1005
1006 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1007  * but it doesn't really.
1008  * This sucks, anyway, do it like m_cpy.... below
1009  */
1010 static int
1011 cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
1012            int niovs)
1013 {
1014     register int m, n, o, t, i;
1015
1016     for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1017         if (mp->b_datap->db_type != M_DATA) {
1018             return -1;
1019         }
1020         n = MIN(len, (mp->b_wptr - mp->b_rptr));
1021         len -= n;
1022         while (n) {
1023             if (!t) {
1024                 o = 0;
1025                 i++;
1026                 t = iovs[i].iov_len;
1027             }
1028             m = MIN(n, t);
1029             memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1030             mp->b_rptr += m;
1031             o += m;
1032             t -= m;
1033             n -= m;
1034         }
1035     }
1036     return (len);
1037 }
1038
1039 #define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
1040 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1041 #else
1042 #if !defined(AFS_LINUX20_ENV)
1043 static int
1044 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1045 {
1046     caddr_t p1, p2;
1047     unsigned int l1, l2, i, t;
1048
1049     if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1050         osi_Panic("m_cpytoiovec");      /* MTUXXX probably don't need this check */
1051
1052     while (off && m)
1053         if (m->m_len <= off) {
1054             off -= m->m_len;
1055             m = m->m_next;
1056             continue;
1057         } else
1058             break;
1059
1060     if (m == NULL)
1061         return len;
1062
1063     p1 = mtod(m, caddr_t) + off;
1064     l1 = m->m_len - off;
1065     i = 0;
1066     p2 = iovs[0].iov_base;
1067     l2 = iovs[0].iov_len;
1068
1069     while (len) {
1070         t = MIN(l1, MIN(l2, (unsigned int)len));
1071         memcpy(p2, p1, t);
1072         p1 += t;
1073         p2 += t;
1074         l1 -= t;
1075         l2 -= t;
1076         len -= t;
1077         if (!l1) {
1078             m = m->m_next;
1079             if (!m)
1080                 break;
1081             p1 = mtod(m, caddr_t);
1082             l1 = m->m_len;
1083         }
1084         if (!l2) {
1085             if (++i >= niovs)
1086                 break;
1087             p2 = iovs[i].iov_base;
1088             l2 = iovs[i].iov_len;
1089         }
1090
1091     }
1092
1093     return len;
1094 }
1095 #endif /* LINUX */
1096 #endif /* AFS_SUN5_ENV */
1097
1098 #if !defined(AFS_LINUX20_ENV)
1099 int
1100 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1101 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1102      mblk_t *amb;
1103 #else
1104      struct mbuf *amb;
1105 #endif
1106      void (*free) ();
1107      struct rx_packet *phandle;
1108      int hdr_len, data_len;
1109 {
1110     register int code;
1111
1112     code =
1113         m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1114                      phandle->niovecs);
1115     (*free) (amb);
1116
1117     return code;
1118 }
1119 #endif /* LINUX */
1120 #endif /*KERNEL && !UKERNEL */
1121
1122
/* Send a response to a debug packet.
 *
 * ap      - the received debug request; it is reused to build the reply
 * asocket - socket handed to rxi_SendDebugPacket for the reply
 * ahost   - address handed to rxi_SendDebugPacket for the reply
 * aport   - port handed to rxi_SendDebugPacket for the reply
 * istack  - passed through to rxi_SendDebugPacket
 *
 * Requests that do not carry RX_CLIENT_INITIATED are ignored.  Otherwise
 * the request type selects the reply:
 *   RX_DEBUGI_GETSTATS                  - one struct rx_debugStats
 *   RX_DEBUGI_GETCONN / _GETALLCONN     - one struct rx_debugConn, chosen
 *                                         by the request index
 *   RX_DEBUGI_GETPEER                   - one struct rx_debugPeer, chosen
 *                                         by the request index
 *   RX_DEBUGI_RXSTATS                   - the contents of rx_stats
 *   anything else                       - a struct rx_debugIn whose type
 *                                         and index are RX_DEBUGI_BADTYPE
 * If the packet cannot be grown to hold the reply, nothing is sent.
 *
 * Always returns ap.
 */

struct rx_packet *
rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
                       afs_int32 ahost, short aport, int istack)
{
    struct rx_debugIn tin;
    afs_int32 tl;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
    } else {
        return ap;
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    /* Request fields arrive in network byte order */
    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;

            /* get basic stats */
            memset((char *)&tstat, 0, sizeof(tstat));   /* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
#endif
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_nWaiting);
            tstat.nWaited = htonl(rx_nWaited);
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
                        tstat.idleThreads);
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);
            /* tl > 0 means the packet is too small for the reply; ask for
             * more buffer space and keep whatever shortfall remains */
            tl = sizeof(struct rx_debugStats) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            /* Reply only if the packet can now hold the whole structure */
            if (tl <= 0) {
                rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
                               (char *)&tstat);
                ap->length = sizeof(struct rx_debugStats);
                rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
                rx_computelen(ap, ap->length);
            }
            break;
        }

    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            int i, j;
            register struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            /* GETALLCONN reports every connection; GETCONN only those
             * accepted by rxi_IsConnInteresting() */
            int all = (tin.type == RX_DEBUGI_GETALLCONN);


            /* Make room for the reply; give up silently if we cannot */
            tl = sizeof(struct rx_debugConn) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            memset((char *)&tconn, 0, sizeof(tconn));   /* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* A request for index N walks N matching connections from
                 * the start of the table before it can reply, so the cost
                 * of a request increases with the number of connections.
                 * Give other threads a chance to run between buckets.
                 */
#ifdef AFS_PTHREAD_ENV
                pthread_yield();
#else
                (void)IOMGR_Poll();
#endif
#endif
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                 */
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    /* tin.index counts down once per matching connection;
                     * the first one seen at or below zero is reported */
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                            } else
                                tconn.callState[j] = RX_STATE_NOTINIT;
                        }

                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
                                         &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(flags);
                            DOHTONL(expires);
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                            DOHTONL(bytesSent);
                            /* NOTE(review): the two loops below reuse i,
                             * the hash-bucket index of the outer loop.
                             * That is harmless only because this branch
                             * returns before the outer loop resumes. */
                            for (i = 0;
                                 i <
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(short); i++)
                                DOHTONS(spares[i]);
                            for (i = 0;
                                 i <
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)
                                DOHTONL(sparel[i]);
                        }

                        /* Drop the table lock before sending */
                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                                       (char *)&tconn);
                        /* Send with the reply length, then restore the
                         * packet's own length */
                        tl = ap->length;
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                                            istack);
                        ap->length = tl;
                        return ap;
                    }
                }
                MUTEX_EXIT(&rx_connHashTable_lock);
            }
            /* if we make it here, no connection matched the requested index */
            tconn.cid = htonl(0xffffffff);      /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                           (char *)&tconn);
            tl = ap->length;
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            ap->length = tl;
            break;
        }

        /*
         * Pass back all the peer structures we have available
         */

    case RX_DEBUGI_GETPEER:{
            int i;
            register struct rx_peer *tp;
            struct rx_debugPeer tpeer;


            /* Make room for the reply; give up silently if we cannot */
            tl = sizeof(struct rx_debugPeer) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            memset((char *)&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* A request for index N walks N peers from the start of
                 * the table before it can reply, so the cost of a request
                 * increases with the number of peers.
                 *
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock.
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
                 */
#ifdef AFS_PTHREAD_ENV
                pthread_yield();
#else
                (void)IOMGR_Poll();
#endif
#endif
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    /* tin.index counts down once per peer; the first one
                     * seen at or below zero is reported */
                    if (tin.index-- <= 0) {
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = tp->burstSize;
                        tpeer.burst = tp->burst;
                        tpeer.burstWait.sec = htonl(tp->burstWait.sec);
                        tpeer.burstWait.usec = htonl(tp->burstWait.usec);
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.timeout.sec = htonl(tp->timeout.sec);
                        tpeer.timeout.usec = htonl(tp->timeout.usec);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.inPacketSkew = htonl(tp->inPacketSkew);
                        tpeer.outPacketSkew = htonl(tp->outPacketSkew);
                        tpeer.rateFlag = htonl(tp->rateFlag);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high = htonl(tp->bytesSent.high);
                        tpeer.bytesSent.low = htonl(tp->bytesSent.low);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived.high);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived.low);

                        /* Drop the table lock before sending */
                        MUTEX_EXIT(&rx_peerHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                                       (char *)&tpeer);
                        /* Send with the reply length, then restore the
                         * packet's own length */
                        tl = ap->length;
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                                            istack);
                        ap->length = tl;
                        return ap;
                    }
                }
                MUTEX_EXIT(&rx_peerHashTable_lock);
            }
            /* if we make it here, no peer matched the requested index */
            tpeer.host = htonl(0xffffffff);     /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                           (char *)&tpeer);
            tl = ap->length;
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            ap->length = tl;
            break;
        }

    case RX_DEBUGI_RXSTATS:{
            int i;
            afs_int32 *s;

            /* Make room for the reply; give up silently if we cannot */
            tl = sizeof(rx_stats) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            /* Since its all int32s convert to network order with a loop. */
            MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) & rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

            tl = ap->length;
            ap->length = sizeof(rx_stats);
            MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            ap->length = tl;
            break;
        }

    default:
        /* error response packet: both fields carry RX_DEBUGI_BADTYPE */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        tl = ap->length;
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
        ap->length = tl;
        break;
    }
    return ap;
}
1423
1424 struct rx_packet *
1425 rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
1426                          afs_int32 ahost, short aport, int istack)
1427 {
1428     afs_int32 tl;
1429
1430     /*
1431      * Only respond to client-initiated version requests, and
1432      * clear that flag in the response.
1433      */
1434     if (ap->header.flags & RX_CLIENT_INITIATED) {
1435         char buf[66];
1436
1437         ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1438         rxi_EncodePacketHeader(ap);
1439         memset(buf, 0, sizeof(buf));
1440         strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
1441         rx_packetwrite(ap, 0, 65, buf);
1442         tl = ap->length;
1443         ap->length = 65;
1444         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1445         ap->length = tl;
1446     }
1447
1448     return ap;
1449 }
1450
1451
1452 /* send a debug packet back to the sender */
1453 static void
1454 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1455                     afs_int32 ahost, short aport, afs_int32 istack)
1456 {
1457     struct sockaddr_in taddr;
1458     int i;
1459     int nbytes;
1460     int saven = 0;
1461     size_t savelen = 0;
1462 #ifdef KERNEL
1463     int waslocked = ISAFS_GLOCK();
1464 #endif
1465
1466     taddr.sin_family = AF_INET;
1467     taddr.sin_port = aport;
1468     taddr.sin_addr.s_addr = ahost;
1469 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
1470     taddr.sin_len = sizeof(struct sockaddr_in);
1471 #endif
1472
1473     /* We need to trim the niovecs. */
1474     nbytes = apacket->length;
1475     for (i = 1; i < apacket->niovecs; i++) {
1476         if (nbytes <= apacket->wirevec[i].iov_len) {
1477             savelen = apacket->wirevec[i].iov_len;
1478             saven = apacket->niovecs;
1479             apacket->wirevec[i].iov_len = nbytes;
1480             apacket->niovecs = i + 1;   /* so condition fails because i == niovecs */
1481         } else
1482             nbytes -= apacket->wirevec[i].iov_len;
1483     }
1484     AFS_RXGUNLOCK();
1485 #ifdef KERNEL
1486 #ifdef RX_KERNEL_TRACE
1487     if (ICL_SETACTIVE(afs_iclSetp)) {
1488         if (!waslocked)
1489             AFS_GLOCK();
1490         afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
1491                    "before osi_NetSend()");
1492         AFS_GUNLOCK();
1493     } else
1494 #else
1495     if (waslocked)
1496         AFS_GUNLOCK();
1497 #endif
1498 #endif
1499     /* debug packets are not reliably delivered, hence the cast below. */
1500     (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1501                       apacket->length + RX_HEADER_SIZE, istack);
1502 #ifdef KERNEL
1503 #ifdef RX_KERNEL_TRACE
1504     if (ICL_SETACTIVE(afs_iclSetp)) {
1505         AFS_GLOCK();
1506         afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
1507                    "after osi_NetSend()");
1508         if (!waslocked)
1509             AFS_GUNLOCK();
1510     } else
1511 #else
1512     if (waslocked)
1513         AFS_GLOCK();
1514 #endif
1515 #endif
1516     AFS_RXGLOCK();
1517     if (saven) {                /* means we truncated the packet above. */
1518         apacket->wirevec[i - 1].iov_len = savelen;
1519         apacket->niovecs = saven;
1520     }
1521
1522 }
1523
1524 /* Send the packet to appropriate destination for the specified
1525  * call.  The header is first encoded and placed in the packet.
1526  */
1527 void
1528 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
1529                struct rx_packet *p, int istack)
1530 {
1531 #if defined(KERNEL)
1532     int waslocked;
1533 #endif
1534     int code;
1535     struct sockaddr_in addr;
1536     register struct rx_peer *peer = conn->peer;
1537     osi_socket socket;
1538 #ifdef RXDEBUG
1539     char deliveryType = 'S';
1540 #endif
1541     /* The address we're sending the packet to */
1542     memset(&addr, 0, sizeof(addr));
1543     addr.sin_family = AF_INET;
1544     addr.sin_port = peer->port;
1545     addr.sin_addr.s_addr = peer->host;
1546
1547     /* This stuff should be revamped, I think, so that most, if not
1548      * all, of the header stuff is always added here.  We could
1549      * probably do away with the encode/decode routines. XXXXX */
1550
1551     /* Stamp each packet with a unique serial number.  The serial
1552      * number is maintained on a connection basis because some types
1553      * of security may be based on the serial number of the packet,
1554      * and security is handled on a per authenticated-connection
1555      * basis. */
1556     /* Pre-increment, to guarantee no zero serial number; a zero
1557      * serial number means the packet was never sent. */
1558     MUTEX_ENTER(&conn->conn_data_lock);
1559     p->header.serial = ++conn->serial;
1560     MUTEX_EXIT(&conn->conn_data_lock);
1561     /* This is so we can adjust retransmit time-outs better in the face of
1562      * rapidly changing round-trip times.  RTO estimation is not a la Karn.
1563      */
1564     if (p->firstSerial == 0) {
1565         p->firstSerial = p->header.serial;
1566     }
1567 #ifdef RXDEBUG
1568     /* If an output tracer function is defined, call it with the packet and
1569      * network address.  Note this function may modify its arguments. */
1570     if (rx_almostSent) {
1571         int drop = (*rx_almostSent) (p, &addr);
1572         /* drop packet if return value is non-zero? */
1573         if (drop)
1574             deliveryType = 'D'; /* Drop the packet */
1575     }
1576 #endif
1577
1578     /* Get network byte order header */
1579     rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
1580                                  * touch ALL the fields */
1581
1582     /* Send the packet out on the same socket that related packets are being
1583      * received on */
1584     socket =
1585         (conn->type ==
1586          RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
1587
1588 #ifdef RXDEBUG
1589     /* Possibly drop this packet,  for testing purposes */
1590     if ((deliveryType == 'D')
1591         || ((rx_intentionallyDroppedPacketsPer100 > 0)
1592             && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1593         deliveryType = 'D';     /* Drop the packet */
1594     } else {
1595         deliveryType = 'S';     /* Send the packet */
1596 #endif /* RXDEBUG */
1597
1598         /* Loop until the packet is sent.  We'd prefer just to use a
1599          * blocking socket, but unfortunately the interface doesn't
1600          * allow us to have the socket block in send mode, and not
1601          * block in receive mode */
1602         AFS_RXGUNLOCK();
1603 #ifdef KERNEL
1604         waslocked = ISAFS_GLOCK();
1605 #ifdef RX_KERNEL_TRACE
1606         if (ICL_SETACTIVE(afs_iclSetp)) {
1607             if (!waslocked)
1608                 AFS_GLOCK();
1609             afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
1610                        "before osi_NetSend()");
1611             AFS_GUNLOCK();
1612         } else
1613 #else
1614         if (waslocked)
1615             AFS_GUNLOCK();
1616 #endif
1617 #endif
1618         if ((code =
1619              osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1620                          p->length + RX_HEADER_SIZE, istack)) != 0) {
1621             /* send failed, so let's hurry up the resend, eh? */
1622             MUTEX_ENTER(&rx_stats_mutex);
1623             rx_stats.netSendFailures++;
1624             MUTEX_EXIT(&rx_stats_mutex);
1625             p->retryTime = p->timeSent; /* resend it very soon */
1626             clock_Addmsec(&(p->retryTime),
1627                           10 + (((afs_uint32) p->backoff) << 8));
1628
1629 #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
1630             /* Linux is nice -- it can tell us right away that we cannot
1631              * reach this recipient by returning an ENETUNREACH error
1632              * code.  So, when this happens let's "down" the host NOW so
1633              * we don't sit around waiting for this host to timeout later.
1634              */
1635             if (call && code == -ENETUNREACH)
1636                 call->lastReceiveTime = 0;
1637 #endif
1638         }
1639 #ifdef KERNEL
1640 #ifdef RX_KERNEL_TRACE
1641         if (ICL_SETACTIVE(afs_iclSetp)) {
1642             AFS_GLOCK();
1643             afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
1644                        "after osi_NetSend()");
1645             if (!waslocked)
1646                 AFS_GUNLOCK();
1647         } else
1648 #else
1649         if (waslocked)
1650             AFS_GLOCK();
1651 #endif
1652 #endif
1653         AFS_RXGLOCK();
1654 #ifdef RXDEBUG
1655     }
1656     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (int)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
1657 #endif
1658     MUTEX_ENTER(&rx_stats_mutex);
1659     rx_stats.packetsSent[p->header.type - 1]++;
1660     MUTEX_EXIT(&rx_stats_mutex);
1661     MUTEX_ENTER(&peer->peer_lock);
1662     hadd32(peer->bytesSent, p->length);
1663     MUTEX_EXIT(&peer->peer_lock);
1664 }
1665
1666 /* Send a list of packets to appropriate destination for the specified
1667  * connection.  The headers are first encoded and placed in the packets.
1668  */
1669 void
1670 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
1671                    struct rx_packet **list, int len, int istack)
1672 {
1673 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
1674     int waslocked;
1675 #endif
1676     struct sockaddr_in addr;
1677     register struct rx_peer *peer = conn->peer;
1678     osi_socket socket;
1679     struct rx_packet *p = NULL;
1680     struct iovec wirevec[RX_MAXIOVECS];
1681     int i, length, code;
1682     afs_uint32 serial;
1683     afs_uint32 temp;
1684     struct rx_jumboHeader *jp;
1685 #ifdef RXDEBUG
1686     char deliveryType = 'S';
1687 #endif
1688     /* The address we're sending the packet to */
1689     addr.sin_family = AF_INET;
1690     addr.sin_port = peer->port;
1691     addr.sin_addr.s_addr = peer->host;
1692
1693     if (len + 1 > RX_MAXIOVECS) {
1694         osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1695     }
1696
1697     /*
1698      * Stamp the packets in this jumbogram with consecutive serial numbers
1699      */
1700     MUTEX_ENTER(&conn->conn_data_lock);
1701     serial = conn->serial;
1702     conn->serial += len;
1703     MUTEX_EXIT(&conn->conn_data_lock);
1704
1705
1706     /* This stuff should be revamped, I think, so that most, if not
1707      * all, of the header stuff is always added here.  We could
1708      * probably do away with the encode/decode routines. XXXXX */
1709
1710     jp = NULL;
1711     length = RX_HEADER_SIZE;
1712     wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1713     wirevec[0].iov_len = RX_HEADER_SIZE;
1714     for (i = 0; i < len; i++) {
1715         p = list[i];
1716
1717         /* The whole 3.5 jumbogram scheme relies on packets fitting
1718          * in a single packet buffer. */
1719         if (p->niovecs > 2) {
1720             osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1721         }
1722
1723         /* Set the RX_JUMBO_PACKET flags in all but the last packets
1724          * in this chunk.  */
1725         if (i < len - 1) {
1726             if (p->length != RX_JUMBOBUFFERSIZE) {
1727                 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1728             }
1729             p->header.flags |= RX_JUMBO_PACKET;
1730             length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1731             wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1732         } else {
1733             wirevec[i + 1].iov_len = p->length;
1734             length += p->length;
1735         }
1736         wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
1737         if (jp != NULL) {
1738             /* Convert jumbo packet header to network byte order */
1739             temp = (afs_uint32) (p->header.flags) << 24;
1740             temp |= (afs_uint32) (p->header.spare);
1741             *(afs_uint32 *) jp = htonl(temp);
1742         }
1743         jp = (struct rx_jumboHeader *)
1744             ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1745
1746         /* Stamp each packet with a unique serial number.  The serial
1747          * number is maintained on a connection basis because some types
1748          * of security may be based on the serial number of the packet,
1749          * and security is handled on a per authenticated-connection
1750          * basis. */
1751         /* Pre-increment, to guarantee no zero serial number; a zero
1752          * serial number means the packet was never sent. */
1753         p->header.serial = ++serial;
1754         /* This is so we can adjust retransmit time-outs better in the face of
1755          * rapidly changing round-trip times.  RTO estimation is not a la Karn.
1756          */
1757         if (p->firstSerial == 0) {
1758             p->firstSerial = p->header.serial;
1759         }
1760 #ifdef RXDEBUG
1761         /* If an output tracer function is defined, call it with the packet and
1762          * network address.  Note this function may modify its arguments. */
1763         if (rx_almostSent) {
1764             int drop = (*rx_almostSent) (p, &addr);
1765             /* drop packet if return value is non-zero? */
1766             if (drop)
1767                 deliveryType = 'D';     /* Drop the packet */
1768         }
1769 #endif
1770
1771         /* Get network byte order header */
1772         rxi_EncodePacketHeader(p);      /* XXX in the event of rexmit, etc, don't need to
1773                                          * touch ALL the fields */
1774     }
1775
1776     /* Send the packet out on the same socket that related packets are being
1777      * received on */
1778     socket =
1779         (conn->type ==
1780          RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
1781
1782 #ifdef RXDEBUG
1783     /* Possibly drop this packet,  for testing purposes */
1784     if ((deliveryType == 'D')
1785         || ((rx_intentionallyDroppedPacketsPer100 > 0)
1786             && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1787         deliveryType = 'D';     /* Drop the packet */
1788     } else {
1789         deliveryType = 'S';     /* Send the packet */
1790 #endif /* RXDEBUG */
1791
1792         /* Loop until the packet is sent.  We'd prefer just to use a
1793          * blocking socket, but unfortunately the interface doesn't
1794          * allow us to have the socket block in send mode, and not
1795          * block in receive mode */
1796         AFS_RXGUNLOCK();
1797 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
1798         waslocked = ISAFS_GLOCK();
1799         if (!istack && waslocked)
1800             AFS_GUNLOCK();
1801 #endif
1802         if ((code =
1803              osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
1804                          istack)) != 0) {
1805             /* send failed, so let's hurry up the resend, eh? */
1806             MUTEX_ENTER(&rx_stats_mutex);
1807             rx_stats.netSendFailures++;
1808             MUTEX_EXIT(&rx_stats_mutex);
1809             for (i = 0; i < len; i++) {
1810                 p = list[i];
1811                 p->retryTime = p->timeSent;     /* resend it very soon */
1812                 clock_Addmsec(&(p->retryTime),
1813                               10 + (((afs_uint32) p->backoff) << 8));
1814             }
1815 #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
1816             /* Linux is nice -- it can tell us right away that we cannot
1817              * reach this recipient by returning an ENETUNREACH error
1818              * code.  So, when this happens let's "down" the host NOW so
1819              * we don't sit around waiting for this host to timeout later.
1820              */
1821             if (call && code == -ENETUNREACH)
1822                 call->lastReceiveTime = 0;
1823 #endif
1824         }
1825 #if     defined(AFS_SUN5_ENV) && defined(KERNEL)
1826         if (!istack && waslocked)
1827             AFS_GLOCK();
1828 #endif
1829         AFS_RXGLOCK();
1830 #ifdef RXDEBUG
1831     }
1832
1833     assert(p != NULL);
1834
1835     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (int)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
1836
1837 #endif
1838     MUTEX_ENTER(&rx_stats_mutex);
1839     rx_stats.packetsSent[p->header.type - 1]++;
1840     MUTEX_EXIT(&rx_stats_mutex);
1841     MUTEX_ENTER(&peer->peer_lock);
1842
1843     hadd32(peer->bytesSent, p->length);
1844     MUTEX_EXIT(&peer->peer_lock);
1845 }
1846
1847
1848 /* Send a "special" packet to the peer connection.  If call is
1849  * specified, then the packet is directed to a specific call channel
1850  * associated with the connection, otherwise it is directed to the
1851  * connection only. Uses optionalPacket if it is supplied, rather than
1852  * allocating a new packet buffer.  Nbytes is the length of the data
1853  * portion of the packet.  If data is non-null, nbytes of data are
1854  * copied into the packet.  Type is the type of the packet, as defined
1855  * in rx.h.  Bug: there's a lot of duplication between this and other
1856  * routines.  This needs to be cleaned up. */
1857 struct rx_packet *
1858 rxi_SendSpecial(register struct rx_call *call,
1859                 register struct rx_connection *conn,
1860                 struct rx_packet *optionalPacket, int type, char *data,
1861                 int nbytes, int istack)
1862 {
1863     /* Some of the following stuff should be common code for all
1864      * packet sends (it's repeated elsewhere) */
1865     register struct rx_packet *p;
1866     unsigned int i = 0;
1867     int savelen = 0, saven = 0;
1868     int channel, callNumber;
1869     if (call) {
1870         channel = call->channel;
1871         callNumber = *call->callNumber;
1872         /* BUSY packets refer to the next call on this connection */
1873         if (type == RX_PACKET_TYPE_BUSY) {
1874             callNumber++;
1875         }
1876     } else {
1877         channel = 0;
1878         callNumber = 0;
1879     }
1880     p = optionalPacket;
1881     if (!p) {
1882         p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1883         if (!p)
1884             osi_Panic("rxi_SendSpecial failure");
1885     }
1886
1887     if (nbytes != -1)
1888         p->length = nbytes;
1889     else
1890         nbytes = p->length;
1891     p->header.serviceId = conn->serviceId;
1892     p->header.securityIndex = conn->securityIndex;
1893     p->header.cid = (conn->cid | channel);
1894     p->header.callNumber = callNumber;
1895     p->header.seq = 0;
1896     p->header.epoch = conn->epoch;
1897     p->header.type = type;
1898     p->header.flags = 0;
1899     if (conn->type == RX_CLIENT_CONNECTION)
1900         p->header.flags |= RX_CLIENT_INITIATED;
1901     if (data)
1902         rx_packetwrite(p, 0, nbytes, data);
1903
1904     for (i = 1; i < p->niovecs; i++) {
1905         if (nbytes <= p->wirevec[i].iov_len) {
1906             savelen = p->wirevec[i].iov_len;
1907             saven = p->niovecs;
1908             p->wirevec[i].iov_len = nbytes;
1909             p->niovecs = i + 1; /* so condition fails because i == niovecs */
1910         } else
1911             nbytes -= p->wirevec[i].iov_len;
1912     }
1913
1914     if (call)
1915         rxi_Send(call, p, istack);
1916     else
1917         rxi_SendPacket((struct rx_call *)0, conn, p, istack);
1918     if (saven) {                /* means we truncated the packet above.  We probably don't  */
1919         /* really need to do this, but it seems safer this way, given that  */
1920         /* sneaky optionalPacket... */
1921         p->wirevec[i - 1].iov_len = savelen;
1922         p->niovecs = saven;
1923     }
1924     if (!optionalPacket)
1925         rxi_FreePacket(p);
1926     return optionalPacket;
1927 }
1928
1929
1930 /* Encode the packet's header (from the struct header in the packet to
1931  * the net byte order representation in the wire representation of the
1932  * packet, which is what is actually sent out on the wire) */
1933 void
1934 rxi_EncodePacketHeader(register struct rx_packet *p)
1935 {
1936     register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
1937
1938     memset((char *)buf, 0, RX_HEADER_SIZE);
1939     *buf++ = htonl(p->header.epoch);
1940     *buf++ = htonl(p->header.cid);
1941     *buf++ = htonl(p->header.callNumber);
1942     *buf++ = htonl(p->header.seq);
1943     *buf++ = htonl(p->header.serial);
1944     *buf++ = htonl((((afs_uint32) p->header.type) << 24)
1945                    | (((afs_uint32) p->header.flags) << 16)
1946                    | (p->header.userStatus << 8) | p->header.securityIndex);
1947     /* Note: top 16 bits of this next word were reserved */
1948     *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
1949 }
1950
1951 /* Decode the packet's header (from net byte order to a struct header) */
1952 void
1953 rxi_DecodePacketHeader(register struct rx_packet *p)
1954 {
1955     register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
1956     afs_uint32 temp;
1957
1958     p->header.epoch = ntohl(*buf);
1959     buf++;
1960     p->header.cid = ntohl(*buf);
1961     buf++;
1962     p->header.callNumber = ntohl(*buf);
1963     buf++;
1964     p->header.seq = ntohl(*buf);
1965     buf++;
1966     p->header.serial = ntohl(*buf);
1967     buf++;
1968
1969     temp = ntohl(*buf);
1970     buf++;
1971
1972     /* C will truncate byte fields to bytes for me */
1973     p->header.type = temp >> 24;
1974     p->header.flags = temp >> 16;
1975     p->header.userStatus = temp >> 8;
1976     p->header.securityIndex = temp >> 0;
1977
1978     temp = ntohl(*buf);
1979     buf++;
1980
1981     p->header.serviceId = (temp & 0xffff);
1982     p->header.spare = temp >> 16;
1983     /* Note: top 16 bits of this last word are the security checksum */
1984 }
1985
1986 void
1987 rxi_PrepareSendPacket(register struct rx_call *call,
1988                       register struct rx_packet *p, register int last)
1989 {
1990     register struct rx_connection *conn = call->conn;
1991     int i, j;
1992     ssize_t len;                /* len must be a signed type; it can go negative */
1993
1994     p->flags &= ~RX_PKTFLAG_ACKED;
1995     p->header.cid = (conn->cid | call->channel);
1996     p->header.serviceId = conn->serviceId;
1997     p->header.securityIndex = conn->securityIndex;
1998     p->header.callNumber = *call->callNumber;
1999     p->header.seq = call->tnext++;
2000     p->header.epoch = conn->epoch;
2001     p->header.type = RX_PACKET_TYPE_DATA;
2002     p->header.flags = 0;
2003     p->header.spare = 0;
2004     if (conn->type == RX_CLIENT_CONNECTION)
2005         p->header.flags |= RX_CLIENT_INITIATED;
2006
2007     if (last)
2008         p->header.flags |= RX_LAST_PACKET;
2009
2010     clock_Zero(&p->retryTime);  /* Never yet transmitted */
2011     clock_Zero(&p->firstSent);  /* Never yet transmitted */
2012     p->header.serial = 0;       /* Another way of saying never transmitted... */
2013     p->backoff = 0;
2014
2015     /* Now that we're sure this is the last data on the call, make sure
2016      * that the "length" and the sum of the iov_lens matches. */
2017     len = p->length + call->conn->securityHeaderSize;
2018
2019     for (i = 1; i < p->niovecs && len > 0; i++) {
2020         len -= p->wirevec[i].iov_len;
2021     }
2022     if (len > 0) {
2023         osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
2024     } else {
2025         /* Free any extra elements in the wirevec */
2026         for (j = MAX(2, i); j < p->niovecs; j++) {
2027             rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
2028         }
2029         p->niovecs = i;
2030         p->wirevec[i - 1].iov_len += len;
2031     }
2032     RXS_PreparePacket(conn->securityObject, call, p);
2033 }
2034
2035 /* Given an interface MTU size, calculate an adjusted MTU size that
2036  * will make efficient use of the RX buffers when the peer is sending
2037  * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
2038 int
2039 rxi_AdjustIfMTU(int mtu)
2040 {
2041     int adjMTU;
2042     int frags;
2043
2044     adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2045     if (mtu <= adjMTU) {
2046         return mtu;
2047     }
2048     mtu -= adjMTU;
2049     if (mtu <= 0) {
2050         return adjMTU;
2051     }
2052     frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2053     return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2054 }
2055
2056 /* Given an interface MTU size, and the peer's advertised max receive
2057  * size, calculate an adjisted maxMTU size that makes efficient use
2058  * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2059 int
2060 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2061 {
2062     int maxMTU = mtu * rxi_nSendFrags;
2063     maxMTU = MIN(maxMTU, peerMaxMTU);
2064     return rxi_AdjustIfMTU(maxMTU);
2065 }
2066
2067 /* Given a packet size, figure out how many datagram packet will fit.
2068  * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2069  * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2070  * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2071 int
2072 rxi_AdjustDgramPackets(int frags, int mtu)
2073 {
2074     int maxMTU;
2075     if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2076         return 1;
2077     }
2078     maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2079     maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2080     /* subtract the size of the first and last packets */
2081     maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2082     if (maxMTU < 0) {
2083         return 1;
2084     }
2085     return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2086 }