/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */
#include <afsconfig.h>
#ifdef KERNEL
#include "afs/param.h"
#else
#include <afs/param.h>
#endif

#ifdef KERNEL
#if defined(UKERNEL)
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */

struct rx_packet *rx_mallocedP = 0;

extern char cml_version_number[];
extern int (*rx_almostSent) ();

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport,
				afs_int32 istack);

static int rxi_FreeDataBufsToQueue(struct rx_packet *p, int first,
				   struct rx_queue * q);
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */

/*
 * all packet buffers (iov_base) are integral multiples of
 * the word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    return *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
				    (offset - l)));
	}
	l += packet->wirevec[i].iov_len;
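
/*
 * Editorial sketch, not part of the original source: the loop above (and
 * its siblings below) share one idiom -- walk the wirevec, keeping in `l'
 * the total length of the iovecs already passed, until the iovec covering
 * `offset' is found; the byte then lives at (offset - l) within that
 * iovec.  The hypothetical helper below, guarded by the made-up
 * RX_PACKET_EXAMPLES macro, isolates just that idiom.
 */
#ifdef RX_PACKET_EXAMPLES
static int
rx_example_FindIovec(struct rx_packet *packet, size_t offset)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset)
	    return i;		/* byte is at (offset - l) inside iovec i */
	l += packet->wirevec[i].iov_len;
    }
    return -1;			/* offset lies beyond the packet's buffers */
}
#endif /* RX_PACKET_EXAMPLES */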
/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	}
	l += packet->wirevec[i].iov_len;
/*
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
		  char *out)
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    while ((resid > 0) && (i < packet->niovecs)) {
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	l += packet->wirevec[i].iov_len;

    return (resid ? (r - resid) : r);
/*
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy into this iovec.
     * offset only applies to the first iovec.
     */
    while ((resid > 0) && (i < RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
		break;

	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	l += packet->wirevec[i].iov_len;

    return (resid ? (r - resid) : r);
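
/*
 * Editorial sketch, not part of the original source: a minimal caller of
 * the read/write paths above.  rx_packetread() and rx_packetwrite() (used
 * later in this file) fall through to rx_SlowReadPacket() and
 * rx_SlowWritePacket() whenever the requested range crosses an iovec
 * boundary.  Guarded by the made-up RX_PACKET_EXAMPLES macro.
 */
#ifdef RX_PACKET_EXAMPLES
static void
rx_example_ReadWrite(struct rx_packet *p)
{
    char out[32], in[32];

    memset(in, 0x2a, sizeof(in));
    rx_packetwrite(p, 0, sizeof(in), in);	/* scatter into the wirevec */
    rx_packetread(p, 0, sizeof(out), out);	/* gather it back out */
}
#endif /* RX_PACKET_EXAMPLES */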
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
    register struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
	RX_PACKET_IOV_FULLINIT(p);
#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    register struct rx_packet *c;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;

	MUTEX_ENTER(&rx_freePktQ_lock);

	if ((transfer + rx_TSFPQGlobSize) <= rx_nFreePackets) {
	    transfer += rx_TSFPQGlobSize;
	} else if (transfer <= rx_nFreePackets) {
	    transfer = rx_nFreePackets;
	} else {
	    /* alloc enough for us, plus a few globs for other threads */
	    alloc = transfer + (3 * rx_TSFPQGlobSize) - rx_nFreePackets;
	    rxi_MorePacketsNoLock(MAX(alloc, rx_initSendWindow));
	    transfer += rx_TSFPQGlobSize;
	}

	RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

	MUTEX_EXIT(&rx_freePktQ_lock);

    RX_TS_FPQ_CHECKOUT2(rx_ts_info, num_pkts, q);
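
/*
 * Editorial sketch, not part of the original source: the TSFPQ design in
 * miniature.  Each thread owns a private free queue (rx_ts_info->_FPQ)
 * and refills it from the global queue in "globs" of rx_TSFPQGlobSize
 * packets, so the global lock is taken once per glob rather than once per
 * packet.  This simplified version ignores the replenishment case that
 * AllocPacketBufs() handles above.  Guarded by the made-up
 * RX_PACKET_EXAMPLES macro.
 */
#ifdef RX_PACKET_EXAMPLES
static void
rx_example_RefillLocalQueue(int wanted)
{
    struct rx_ts_info_t *rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);
    if (rx_ts_info->_FPQ.len < wanted) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* move (global -> local) enough packets to satisfy the caller */
	RX_TS_FPQ_GTOL2(rx_ts_info, wanted - rx_ts_info->_FPQ.len);
	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#endif /* RX_PACKET_EXAMPLES */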
#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
	 num_pkts--, overq++);

	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	switch (class) {
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	    break;
	}
	MUTEX_EXIT(&rx_stats_mutex);

    if (rx_nFreePackets < num_pkts)
	num_pkts = rx_nFreePackets;

	rxi_NeedMorePackets = TRUE;

    if (rx_nFreePackets < num_pkts) {
	rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), rx_initSendWindow));

    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
	 i++, c = queue_Next(c, rx_packet)) {

    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);

    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    register struct rx_ts_info_t * rx_ts_info;
    register struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

	for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
	    rxi_FreeDataBufsTSFPQ(c, 1, 0);
	}

    RX_TS_FPQ_CHECKIN2(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    register struct rx_packet *p, *np;

    osi_Assert(num_pkts >= 0);

	for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	    }
	}

	for (queue_Scan(q, p, np, rx_packet)) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	    }
	}

	queue_SpliceAppend(q, &cbs);

    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	}
    } else {
	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	}
    }
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    register struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
	nv++;
    if ((nv + p->niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - p->niovecs;

    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
	p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    }

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
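
/*
 * Editorial sketch, not part of the original source: the sizing
 * arithmetic used by rxi_AllocDataBuf() above, pulled out on its own --
 * a ceiling division of nb by RX_CBUFFERSIZE, clamped so the packet
 * never exceeds RX_MAXWVECS iovecs.  Guarded by the made-up
 * RX_PACKET_EXAMPLES macro.
 */
#ifdef RX_PACKET_EXAMPLES
static int
rx_example_CbufsNeeded(struct rx_packet *p, int nb)
{
    int nv = (nb + RX_CBUFFERSIZE - 1) / RX_CBUFFERSIZE;

    if ((nv + p->niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - p->niovecs;
    return nv;
}
#endif /* RX_PACKET_EXAMPLES */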
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_NeedMorePackets = FALSE;
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	p->flags |= RX_PKTFLAG_FREE;

	queue_Append(&rx_freePacketQueue, p);
    }

    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (flush_global &&
	(num_keep_local < apackets)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
	rxi_NeedMorePackets = FALSE;
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
    struct rx_packet *p, *e;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    memset((char *)p, 0, getme);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	p->flags |= RX_PKTFLAG_FREE;

	queue_Append(&rx_freePacketQueue, p);
    }

    rx_nFreePackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_stats_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_stats_mutex);
#endif /* RX_ENABLE_TSFPQ */
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();

rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {

	MUTEX_ENTER(&rx_freePktQ_lock);
	if (num_keep_local < rx_ts_info->_FPQ.len) {
	    xfer = rx_ts_info->_FPQ.len - num_keep_local;
	    RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
	} else {
	    xfer = num_keep_local - rx_ts_info->_FPQ.len;
	    if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
		xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
	    if (rx_nFreePackets < xfer) {
		rxi_MorePacketsNoLock(xfer - rx_nFreePackets);
	    }

	    RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
	}

	MUTEX_EXIT(&rx_freePktQ_lock);
    }

rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_initSendWindow);
    }

/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list. */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
 */
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    register struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	RX_TS_FPQ_LTOG(rx_ts_info);
    }
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %lx\n", (unsigned long)p));

    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    register struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
#endif /* RX_ENABLE_TSFPQ */
/* free continuation buffers off a packet into a queue of buffers */
rxi_FreeDataBufsToQueue(struct rx_packet *p, int first, struct rx_queue * q)
    struct rx_packet * cb;

    for (; first < p->niovecs; first++, count++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_PacketIOVToQueue: unexpected NULL iov");
	cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
	RX_FPQ_MARK_FREE(cb);
	queue_Append(q, cb);
    }

rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
    struct iovec *iov, *end;

    if (first != 1)		/* MTUXXX */
	osi_Panic("FreeDataBufs 1: first must be 1");
    iov = &p->wirevec[1];
    end = iov + (p->niovecs - 1);
    if (iov->iov_base != (caddr_t) p->localdata)	/* MTUXXX */
	osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
    for (iov++; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }
#ifdef RX_ENABLE_TSFPQ
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, int first, int flush_global)
    struct iovec *iov, *end;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (first != 1)		/* MTUXXX */
	osi_Panic("FreeDataBufs 1: first must be 1");
    iov = &p->wirevec[1];
    end = iov + (p->niovecs - 1);
    if (iov->iov_base != (caddr_t) p->localdata)	/* MTUXXX */
	osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
    for (iov++; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {

	iov->iov_len = RX_CBUFFERSIZE;
#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    register struct rx_ts_info_t * rx_ts_info;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    RX_TS_INFO_GET(rx_ts_info);
    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    }
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	MUTEX_EXIT(&rx_freePktQ_lock);
    }

#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 1, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 1);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketNoLock(int class)
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	switch (class) {
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	    break;
	}
	MUTEX_EXIT(&rx_stats_mutex);
	return (struct rx_packet *)0;
    }

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
#ifdef KERNEL
	if (queue_IsEmpty(&rx_freePacketQueue))
	    osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_initSendWindow);
#endif /* KERNEL */

	RX_TS_FPQ_GTOL(rx_ts_info);
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacketNoLock(int class)
    register struct rx_packet *p;

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	switch (class) {
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	    break;
	}
	MUTEX_EXIT(&rx_stats_mutex);
	return (struct rx_packet *)0;
    }

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

#ifdef KERNEL
    if (queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
    if (queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(rx_initSendWindow);
#endif /* KERNEL */

    p = queue_First(&rx_freePacketQueue, rx_packet);

    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketTSFPQ(int class, int pull_global)
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_initSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

	MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
	return NULL;
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    register struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
    return p;
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    register struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
    return p;
#endif /* RX_ENABLE_TSFPQ */
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
rxi_AllocSendPacket(register struct rx_call *call, int want)
    register struct rx_packet *p = (struct rx_packet *)0;
    register unsigned delta;

    mud = call->MTU - RX_HEADER_SIZE;
    delta =
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
	want = MIN(want, mud);

	if ((unsigned)want > p->length)
	    (void)rxi_AllocDataBuf(p, (want - p->length),
				   RX_PACKET_CLASS_SEND_CBUF);

	if ((unsigned)p->length > mud)

	if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);

	    want = MIN(want, mud);

	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);

	    if ((unsigned)p->length > mud)

	    if (delta >= p->length) {

	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we
	 * retry. */
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
	osi_rxSleep(&rx_waitingForPackets);
#endif
	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
#ifdef AFS_NT40_ENV
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
#else
/* count the number of used FDs */
CountFDs(register int amax)
    register int i, code;

    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
    }
#endif /* AFS_NT40_ENV */

#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host,
	       u_short * port)
    struct sockaddr_in from;
    register afs_int32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
    tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes < 0 && errno == EWOULDBLOCK) {
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.noPacketOnRead++;
	    MUTEX_EXIT(&rx_stats_mutex);
	} else if (nbytes <= 0) {
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.bogusPacketOnRead++;
	    rx_stats.bogusHost = from.sin_addr.s_addr;
	    MUTEX_EXIT(&rx_stats_mutex);
	    dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
		 ntohs(from.sin_port), nbytes));
	}
    }
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
	     && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;

	dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
	     p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
	     p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	     p->length));
	rxi_TrimDataBufs(p, 1);

	/* Extract packet header. */
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
	    struct rx_peer *peer;
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.packetsRead[p->header.type - 1]++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    /*
	     * Try to look up this peer structure.  If it doesn't exist,
	     * don't create a new one -
	     * we don't keep count of the bytes sent/received if a peer
	     * structure doesn't already exist.
	     *
	     * The peer/connection cleanup code assumes that there is 1 peer
	     * per connection.  If we actually created a peer structure here
	     * and this packet was an rxdebug packet, the peer structure would
	     * never be cleaned up.
	     */
	    peer = rxi_FindPeer(*host, *port, 0, 0);
	    /* Since this may not be associated with a connection,
	     * it may have no refCount, meaning we could race with
	     * ReapConnections. */
	    if (peer && (peer->refCount > 0)) {
		MUTEX_ENTER(&peer->peer_lock);
		hadd32(peer->bytesReceived, p->length);
		MUTEX_EXIT(&peer->peer_lock);
	    }
	}

	/* Free any empty packet buffers at the end of this packet */
	rxi_TrimDataBufs(p, 1);

#endif /* !KERNEL || UKERNEL */
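
/*
 * Editorial sketch, not part of the original source: the shape of a
 * receive path built on rxi_ReadPacket().  The real listener loop (in
 * rx_user.c) also restores iovec sizes when reusing a buffer and handles
 * allocation failure; rxi_ReceivePacket() may consume the packet and hand
 * back a replacement.  Guarded by the made-up RX_PACKET_EXAMPLES macro,
 * and only meaningful in the user-space builds the #if block above covers.
 */
#if defined(RX_PACKET_EXAMPLES) && (!defined(KERNEL) || defined(UKERNEL))
static void
rx_example_ReceiveOne(osi_socket sock)
{
    afs_uint32 host;
    u_short port;
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_RECEIVE);

    if (p && rxi_ReadPacket(sock, p, &host, &port)) {
	/* a valid packet: the header is already decoded, so dispatch it */
	p = rxi_ReceivePacket(p, sock, host, port, (int *)0,
			      (struct rx_call **)0);
    }
    if (p)
	rxi_FreePacket(p);
}
#endif /* RX_PACKET_EXAMPLES */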
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */

rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
		     int first)
    struct rx_packet *np;
    struct rx_jumboHeader *jp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));

    niov = p->niovecs - 2;
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));

    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
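
/*
 * Editorial sketch, not part of the original source: the jumbogram layout
 * arithmetic that rxi_SplitJumboPacket() relies on.  Every packet but the
 * last contributes RX_JUMBOBUFFERSIZE data bytes plus an
 * RX_JUMBOHEADERSIZE abbreviated header, so the packet count can be
 * recovered from the total data length (the RX_JUMBO_PACKET flag checks
 * the real code performs are omitted).  Guarded by the made-up
 * RX_PACKET_EXAMPLES macro.
 */
#ifdef RX_PACKET_EXAMPLES
static int
rx_example_JumboCount(int total)
{
    int per = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    int n;

    for (n = 1; total >= per; n++)
	total -= per;		/* peel off one full-sized leading packet */
    return n;
}
#endif /* RX_PACKET_EXAMPLES */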
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
	   int niovs)
    register int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	    t = iovs[i].iov_len;
	memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

	if (m->m_len <= off) {

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;

    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

	t = MIN(l1, MIN(l2, (unsigned int)len));

	    p1 = mtod(m, caddr_t);

	    p2 = iovs[i].iov_base;
	    l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;

    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
		 phandle->niovecs);
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */

rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
		       afs_int32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;

	    /* get basic stats */
	    memset((char *)&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
#endif
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_nWaiting);
	    tstat.nWaited = htonl(rx_nWaited);
	    queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
			tstat.idleThreads);
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
			   (char *)&tstat);
	    ap->length = sizeof(struct rx_debugStats);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    rx_computelen(ap, ap->length);
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    register struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);

	    tl = sizeof(struct rx_debugConn) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset((char *)&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * increases exponentially with the number of connections.
		 */
#ifdef AFS_PTHREAD_ENV
		MUTEX_ENTER(&rx_connHashTable_lock);
		/* We might be slightly out of step since we are not
		 * locking each call, but this is only debugging output.
		 */
		for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		    if ((all || rxi_IsConnInteresting(tc))
			&& tin.index-- <= 0) {
			tconn.host = tc->peer->host;
			tconn.port = tc->peer->port;
			tconn.cid = htonl(tc->cid);
			tconn.epoch = htonl(tc->epoch);
			tconn.serial = htonl(tc->serial);
			for (j = 0; j < RX_MAXCALLS; j++) {
			    tconn.callNumber[j] = htonl(tc->callNumber[j]);
			    if ((tcall = tc->call[j])) {
				tconn.callState[j] = tcall->state;
				tconn.callMode[j] = tcall->mode;
				tconn.callFlags[j] = tcall->flags;
				if (queue_IsNotEmpty(&tcall->rq))
				    tconn.callOther[j] |= RX_OTHER_IN;
				if (queue_IsNotEmpty(&tcall->tq))
				    tconn.callOther[j] |= RX_OTHER_OUT;
			    } else
				tconn.callState[j] = RX_STATE_NOTINIT;
			}

			tconn.natMTU = htonl(tc->peer->natMTU);
			tconn.error = htonl(tc->error);
			tconn.flags = tc->flags;
			tconn.type = tc->type;
			tconn.securityIndex = tc->securityIndex;
			if (tc->securityObject) {
			    RXS_GetStats(tc->securityObject, tc,
					 &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			    DOHTONL(packetsReceived);
			    DOHTONL(packetsSent);
			    DOHTONL(bytesReceived);
				 sizeof(tconn.secStats.spares) /
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)

			MUTEX_EXIT(&rx_connHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
				       (char *)&tconn);
			ap->length = sizeof(struct rx_debugConn);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);

		MUTEX_EXIT(&rx_connHashTable_lock);

	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
			   (char *)&tconn);
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	/*
	 * Pass back all the peer structures we have available
	 */
    case RX_DEBUGI_GETPEER:{
	    register struct rx_peer *tp;
	    struct rx_debugPeer tpeer;

	    tl = sizeof(struct rx_debugPeer) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset((char *)&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * increases exponentially with the number of peers.
		 *
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock.
		 * also increases the risk that we will miss a new
		 * entry - but we are willing to live with this
		 * limitation since this is meant for debugging only
		 */
#ifdef AFS_PTHREAD_ENV
		MUTEX_ENTER(&rx_peerHashTable_lock);
		for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		    if (tin.index-- <= 0) {
			tpeer.host = tp->host;
			tpeer.port = tp->port;
			tpeer.ifMTU = htons(tp->ifMTU);
			tpeer.idleWhen = htonl(tp->idleWhen);
			tpeer.refCount = htons(tp->refCount);
			tpeer.burstSize = tp->burstSize;
			tpeer.burst = tp->burst;
			tpeer.burstWait.sec = htonl(tp->burstWait.sec);
			tpeer.burstWait.usec = htonl(tp->burstWait.usec);
			tpeer.rtt = htonl(tp->rtt);
			tpeer.rtt_dev = htonl(tp->rtt_dev);
			tpeer.timeout.sec = htonl(tp->timeout.sec);
			tpeer.timeout.usec = htonl(tp->timeout.usec);
			tpeer.nSent = htonl(tp->nSent);
			tpeer.reSends = htonl(tp->reSends);
			tpeer.inPacketSkew = htonl(tp->inPacketSkew);
			tpeer.outPacketSkew = htonl(tp->outPacketSkew);
			tpeer.rateFlag = htonl(tp->rateFlag);
			tpeer.natMTU = htons(tp->natMTU);
			tpeer.maxMTU = htons(tp->maxMTU);
			tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
			tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
			tpeer.MTU = htons(tp->MTU);
			tpeer.cwind = htons(tp->cwind);
			tpeer.nDgramPackets = htons(tp->nDgramPackets);
			tpeer.congestSeq = htons(tp->congestSeq);
			tpeer.bytesSent.high = htonl(tp->bytesSent.high);
			tpeer.bytesSent.low = htonl(tp->bytesSent.low);
			tpeer.bytesReceived.high =
			    htonl(tp->bytesReceived.high);
			tpeer.bytesReceived.low =
			    htonl(tp->bytesReceived.low);

			MUTEX_EXIT(&rx_peerHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
				       (char *)&tpeer);
			ap->length = sizeof(struct rx_debugPeer);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);

		MUTEX_EXIT(&rx_peerHashTable_lock);

	    /* if we make it here, there are no interesting packets */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
			   (char *)&tpeer);
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    case RX_DEBUGI_RXSTATS:{

	    tl = sizeof(rx_stats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    /* Since it's all int32s convert to network order with a loop. */
	    MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) &rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

	    ap->length = sizeof(rx_stats);
	    MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}

    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);

	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
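
/*
 * Editorial sketch, not part of the original source: what the client half
 * of the exchange above looks like.  An rxdebug query is just a struct
 * rx_debugIn (type and index, both in network byte order) carried in a
 * debug packet; the peer echoes the packet back filled with the requested
 * data.  Guarded by the made-up RX_PACKET_EXAMPLES macro.
 */
#ifdef RX_PACKET_EXAMPLES
static void
rx_example_FillDebugQuery(struct rx_packet *p)
{
    struct rx_debugIn tin;

    tin.type = htonl(RX_DEBUGI_GETSTATS);
    tin.index = htonl(0);
    rx_packetwrite(p, 0, sizeof(tin), (char *)&tin);
    p->length = sizeof(tin);
}
#endif /* RX_PACKET_EXAMPLES */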
rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
			 afs_int32 ahost, short aport, int istack)
    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {

	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);

	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_int32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;

    int waslocked = ISAFS_GLOCK();

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
#endif

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	}
	nbytes -= apacket->wirevec[i].iov_len;
    }

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {

	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");

    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {

	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");

    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
    }
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;

    char deliveryType = 'S';

    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;
    }

    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	if (drop)
	    deliveryType = 'D';	/* Drop the packet */
    }

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
	(conn->type ==
	 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

    /* Loop until the packet is sent.  We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
    waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {

	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");

    if ((code =
	 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
		     p->length + RX_HEADER_SIZE, istack)) != 0) {
	/* send failed, so let's hurry up the resend, eh? */
	MUTEX_ENTER(&rx_stats_mutex);
	rx_stats.netSendFailures++;
	MUTEX_EXIT(&rx_stats_mutex);
	p->retryTime = p->timeSent;	/* resend it very soon */
	clock_Addmsec(&(p->retryTime),
		      10 + (((afs_uint32) p->backoff) << 8));

#ifdef AFS_NT40_ENV
	/* Windows is nice -- it can tell us right away that we cannot
	 * reach this recipient by returning an WSAEHOSTUNREACH error
	 * code.  So, when this happens let's "down" the host NOW so
	 * we don't sit around waiting for this host to timeout later.
	 */
	if (call && code == -1 && errno == WSAEHOSTUNREACH)
	    call->lastReceiveTime = 0;
#endif
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
	/* Linux is nice -- it can tell us right away that we cannot
	 * reach this recipient by returning an ENETUNREACH error
	 * code.  So, when this happens let's "down" the host NOW so
	 * we don't sit around waiting for this host to timeout later.
	 */
	if (call && code == -ENETUNREACH)
	    call->lastReceiveTime = 0;
#endif

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {

	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1],
	 ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch,
	 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	 (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
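
/*
 * Editorial sketch, not part of the original source: the "hurry up the
 * resend" computation used above when osi_NetSend() fails.  The packet is
 * rescheduled for its original send time plus 10ms plus a per-packet
 * exponential term (backoff << 8 milliseconds).  Guarded by the made-up
 * RX_PACKET_EXAMPLES macro.
 */
#ifdef RX_PACKET_EXAMPLES
static void
rx_example_HurryRetry(struct rx_packet *p)
{
    p->retryTime = p->timeSent;	/* resend it very soon... */
    clock_Addmsec(&(p->retryTime),
		  10 + (((afs_uint32) p->backoff) << 8));
}
#endif /* RX_PACKET_EXAMPLES */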
2248 /* Send a list of packets to appropriate destination for the specified
2249 * connection. The headers are first encoded and placed in the packets.
2252 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2253 struct rx_packet **list, int len, int istack)
2255 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2258 struct sockaddr_in addr;
2259 register struct rx_peer *peer = conn->peer;
2261 struct rx_packet *p = NULL;
2262 struct iovec wirevec[RX_MAXIOVECS];
2263 int i, length, code;
2266 struct rx_jumboHeader *jp;
2268 char deliveryType = 'S';
2270 /* The address we're sending the packet to */
2271 addr.sin_family = AF_INET;
2272 addr.sin_port = peer->port;
2273 addr.sin_addr.s_addr = peer->host;
2275 if (len + 1 > RX_MAXIOVECS) {
2276 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2280 * Stamp the packets in this jumbogram with consecutive serial numbers
2282 MUTEX_ENTER(&conn->conn_data_lock);
2283 serial = conn->serial;
2284 conn->serial += len;
2285 MUTEX_EXIT(&conn->conn_data_lock);
2288 /* This stuff should be revamped, I think, so that most, if not
2289 * all, of the header stuff is always added here. We could
2290 * probably do away with the encode/decode routines. XXXXX */
2293 length = RX_HEADER_SIZE;
2294 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2295 wirevec[0].iov_len = RX_HEADER_SIZE;
2296 for (i = 0; i < len; i++) {
2299 /* The whole 3.5 jumbogram scheme relies on packets fitting
2300 * in a single packet buffer. */
2301 if (p->niovecs > 2) {
2302 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2305 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2308 if (p->length != RX_JUMBOBUFFERSIZE) {
2309 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2311 p->header.flags |= RX_JUMBO_PACKET;
2312 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2313 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2315 wirevec[i + 1].iov_len = p->length;
2316 length += p->length;
2318 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2320 /* Convert jumbo packet header to network byte order */
2321 temp = (afs_uint32) (p->header.flags) << 24;
2322 temp |= (afs_uint32) (p->header.spare);
2323 *(afs_uint32 *) jp = htonl(temp);
2325 jp = (struct rx_jumboHeader *)
2326 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2328 /* Stamp each packet with a unique serial number. The serial
2329 * number is maintained on a connection basis because some types
2330 * of security may be based on the serial number of the packet,
2331 * and security is handled on a per authenticated-connection
2333 /* Pre-increment, to guarantee no zero serial number; a zero
2334 * serial number means the packet was never sent. */
2335 p->header.serial = ++serial;
2336 /* This is so we can adjust retransmit time-outs better in the face of
2337 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2339 if (p->firstSerial == 0) {
2340 p->firstSerial = p->header.serial;
2343 /* If an output tracer function is defined, call it with the packet and
2344 * network address. Note this function may modify its arguments. */
2345 if (rx_almostSent) {
2346 int drop = (*rx_almostSent) (p, &addr);
2347 /* drop packet if return value is non-zero? */
2349 deliveryType = 'D'; /* Drop the packet */
2353 /* Get network byte order header */
2354 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2355 * touch ALL the fields */
2358 /* Send the packet out on the same socket that related packets are being
2362 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2365 /* Possibly drop this packet, for testing purposes */
2366 if ((deliveryType == 'D')
2367 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2368 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2369 deliveryType = 'D'; /* Drop the packet */
2371 deliveryType = 'S'; /* Send the packet */
2372 #endif /* RXDEBUG */
2374 /* Loop until the packet is sent. We'd prefer just to use a
2375 * blocking socket, but unfortunately the interface doesn't
2376 * allow us to have the socket block in send mode, and not
2377 * block in receive mode */
2378 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2379 waslocked = ISAFS_GLOCK();
2380 if (!istack && waslocked)
2384 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2386 /* send failed, so let's hurry up the resend, eh? */
2387 MUTEX_ENTER(&rx_stats_mutex);
2388 rx_stats.netSendFailures++;
2389 MUTEX_EXIT(&rx_stats_mutex);
2390 for (i = 0; i < len; i++) {
2392 p->retryTime = p->timeSent; /* resend it very soon */
2393 clock_Addmsec(&(p->retryTime),
2394 10 + (((afs_uint32) p->backoff) << 8));
2397 /* Windows is nice -- it can tell us right away that we cannot
2398 * reach this recipient by returning an WSAEHOSTUNREACH error
2399 * code. So, when this happens let's "down" the host NOW so
2400 * we don't sit around waiting for this host to timeout later.
2402 if (call && code == -1 && errno == WSAEHOSTUNREACH)
2403 call->lastReceiveTime = 0;
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
            /* Linux is nice -- it can tell us right away that we cannot
             * reach this recipient by returning an ENETUNREACH error
             * code.  So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later.
             */
            if (call && code == -ENETUNREACH)
                call->lastReceiveTime = 0;
#endif
        }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        if (!istack && waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    assert(p != NULL);

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(register struct rx_call *call,
                register struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;
    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
        p->length = nbytes;
    else
        nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);

    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1; /* so condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }
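    /* An illustrative walk through the truncation loop above, with assumed
     * numbers: if wirevec[1] holds 1400 bytes and nbytes == 12, then on the
     * first pass nbytes <= iov_len, so savelen records 1400, saven records
     * the old niovecs, wirevec[1].iov_len is cut to 12, and niovecs is set
     * to 2, terminating the loop.  savelen/saven let us undo this below. */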
    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {                /* means we truncated the packet above.  We
                                 * probably don't really need to do this, but it
                                 * seems safer this way, given that sneaky
                                 * optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}
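/* A minimal usage sketch (hypothetical call site, not from this file): an
 * abort packet carrying a 32-bit error code could be pushed out on a call
 * like so --
 *
 *     afs_int32 error = htonl(RX_CALL_DEAD);
 *     rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
 *                     RX_PACKET_TYPE_ABORT, (char *)&error,
 *                     sizeof(error), istack);
 *
 * With a null optionalPacket the routine allocates and frees its own
 * buffer, and the caller gets back the null pointer it passed in. */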
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */

    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
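/* For reference, the 28-byte wire header written above, word by word (each
 * word in network byte order):
 *
 *     word 0: epoch
 *     word 1: cid (connection id | channel)
 *     word 2: callNumber
 *     word 3: seq
 *     word 4: serial
 *     word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *     word 6: spare<<16 | serviceId (on the wire the top 16 bits double
 *             as the security checksum; see the decode routine below)
 *
 * rxi_DecodePacketHeader below is the exact inverse of this routine. */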
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;

    temp = ntohl(*buf);
    buf++;
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf);
    buf++;
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
void
rxi_PrepareSendPacket(register struct rx_call *call,
                      register struct rx_packet *p, register int last)
{
    register struct rx_connection *conn = call->conn;
    int i, j;
    ssize_t len;                /* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);  /* Never yet transmitted */
    clock_Zero(&p->firstSent);  /* Never yet transmitted */
    p->header.serial = 0;       /* Another way of saying never transmitted... */
    p->backoff = 0;

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens match. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
    } else {
        struct rx_queue q;
        int nb;

        queue_Init(&q);

        /* Free any extra elements in the wirevec */
        for (j = MAX(2, i), nb = p->niovecs - j; j < p->niovecs; j++) {
            queue_Append(&q, RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
        }
        if (nb)
            rxi_FreePackets(nb, &q);

        p->niovecs = MAX(2, i);
        p->wirevec[MAX(2, i) - 1].iov_len += len;
    }
    RXS_PreparePacket(conn->securityObject, call, p);
}
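/* A worked example of the trimming above, with assumed numbers: for
 * p->length == 100, securityHeaderSize == 4, and wirevec[1].iov_len ==
 * 1400, the loop exits with i == 2 and len == 104 - 1400 == -1296.  No
 * extra cbufs are queued (nb == 0), niovecs stays 2, and the final
 * adjustment shrinks wirevec[1].iov_len by 1296, to exactly the 104
 * bytes that will actually be sent. */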
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
        return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
        return adjMTU;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
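/* A worked example, assuming the usual constants (RX_HEADER_SIZE 28,
 * RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4): for an Ethernet-style
 * mtu of 1500, adjMTU is 1444, and the 56 bytes left over cannot hold
 * another 1416-byte jumbo fragment, so frags is 0 and 1444 is returned.
 * Only whole jumbo fragments ever enlarge the result. */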
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
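/* A worked example with the same assumed constants, plus an assumed
 * UDP_HDR_SIZE of 28 (IP + UDP headers): for frags == 3 and mtu == 1444,
 * maxMTU starts at 3 * 1472 - 28 == 4388 (assuming RX_MAX_PACKET_SIZE
 * does not cap it); subtracting the first/last packet overhead of
 * 28 + 2*1412 + 4 == 2856 leaves 1532, so the routine returns
 * 2 + 1532/1416 == 3 packets per datagram. */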