2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "afs/param.h"
14 #include <afs/param.h>
22 # include "afs/sysincludes.h"
23 # include "afsincludes.h"
24 # include "rx/rx_kcommon.h"
25 # include "rx/rx_clock.h"
26 # include "rx/rx_queue.h"
27 # include "rx/rx_packet.h"
28 # else /* defined(UKERNEL) */
29 # ifdef RX_KERNEL_TRACE
30 # include "../rx/rx_kcommon.h"
33 # ifndef AFS_LINUX20_ENV
36 # if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
37 # include "afs/sysincludes.h"
39 # if defined(AFS_OBSD_ENV)
42 # include "h/socket.h"
43 # if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
44 # if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
45 # include "sys/mount.h" /* it gets pulled in by something later anyway */
49 # include "netinet/in.h"
50 # include "afs/afs_osi.h"
51 # include "rx_kmutex.h"
52 # include "rx/rx_clock.h"
53 # include "rx/rx_queue.h"
55 # include <sys/sysmacros.h>
57 # include "rx/rx_packet.h"
58 # endif /* defined(UKERNEL) */
59 # include "rx/rx_internal.h"
60 # include "rx/rx_globals.h"
62 # include "sys/types.h"
63 # include <sys/stat.h>
65 # if defined(AFS_NT40_ENV)
66 # include <winsock2.h>
68 # define EWOULDBLOCK WSAEWOULDBLOCK
71 # include "rx_xmit_nt.h"
74 # include <sys/socket.h>
75 # include <netinet/in.h>
77 # include "rx_clock.h"
78 # include "rx_internal.h"
80 # include "rx_queue.h"
82 # include <sys/sysmacros.h>
84 # include "rx_packet.h"
85 # include "rx_globals.h"
95 /* rxdb_fileID is used to identify the lock location, along with line#. */
96 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
97 #endif /* RX_LOCKS_DB */
98 struct rx_packet *rx_mallocedP = 0;
100 extern char cml_version_number[];
102 static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);
104 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
105 afs_int32 ahost, short aport,
108 static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
110 struct rx_queue * q);
111 #ifdef RX_ENABLE_TSFPQ
113 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
116 /* some rules about packets:
117 * 1. When a packet is allocated, the final iov_buf contains room for
118 * a security trailer, but iov_len masks that fact. If the security
119 * package wants to add the trailer, it may do so, and then extend
120 * iov_len appropriately. For this reason, packet's niovecs and
121 * iov_len fields should be accurate before calling PreparePacket.
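/*
 * Illustrative sketch (not part of the original file): one way a security
 * package could claim the trailer room described in rule 1 above.  The
 * final iovec is simply widened by the trailer size; rxi_RoundUpPacket()
 * below does essentially this, with bounds checks, on behalf of rxkad.
 * example_ClaimSecurityTrailer and trailer_len are hypothetical names.
 */
static void
example_ClaimSecurityTrailer(struct rx_packet *p, unsigned int trailer_len)
{
    struct iovec *last = &p->wirevec[p->niovecs - 1];

    /* rule 1 guarantees the underlying buffer has slack beyond iov_len */
    last->iov_len += trailer_len;
}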
125 * all packet buffers (iov_base) are integral multiples of the word size.
127 * offset is an integral multiple of the word size.
130 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
134 for (l = 0, i = 1; i < packet->niovecs; i++) {
135 if (l + packet->wirevec[i].iov_len > offset) {
137 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
140 l += packet->wirevec[i].iov_len;
147 * all packet buffers (iov_base) are integral multiples of the word size.
148 * offset is an integral multiple of the word size.
151 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
155 for (l = 0, i = 1; i < packet->niovecs; i++) {
156 if (l + packet->wirevec[i].iov_len > offset) {
157 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
158 (offset - l))) = data;
161 l += packet->wirevec[i].iov_len;
168 * all packet buffers (iov_base) are integral multiples of the word size.
170 * offset is an integral multiple of the word size.
172 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
175 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
178 unsigned int i, j, l, r;
179 for (l = 0, i = 1; i < packet->niovecs; i++) {
180 if (l + packet->wirevec[i].iov_len > offset) {
183 l += packet->wirevec[i].iov_len;
186 /* i is the iovec which contains the first little bit of data in which we
187 * are interested. l is the total length of everything prior to this iovec.
188 * j is the number of bytes we can safely copy out of this iovec.
189 * offset only applies to the first iovec.
192 while ((resid > 0) && (i < packet->niovecs)) {
193 j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
194 memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
197 l += packet->wirevec[i].iov_len;
202 return (resid ? (r - resid) : r);
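/*
 * Illustrative usage (not part of the original file): pulling a small,
 * word-aligned structure out of the data area with the slow path above.
 * The return value is the number of bytes actually copied, so a short
 * read means the packet was smaller than expected.  example_ReadDebugIn
 * is a hypothetical name; struct rx_debugIn is the request header used
 * by rxi_ReceiveDebugPacket() further down in this file.
 */
static int
example_ReadDebugIn(struct rx_packet *p, struct rx_debugIn *out)
{
    int n = rx_SlowReadPacket(p, 0, sizeof(struct rx_debugIn), (char *)out);

    return (n == (int) sizeof(struct rx_debugIn)) ? 0 : -1;
}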
207 * all packet buffers (iov_base) are integral multiples of the word size.
209 * offset is an integral multiple of the word size.
212 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
217 for (l = 0, i = 1; i < packet->niovecs; i++) {
218 if (l + packet->wirevec[i].iov_len > offset) {
221 l += packet->wirevec[i].iov_len;
224 /* i is the iovec which contains the first little bit of data in which we
225 * are interested. l is the total length of everything prior to this iovec.
226 * j is the number of bytes we can safely copy into this iovec.
227 * offset only applies to the first iovec.
230 while ((resid > 0) && (i < RX_MAXWVECS)) {
231 if (i >= packet->niovecs)
232 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
235 b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
236 j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
240 l += packet->wirevec[i].iov_len;
245 return (resid ? (r - resid) : r);
249 rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
251 register struct rx_packet *p, *np;
253 num_pkts = AllocPacketBufs(class, num_pkts, q);
255 for (queue_Scan(q, p, np, rx_packet)) {
256 RX_PACKET_IOV_FULLINIT(p);
262 #ifdef RX_ENABLE_TSFPQ
264 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
266 register struct rx_ts_info_t * rx_ts_info;
270 RX_TS_INFO_GET(rx_ts_info);
272 transfer = num_pkts - rx_ts_info->_FPQ.len;
275 MUTEX_ENTER(&rx_freePktQ_lock);
276 transfer = MAX(transfer, rx_TSFPQGlobSize);
277 if (transfer > rx_nFreePackets) {
278 /* alloc enough for us, plus a few globs for other threads */
279 rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
282 RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
284 MUTEX_EXIT(&rx_freePktQ_lock);
288 RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
292 #else /* RX_ENABLE_TSFPQ */
294 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
305 MUTEX_ENTER(&rx_freePktQ_lock);
308 for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
309 num_pkts--, overq++);
312 rxi_NeedMorePackets = TRUE;
314 case RX_PACKET_CLASS_RECEIVE:
315 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
317 case RX_PACKET_CLASS_SEND:
318 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
320 case RX_PACKET_CLASS_SPECIAL:
321 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
323 case RX_PACKET_CLASS_RECV_CBUF:
324 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
326 case RX_PACKET_CLASS_SEND_CBUF:
327 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
332 if (rx_nFreePackets < num_pkts)
333 num_pkts = rx_nFreePackets;
336 rxi_NeedMorePackets = TRUE;
340 if (rx_nFreePackets < num_pkts) {
341 rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), 4 * rx_initSendWindow));
345 for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
347 i++, c=queue_Next(c, rx_packet)) {
351 queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);
353 rx_nFreePackets -= num_pkts;
358 MUTEX_EXIT(&rx_freePktQ_lock);
363 #endif /* RX_ENABLE_TSFPQ */
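/*
 * Illustrative usage (not part of the original file): callers that need a
 * batch of fully initialized packets hand an rx_queue to rxi_AllocPackets()
 * above and return the whole batch with rxi_FreePackets() defined below.
 * AllocPacketBufs may deliver fewer packets than requested, so the count it
 * reports back must be used rather than the number asked for.
 * example_WithPacketBatch is a hypothetical name.
 */
static int
example_WithPacketBatch(int want)
{
    struct rx_queue q;
    int got;

    queue_Init(&q);
    got = rxi_AllocPackets(RX_PACKET_CLASS_SEND, want, &q);
    /* ... fill in and transmit the packets queued on q ... */
    rxi_FreePackets(got, &q);
    return got;
}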
366 * Free a packet currently used as a continuation buffer
368 #ifdef RX_ENABLE_TSFPQ
369 /* num_pkts=0 means queue length is unknown */
371 rxi_FreePackets(int num_pkts, struct rx_queue * q)
373 register struct rx_ts_info_t * rx_ts_info;
374 register struct rx_packet *c, *nc;
377 osi_Assert(num_pkts >= 0);
378 RX_TS_INFO_GET(rx_ts_info);
381 for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
382 rxi_FreeDataBufsTSFPQ(c, 2, 0);
385 for (queue_Scan(q, c, nc, rx_packet)) {
386 rxi_FreeDataBufsTSFPQ(c, 2, 0);
391 RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
394 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
396 MUTEX_ENTER(&rx_freePktQ_lock);
398 RX_TS_FPQ_LTOG(rx_ts_info);
400 /* Wakeup anyone waiting for packets */
403 MUTEX_EXIT(&rx_freePktQ_lock);
409 #else /* RX_ENABLE_TSFPQ */
410 /* num_pkts=0 means queue length is unknown */
412 rxi_FreePackets(int num_pkts, struct rx_queue *q)
415 register struct rx_packet *p, *np;
419 osi_Assert(num_pkts >= 0);
423 for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
424 if (p->niovecs > 2) {
425 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
432 for (queue_Scan(q, p, np, rx_packet)) {
433 if (p->niovecs > 2) {
434 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
441 queue_SpliceAppend(q, &cbs);
447 MUTEX_ENTER(&rx_freePktQ_lock);
449 queue_SpliceAppend(&rx_freePacketQueue, q);
450 rx_nFreePackets += qlen;
452 /* Wakeup anyone waiting for packets */
455 MUTEX_EXIT(&rx_freePktQ_lock);
460 #endif /* RX_ENABLE_TSFPQ */
462 /* this one is kind of awful.
463 * In rxkad, the packet has already been shortened and made ready for
464 * sending when, all of a sudden, we discover we need some of that space back.
465 * This isn't terribly general, because it knows that the packets are only
466 * rounded up to the EBS (userdata + security header).
469 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
473 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
474 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
475 p->wirevec[i].iov_len += nb;
479 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
480 p->wirevec[i].iov_len += nb;
488 /* get sufficient space to store nb bytes of data (or more), and hook
489 * it into the supplied packet. Returns nbytes <= 0 if successful, otherwise
490 * returns the number of bytes > 0 which it failed to come up with.
491 * There is no need to worry about locking on the packet, since only
492 * one thread can manipulate one at a time. Locking on continuation
493 * packets is handled by AllocPacketBufs */
494 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
496 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
500 register struct rx_packet *cb, *ncb;
502 /* compute the number of cbuf's we need */
503 nv = nb / RX_CBUFFERSIZE;
504 if ((nv * RX_CBUFFERSIZE) < nb)
506 if ((nv + p->niovecs) > RX_MAXWVECS)
507 nv = RX_MAXWVECS - p->niovecs;
511 /* allocate buffers */
513 nv = AllocPacketBufs(class, nv, &q);
515 /* setup packet iovs */
516 for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
518 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
519 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
522 nb -= (nv * RX_CBUFFERSIZE);
523 p->length += (nv * RX_CBUFFERSIZE);
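/*
 * Illustrative usage (not part of the original file): growing a packet so it
 * can carry `want` bytes of payload, mirroring what rxi_AllocSendPacket()
 * does further down.  Per the comment above rxi_AllocDataBuf(), a return
 * value <= 0 means the full amount was obtained; a positive value is the
 * shortfall in bytes.  example_GrowPacket is a hypothetical name.
 */
static int
example_GrowPacket(struct rx_packet *p, int want)
{
    if (want > p->length)
        return rxi_AllocDataBuf(p, want - p->length,
                                RX_PACKET_CLASS_SEND_CBUF);
    return 0;   /* already large enough */
}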
529 /* Add more packet buffers */
530 #ifdef RX_ENABLE_TSFPQ
532 rxi_MorePackets(int apackets)
534 struct rx_packet *p, *e;
535 register struct rx_ts_info_t * rx_ts_info;
539 getme = apackets * sizeof(struct rx_packet);
540 p = (struct rx_packet *)osi_Alloc(getme);
543 PIN(p, getme); /* XXXXX */
544 memset((char *)p, 0, getme);
545 RX_TS_INFO_GET(rx_ts_info);
547 RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
548 /* TSFPQ patch also needs to keep track of total packets */
549 MUTEX_ENTER(&rx_stats_mutex);
550 rx_nPackets += apackets;
551 RX_TS_FPQ_COMPUTE_LIMITS;
552 MUTEX_EXIT(&rx_stats_mutex);
554 for (e = p + apackets; p < e; p++) {
555 RX_PACKET_IOV_INIT(p);
558 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
561 MUTEX_ENTER(&rx_freePktQ_lock);
563 MUTEX_EXIT(&rx_freePktQ_lock);
566 rx_ts_info->_FPQ.delta += apackets;
568 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
570 MUTEX_ENTER(&rx_freePktQ_lock);
572 RX_TS_FPQ_LTOG(rx_ts_info);
573 rxi_NeedMorePackets = FALSE;
576 MUTEX_EXIT(&rx_freePktQ_lock);
580 #else /* RX_ENABLE_TSFPQ */
582 rxi_MorePackets(int apackets)
584 struct rx_packet *p, *e;
588 getme = apackets * sizeof(struct rx_packet);
589 p = (struct rx_packet *)osi_Alloc(getme);
592 PIN(p, getme); /* XXXXX */
593 memset((char *)p, 0, getme);
595 MUTEX_ENTER(&rx_freePktQ_lock);
597 for (e = p + apackets; p < e; p++) {
598 RX_PACKET_IOV_INIT(p);
599 p->flags |= RX_PKTFLAG_FREE;
602 queue_Append(&rx_freePacketQueue, p);
606 rx_nFreePackets += apackets;
607 rxi_NeedMorePackets = FALSE;
610 MUTEX_EXIT(&rx_freePktQ_lock);
613 #endif /* RX_ENABLE_TSFPQ */
615 #ifdef RX_ENABLE_TSFPQ
617 rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
619 struct rx_packet *p, *e;
620 register struct rx_ts_info_t * rx_ts_info;
624 getme = apackets * sizeof(struct rx_packet);
625 p = (struct rx_packet *)osi_Alloc(getme);
627 PIN(p, getme); /* XXXXX */
628 memset((char *)p, 0, getme);
629 RX_TS_INFO_GET(rx_ts_info);
631 RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
632 /* TSFPQ patch also needs to keep track of total packets */
633 MUTEX_ENTER(&rx_stats_mutex);
634 rx_nPackets += apackets;
635 RX_TS_FPQ_COMPUTE_LIMITS;
636 MUTEX_EXIT(&rx_stats_mutex);
638 for (e = p + apackets; p < e; p++) {
639 RX_PACKET_IOV_INIT(p);
641 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
644 MUTEX_ENTER(&rx_freePktQ_lock);
646 MUTEX_EXIT(&rx_freePktQ_lock);
649 rx_ts_info->_FPQ.delta += apackets;
652 (num_keep_local < apackets)) {
654 MUTEX_ENTER(&rx_freePktQ_lock);
656 RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
657 rxi_NeedMorePackets = FALSE;
660 MUTEX_EXIT(&rx_freePktQ_lock);
664 #endif /* RX_ENABLE_TSFPQ */
667 /* Add more packet buffers */
669 rxi_MorePacketsNoLock(int apackets)
671 #ifdef RX_ENABLE_TSFPQ
672 register struct rx_ts_info_t * rx_ts_info;
673 #endif /* RX_ENABLE_TSFPQ */
674 struct rx_packet *p, *e;
677 /* allocate enough packets that 1/4 of the packets will be able
678 * to hold maximal amounts of data */
679 apackets += (apackets / 4)
680 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
682 getme = apackets * sizeof(struct rx_packet);
683 p = (struct rx_packet *)osi_Alloc(getme);
685 apackets -= apackets / 4;
686 osi_Assert(apackets > 0);
689 memset((char *)p, 0, getme);
691 #ifdef RX_ENABLE_TSFPQ
692 RX_TS_INFO_GET(rx_ts_info);
693 RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info,apackets);
694 #endif /* RX_ENABLE_TSFPQ */
696 for (e = p + apackets; p < e; p++) {
697 RX_PACKET_IOV_INIT(p);
698 p->flags |= RX_PKTFLAG_FREE;
701 queue_Append(&rx_freePacketQueue, p);
705 rx_nFreePackets += apackets;
706 #ifdef RX_ENABLE_TSFPQ
707 /* TSFPQ patch also needs to keep track of total packets */
708 MUTEX_ENTER(&rx_stats_mutex);
709 rx_nPackets += apackets;
710 RX_TS_FPQ_COMPUTE_LIMITS;
711 MUTEX_EXIT(&rx_stats_mutex);
712 #endif /* RX_ENABLE_TSFPQ */
713 rxi_NeedMorePackets = FALSE;
719 rxi_FreeAllPackets(void)
721 /* must be called at proper interrupt level, etcetera */
722 /* MTUXXX need to free all Packets */
723 osi_Free(rx_mallocedP,
724 (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
725 UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
728 #ifdef RX_ENABLE_TSFPQ
730 rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
732 register struct rx_ts_info_t * rx_ts_info;
736 RX_TS_INFO_GET(rx_ts_info);
738 if (num_keep_local != rx_ts_info->_FPQ.len) {
740 MUTEX_ENTER(&rx_freePktQ_lock);
741 if (num_keep_local < rx_ts_info->_FPQ.len) {
742 xfer = rx_ts_info->_FPQ.len - num_keep_local;
743 RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
746 xfer = num_keep_local - rx_ts_info->_FPQ.len;
747 if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
748 xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
749 if (rx_nFreePackets < xfer) {
750 rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
752 RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
754 MUTEX_EXIT(&rx_freePktQ_lock);
760 rxi_FlushLocalPacketsTSFPQ(void)
762 rxi_AdjustLocalPacketsTSFPQ(0, 0);
764 #endif /* RX_ENABLE_TSFPQ */
766 /* Allocate more packets iff we need more continuation buffers */
767 /* In kernel, can't page in memory with interrupts disabled, so we
768 * don't use the event mechanism. */
770 rx_CheckPackets(void)
772 if (rxi_NeedMorePackets) {
773 rxi_MorePackets(rx_initSendWindow);
777 /* In the packet freeing routine below, the assumption is that
778 we want all of the packets to be used equally frequently, so that we
779 don't get packet buffers paging out. It would be just as valid to
780 assume that we DO want them to page out if not many are being used.
781 In any event, we assume the former, and append the packets to the end
783 /* This explanation is bogus. The free list doesn't remain in any kind of
784 useful order for afs_int32: the packets in use get pretty much randomly scattered
785 across all the pages. In order to permit unused {packets,bufs} to page out, they
786 must be stored so that packets which are adjacent in memory are adjacent in the
787 free list. An array springs rapidly to mind.
790 /* Actually free the packet p. */
791 #ifdef RX_ENABLE_TSFPQ
793 rxi_FreePacketNoLock(struct rx_packet *p)
795 register struct rx_ts_info_t * rx_ts_info;
796 dpf(("Free %lx\n", (unsigned long)p));
798 RX_TS_INFO_GET(rx_ts_info);
799 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
800 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
801 RX_TS_FPQ_LTOG(rx_ts_info);
804 #else /* RX_ENABLE_TSFPQ */
806 rxi_FreePacketNoLock(struct rx_packet *p)
808 dpf(("Free %lx\n", (unsigned long)p));
812 queue_Append(&rx_freePacketQueue, p);
814 #endif /* RX_ENABLE_TSFPQ */
816 #ifdef RX_ENABLE_TSFPQ
818 rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
820 register struct rx_ts_info_t * rx_ts_info;
821 dpf(("Free %lx\n", (unsigned long)p));
823 RX_TS_INFO_GET(rx_ts_info);
824 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
826 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
828 MUTEX_ENTER(&rx_freePktQ_lock);
830 RX_TS_FPQ_LTOG(rx_ts_info);
832 /* Wakeup anyone waiting for packets */
835 MUTEX_EXIT(&rx_freePktQ_lock);
839 #endif /* RX_ENABLE_TSFPQ */
842 * free continuation buffers off a packet into a queue
844 * [IN] p -- packet from which continuation buffers will be freed
845 * [IN] first -- iovec offset of first continuation buffer to free
846 * [IN] q -- queue into which continuation buffers will be chained
849 * number of continuation buffers freed
851 #ifndef RX_ENABLE_TSFPQ
853 rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
856 struct rx_packet * cb;
859 for (first = MAX(2, first); first < p->niovecs; first++, count++) {
860 iov = &p->wirevec[first];
862 osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
863 cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
864 RX_FPQ_MARK_FREE(cb);
875 * free packet continuation buffers into the global free packet pool
877 * [IN] p -- packet from which to free continuation buffers
878 * [IN] first -- iovec offset of first continuation buffer to free
884 rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
888 for (first = MAX(2, first); first < p->niovecs; first++) {
889 iov = &p->wirevec[first];
891 osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
892 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
900 #ifdef RX_ENABLE_TSFPQ
902 * free packet continuation buffers into the thread-local free pool
904 * [IN] p -- packet from which continuation buffers will be freed
905 * [IN] first -- iovec offset of first continuation buffer to free
906 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
907 * global free pool before returning
913 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
916 register struct rx_ts_info_t * rx_ts_info;
918 RX_TS_INFO_GET(rx_ts_info);
920 for (first = MAX(2, first); first < p->niovecs; first++) {
921 iov = &p->wirevec[first];
923 osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
924 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
929 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
931 MUTEX_ENTER(&rx_freePktQ_lock);
933 RX_TS_FPQ_LTOG(rx_ts_info);
935 /* Wakeup anyone waiting for packets */
938 MUTEX_EXIT(&rx_freePktQ_lock);
943 #endif /* RX_ENABLE_TSFPQ */
945 int rxi_nBadIovecs = 0;
947 /* rxi_RestoreDataBufs
949 * Restore the correct sizes to the iovecs. Called when reusing a packet
950 * for reading off the wire.
953 rxi_RestoreDataBufs(struct rx_packet *p)
956 struct iovec *iov = &p->wirevec[2];
958 RX_PACKET_IOV_INIT(p);
960 for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
961 if (!iov->iov_base) {
966 iov->iov_len = RX_CBUFFERSIZE;
970 #ifdef RX_ENABLE_TSFPQ
972 rxi_TrimDataBufs(struct rx_packet *p, int first)
975 struct iovec *iov, *end;
976 register struct rx_ts_info_t * rx_ts_info;
980 osi_Panic("TrimDataBufs 1: first must be 1");
982 /* Skip over continuation buffers containing message data */
983 iov = &p->wirevec[2];
984 end = iov + (p->niovecs - 2);
985 length = p->length - p->wirevec[1].iov_len;
986 for (; iov < end && length > 0; iov++) {
988 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
989 length -= iov->iov_len;
992 /* iov now points to the first empty data buffer. */
996 RX_TS_INFO_GET(rx_ts_info);
997 for (; iov < end; iov++) {
999 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1000 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
1003 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
1005 MUTEX_ENTER(&rx_freePktQ_lock);
1007 RX_TS_FPQ_LTOG(rx_ts_info);
1008 rxi_PacketsUnWait();
1010 MUTEX_EXIT(&rx_freePktQ_lock);
1016 #else /* RX_ENABLE_TSFPQ */
1018 rxi_TrimDataBufs(struct rx_packet *p, int first)
1021 struct iovec *iov, *end;
1025 osi_Panic("TrimDataBufs 1: first must be 1");
1027 /* Skip over continuation buffers containing message data */
1028 iov = &p->wirevec[2];
1029 end = iov + (p->niovecs - 2);
1030 length = p->length - p->wirevec[1].iov_len;
1031 for (; iov < end && length > 0; iov++) {
1033 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
1034 length -= iov->iov_len;
1037 /* iov now points to the first empty data buffer. */
1042 MUTEX_ENTER(&rx_freePktQ_lock);
1044 for (; iov < end; iov++) {
1046 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1047 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
1050 rxi_PacketsUnWait();
1052 MUTEX_EXIT(&rx_freePktQ_lock);
1057 #endif /* RX_ENABLE_TSFPQ */
1059 /* Free the packet p. P is assumed not to be on any queue, i.e.
1060 * remove it yourself first if you call this routine. */
1061 #ifdef RX_ENABLE_TSFPQ
1063 rxi_FreePacket(struct rx_packet *p)
1065 rxi_FreeDataBufsTSFPQ(p, 2, 0);
1066 rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
1068 #else /* RX_ENABLE_TSFPQ */
1070 rxi_FreePacket(struct rx_packet *p)
1075 MUTEX_ENTER(&rx_freePktQ_lock);
1077 rxi_FreeDataBufsNoLock(p, 2);
1078 rxi_FreePacketNoLock(p);
1079 /* Wakeup anyone waiting for packets */
1080 rxi_PacketsUnWait();
1082 MUTEX_EXIT(&rx_freePktQ_lock);
1085 #endif /* RX_ENABLE_TSFPQ */
1087 /* rxi_AllocPacket sets up p->length so it reflects the number of
1088 * bytes in the packet at this point, **not including** the header.
1089 * The header is absolutely necessary; besides, this is the way the
1090 * length field is usually used */
1091 #ifdef RX_ENABLE_TSFPQ
1093 rxi_AllocPacketNoLock(int class)
1095 register struct rx_packet *p;
1096 register struct rx_ts_info_t * rx_ts_info;
1098 RX_TS_INFO_GET(rx_ts_info);
1101 if (rxi_OverQuota(class)) {
1102 rxi_NeedMorePackets = TRUE;
1104 case RX_PACKET_CLASS_RECEIVE:
1105 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
1107 case RX_PACKET_CLASS_SEND:
1108 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
1110 case RX_PACKET_CLASS_SPECIAL:
1111 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
1113 case RX_PACKET_CLASS_RECV_CBUF:
1114 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
1116 case RX_PACKET_CLASS_SEND_CBUF:
1117 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
1120 return (struct rx_packet *)0;
1124 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1125 if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1128 if (queue_IsEmpty(&rx_freePacketQueue))
1129 osi_Panic("rxi_AllocPacket error");
1131 if (queue_IsEmpty(&rx_freePacketQueue))
1132 rxi_MorePacketsNoLock(4 * rx_initSendWindow);
1136 RX_TS_FPQ_GTOL(rx_ts_info);
1139 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1141 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1144 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1145 * order to truncate outbound packets. In the near future, may need
1146 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1148 RX_PACKET_IOV_FULLINIT(p);
1151 #else /* RX_ENABLE_TSFPQ */
1153 rxi_AllocPacketNoLock(int class)
1155 register struct rx_packet *p;
1158 if (rxi_OverQuota(class)) {
1159 rxi_NeedMorePackets = TRUE;
1161 case RX_PACKET_CLASS_RECEIVE:
1162 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
1164 case RX_PACKET_CLASS_SEND:
1165 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
1167 case RX_PACKET_CLASS_SPECIAL:
1168 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
1170 case RX_PACKET_CLASS_RECV_CBUF:
1171 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
1173 case RX_PACKET_CLASS_SEND_CBUF:
1174 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
1177 return (struct rx_packet *)0;
1181 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1184 if (queue_IsEmpty(&rx_freePacketQueue))
1185 osi_Panic("rxi_AllocPacket error");
1187 if (queue_IsEmpty(&rx_freePacketQueue))
1188 rxi_MorePacketsNoLock(4 * rx_initSendWindow);
1192 p = queue_First(&rx_freePacketQueue, rx_packet);
1194 RX_FPQ_MARK_USED(p);
1196 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1199 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1200 * order to truncate outbound packets. In the near future, may need
1201 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1203 RX_PACKET_IOV_FULLINIT(p);
1206 #endif /* RX_ENABLE_TSFPQ */
1208 #ifdef RX_ENABLE_TSFPQ
1210 rxi_AllocPacketTSFPQ(int class, int pull_global)
1212 register struct rx_packet *p;
1213 register struct rx_ts_info_t * rx_ts_info;
1215 RX_TS_INFO_GET(rx_ts_info);
1217 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1218 if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
1219 MUTEX_ENTER(&rx_freePktQ_lock);
1221 if (queue_IsEmpty(&rx_freePacketQueue))
1222 rxi_MorePacketsNoLock(4 * rx_initSendWindow);
1224 RX_TS_FPQ_GTOL(rx_ts_info);
1226 MUTEX_EXIT(&rx_freePktQ_lock);
1227 } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1231 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1233 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1235 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1236 * order to truncate outbound packets. In the near future, may need
1237 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1239 RX_PACKET_IOV_FULLINIT(p);
1242 #endif /* RX_ENABLE_TSFPQ */
1244 #ifdef RX_ENABLE_TSFPQ
1246 rxi_AllocPacket(int class)
1248 register struct rx_packet *p;
1250 p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
1253 #else /* RX_ENABLE_TSFPQ */
1255 rxi_AllocPacket(int class)
1257 register struct rx_packet *p;
1259 MUTEX_ENTER(&rx_freePktQ_lock);
1260 p = rxi_AllocPacketNoLock(class);
1261 MUTEX_EXIT(&rx_freePktQ_lock);
1264 #endif /* RX_ENABLE_TSFPQ */
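/*
 * Illustrative sketch (not part of the original file): as the comment above
 * rxi_AllocPacketNoLock() notes, p->length on a freshly allocated packet
 * counts only the data area, so the on-the-wire size is obtained by adding
 * RX_HEADER_SIZE -- the same sum rxi_SendPacket() later passes to
 * osi_NetSend().  example_WireSize is a hypothetical name.
 */
static int
example_WireSize(int class)
{
    struct rx_packet *p = rxi_AllocPacket(class);
    int wire;

    if (!p)
        return -1;
    wire = p->length + RX_HEADER_SIZE;  /* header not included in p->length */
    rxi_FreePacket(p);
    return wire;
}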
1266 /* This guy comes up with as many buffers as it {takes,can get} given
1267 * the MTU for this call. It also sets the packet length before
1268 * returning. caution: this is often called at NETPRI
1269 * Called with call locked.
1272 rxi_AllocSendPacket(register struct rx_call *call, int want)
1274 register struct rx_packet *p = (struct rx_packet *)0;
1276 register unsigned delta;
1279 mud = call->MTU - RX_HEADER_SIZE;
1281 rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
1282 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
1284 #ifdef RX_ENABLE_TSFPQ
1285 if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
1287 want = MIN(want, mud);
1289 if ((unsigned)want > p->length)
1290 (void)rxi_AllocDataBuf(p, (want - p->length),
1291 RX_PACKET_CLASS_SEND_CBUF);
1293 if ((unsigned)p->length > mud)
1296 if (delta >= p->length) {
1304 #endif /* RX_ENABLE_TSFPQ */
1306 while (!(call->error)) {
1307 MUTEX_ENTER(&rx_freePktQ_lock);
1308 /* if an error occurred, or we get the packet we want, we're done */
1309 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
1310 MUTEX_EXIT(&rx_freePktQ_lock);
1313 want = MIN(want, mud);
1315 if ((unsigned)want > p->length)
1316 (void)rxi_AllocDataBuf(p, (want - p->length),
1317 RX_PACKET_CLASS_SEND_CBUF);
1319 if ((unsigned)p->length > mud)
1322 if (delta >= p->length) {
1331 /* no error occurred, and we didn't get a packet, so we sleep.
1332 * At this point, we assume that packets will be returned
1333 * sooner or later, as packets are acknowledged, and so we
1336 call->flags |= RX_CALL_WAIT_PACKETS;
1337 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
1338 MUTEX_EXIT(&call->lock);
1339 rx_waitingForPackets = 1;
1341 #ifdef RX_ENABLE_LOCKS
1342 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
1344 osi_rxSleep(&rx_waitingForPackets);
1346 MUTEX_EXIT(&rx_freePktQ_lock);
1347 MUTEX_ENTER(&call->lock);
1348 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
1349 call->flags &= ~RX_CALL_WAIT_PACKETS;
1358 /* Windows does not use file descriptors. */
1359 #define CountFDs(amax) 0
1361 /* count the number of used FDs */
1363 CountFDs(register int amax)
1366 register int i, code;
1370 for (i = 0; i < amax; i++) {
1371 code = fstat(i, &tstat);
1377 #endif /* AFS_NT40_ENV */
1380 #define CountFDs(amax) amax
1384 #if !defined(KERNEL) || defined(UKERNEL)
1386 /* This function reads a single packet from the interface into the
1387 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
1388 * (host,port) of the sender are stored in the supplied variables, and
1389 * the data length of the packet is stored in the packet structure.
1390 * The header is decoded. */
1392 rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host,
1395 struct sockaddr_in from;
1398 register afs_int32 tlen, savelen;
1400 rx_computelen(p, tlen);
1401 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
1403 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
1404 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
1405 * it once in order to avoid races. */
1408 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
1416 /* Extend the last iovec for padding, it's just to make sure that the
1417 * read doesn't return more data than we expect, and is done to get around
1418 * our problems caused by the lack of a length field in the rx header.
1419 * Use the extra buffer that follows the localdata in each packet
1421 savelen = p->wirevec[p->niovecs - 1].iov_len;
1422 p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
1424 memset((char *)&msg, 0, sizeof(msg));
1425 msg.msg_name = (char *)&from;
1426 msg.msg_namelen = sizeof(struct sockaddr_in);
1427 msg.msg_iov = p->wirevec;
1428 msg.msg_iovlen = p->niovecs;
1429 nbytes = rxi_Recvmsg(socket, &msg, 0);
1431 /* restore the vec to its correct state */
1432 p->wirevec[p->niovecs - 1].iov_len = savelen;
1434 p->length = (nbytes - RX_HEADER_SIZE);
1435 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
1436 if (nbytes < 0 && errno == EWOULDBLOCK) {
1437 rx_MutexIncrement(rx_stats.noPacketOnRead, rx_stats_mutex);
1438 } else if (nbytes <= 0) {
1439 MUTEX_ENTER(&rx_stats_mutex);
1440 rx_stats.bogusPacketOnRead++;
1441 rx_stats.bogusHost = from.sin_addr.s_addr;
1442 MUTEX_EXIT(&rx_stats_mutex);
1443 dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
1444 ntohs(from.sin_port), nbytes));
1449 else if ((rx_intentionallyDroppedOnReadPer100 > 0)
1450 && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
1451 rxi_DecodePacketHeader(p);
1453 *host = from.sin_addr.s_addr;
1454 *port = from.sin_port;
1456 dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
1457 p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
1458 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1460 rxi_TrimDataBufs(p, 1);
1465 /* Extract packet header. */
1466 rxi_DecodePacketHeader(p);
1468 *host = from.sin_addr.s_addr;
1469 *port = from.sin_port;
1470 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
1471 struct rx_peer *peer;
1472 rx_MutexIncrement(rx_stats.packetsRead[p->header.type - 1], rx_stats_mutex);
1474 * Try to look up this peer structure. If it doesn't exist,
1475 * don't create a new one -
1476 * we don't keep count of the bytes sent/received if a peer
1477 * structure doesn't already exist.
1479 * The peer/connection cleanup code assumes that there is 1 peer
1480 * per connection. If we actually created a peer structure here
1481 * and this packet was an rxdebug packet, the peer structure would
1482 * never be cleaned up.
1484 peer = rxi_FindPeer(*host, *port, 0, 0);
1485 /* Since this may not be associated with a connection,
1486 * it may have no refCount, meaning we could race with
1489 if (peer && (peer->refCount > 0)) {
1490 MUTEX_ENTER(&peer->peer_lock);
1491 hadd32(peer->bytesReceived, p->length);
1492 MUTEX_EXIT(&peer->peer_lock);
1496 /* Free any empty packet buffers at the end of this packet */
1497 rxi_TrimDataBufs(p, 1);
1503 #endif /* !KERNEL || UKERNEL */
1505 /* This function splits off the first packet in a jumbo packet.
1506 * As of AFS 3.5, jumbograms contain more than one fixed size
1507 * packet, and the RX_JUMBO_PACKET flag is set in all but the
1508 * last packet header. All packets (except the last) are padded to
1509 * fall on RX_CBUFFERSIZE boundaries.
1510 * HACK: We store the length of the first n-1 packets in the
1511 * last two pad bytes. */
1514 rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
1517 struct rx_packet *np;
1518 struct rx_jumboHeader *jp;
1524 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
1525 * bytes in length. All but the first packet are preceded by
1526 * an abbreviated four byte header. The length of the last packet
1527 * is calculated from the size of the jumbogram. */
1528 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1530 if ((int)p->length < length) {
1531 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
1534 niov = p->niovecs - 2;
1536 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
1539 iov = &p->wirevec[2];
1540 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
1542 /* Get a pointer to the abbreviated packet header */
1543 jp = (struct rx_jumboHeader *)
1544 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
1546 /* Set up the iovecs for the next packet */
1547 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
1548 np->wirevec[0].iov_len = sizeof(struct rx_header);
1549 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
1550 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
1551 np->niovecs = niov + 1;
1552 for (i = 2, iov++; i <= niov; i++, iov++) {
1553 np->wirevec[i] = *iov;
1555 np->length = p->length - length;
1556 p->length = RX_JUMBOBUFFERSIZE;
1559 /* Convert the jumbo packet header to host byte order */
1560 temp = ntohl(*(afs_uint32 *) jp);
1561 jp->flags = (u_char) (temp >> 24);
1562 jp->cksum = (u_short) (temp);
1564 /* Fill in the packet header */
1565 np->header = p->header;
1566 np->header.serial = p->header.serial + 1;
1567 np->header.seq = p->header.seq + 1;
1568 np->header.flags = jp->flags;
1569 np->header.spare = jp->cksum;
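/*
 * Illustrative sketch (not part of the original file): the four-byte
 * abbreviated jumbo header packs the flags byte into the top eight bits and
 * the checksum into the low sixteen, which is why the decode above shifts by
 * 24 for the flags and truncates for cksum, and why rxi_SendPacketList()
 * below builds the word as (flags << 24) | spare before converting to
 * network byte order.  example_PackJumboHeader is a hypothetical name.
 */
static afs_uint32
example_PackJumboHeader(u_char flags, u_short cksum)
{
    return htonl(((afs_uint32) flags << 24) | (afs_uint32) cksum);
}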
1575 /* Send a udp datagram */
1577 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
1578 int length, int istack)
1583 memset(&msg, 0, sizeof(msg));
1585 msg.msg_iovlen = nvecs;
1586 msg.msg_name = addr;
1587 msg.msg_namelen = sizeof(struct sockaddr_in);
1589 ret = rxi_Sendmsg(socket, &msg, 0);
1593 #elif !defined(UKERNEL)
1595 * message receipt is done in rxk_input or rx_put.
1598 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1600 * Copy an mblock to the contiguous area pointed to by cp.
1601 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1602 * but it doesn't really.
1603 * Returns the number of bytes not transferred.
1604 * The message is NOT changed.
1607 cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
1611 for (; mp && len > 0; mp = mp->b_cont) {
1612 if (mp->b_datap->db_type != M_DATA) {
1615 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1616 memcpy(cp, (char *)mp->b_rptr, n);
1624 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1625 * but it doesn't really.
1626 * This sucks, anyway, do it like m_cpy.... below
1629 cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
1632 register int m, n, o, t, i;
1634 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1635 if (mp->b_datap->db_type != M_DATA) {
1638 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1644 t = iovs[i].iov_len;
1647 memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1657 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1658 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1660 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1662 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1665 unsigned int l1, l2, i, t;
1667 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1668 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
1671 if (m->m_len <= off) {
1681 p1 = mtod(m, caddr_t) + off;
1682 l1 = m->m_len - off;
1684 p2 = iovs[0].iov_base;
1685 l2 = iovs[0].iov_len;
1688 t = MIN(l1, MIN(l2, (unsigned int)len));
1699 p1 = mtod(m, caddr_t);
1705 p2 = iovs[i].iov_base;
1706 l2 = iovs[i].iov_len;
1714 #endif /* AFS_SUN5_ENV */
1716 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1718 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1719 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1725 struct rx_packet *phandle;
1726 int hdr_len, data_len;
1731 m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1738 #endif /*KERNEL && !UKERNEL */
1741 /* send a response to a debug packet */
1744 rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
1745 afs_int32 ahost, short aport, int istack)
1747 struct rx_debugIn tin;
1749 struct rx_serverQueueEntry *np, *nqe;
1752 * Only respond to client-initiated Rx debug packets,
1753 * and clear the client flag in the response.
1755 if (ap->header.flags & RX_CLIENT_INITIATED) {
1756 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1757 rxi_EncodePacketHeader(ap);
1762 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1763 /* all done with packet, now set length to the truth, so we can
1764 * reuse this packet */
1765 rx_computelen(ap, ap->length);
1767 tin.type = ntohl(tin.type);
1768 tin.index = ntohl(tin.index);
1770 case RX_DEBUGI_GETSTATS:{
1771 struct rx_debugStats tstat;
1773 /* get basic stats */
1774 memset((char *)&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1775 tstat.version = RX_DEBUGI_VERSION;
1776 #ifndef RX_ENABLE_LOCKS
1777 tstat.waitingForPackets = rx_waitingForPackets;
1779 MUTEX_ENTER(&rx_serverPool_lock);
1780 tstat.nFreePackets = htonl(rx_nFreePackets);
1781 tstat.nPackets = htonl(rx_nPackets);
1782 tstat.callsExecuted = htonl(rxi_nCalls);
1783 tstat.packetReclaims = htonl(rx_packetReclaims);
1784 tstat.usedFDs = CountFDs(64);
1785 tstat.nWaiting = htonl(rx_nWaiting);
1786 tstat.nWaited = htonl(rx_nWaited);
1787 queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
1789 MUTEX_EXIT(&rx_serverPool_lock);
1790 tstat.idleThreads = htonl(tstat.idleThreads);
1791 tl = sizeof(struct rx_debugStats) - ap->length;
1793 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1796 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1798 ap->length = sizeof(struct rx_debugStats);
1799 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1800 rx_computelen(ap, ap->length);
1805 case RX_DEBUGI_GETALLCONN:
1806 case RX_DEBUGI_GETCONN:{
1808 register struct rx_connection *tc;
1809 struct rx_call *tcall;
1810 struct rx_debugConn tconn;
1811 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1814 tl = sizeof(struct rx_debugConn) - ap->length;
1816 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1820 memset((char *)&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1821 /* get N'th (maybe) "interesting" connection info */
1822 for (i = 0; i < rx_hashTableSize; i++) {
1823 #if !defined(KERNEL)
1824 /* the time complexity of the algorithm used here
1825 * exponentially increases with the number of connections.
1827 #ifdef AFS_PTHREAD_ENV
1833 RWLOCK_RDLOCK(&rx_connHashTable_lock);
1834 /* We might be slightly out of step since we are not
1835 * locking each call, but this is only debugging output.
1837 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
1838 if ((all || rxi_IsConnInteresting(tc))
1839 && tin.index-- <= 0) {
1840 tconn.host = tc->peer->host;
1841 tconn.port = tc->peer->port;
1842 tconn.cid = htonl(tc->cid);
1843 tconn.epoch = htonl(tc->epoch);
1844 tconn.serial = htonl(tc->serial);
1845 for (j = 0; j < RX_MAXCALLS; j++) {
1846 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1847 if ((tcall = tc->call[j])) {
1848 tconn.callState[j] = tcall->state;
1849 tconn.callMode[j] = tcall->mode;
1850 tconn.callFlags[j] = tcall->flags;
1851 if (queue_IsNotEmpty(&tcall->rq))
1852 tconn.callOther[j] |= RX_OTHER_IN;
1853 if (queue_IsNotEmpty(&tcall->tq))
1854 tconn.callOther[j] |= RX_OTHER_OUT;
1856 tconn.callState[j] = RX_STATE_NOTINIT;
1859 tconn.natMTU = htonl(tc->peer->natMTU);
1860 tconn.error = htonl(tc->error);
1861 tconn.flags = tc->flags;
1862 tconn.type = tc->type;
1863 tconn.securityIndex = tc->securityIndex;
1864 if (tc->securityObject) {
1865 RXS_GetStats(tc->securityObject, tc,
1867 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1868 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1871 DOHTONL(packetsReceived);
1872 DOHTONL(packetsSent);
1873 DOHTONL(bytesReceived);
1877 sizeof(tconn.secStats.spares) /
1882 sizeof(tconn.secStats.sparel) /
1883 sizeof(afs_int32); i++)
1887 RWLOCK_UNLOCK(&rx_connHashTable_lock);
1888 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1891 ap->length = sizeof(struct rx_debugConn);
1892 rxi_SendDebugPacket(ap, asocket, ahost, aport,
1898 RWLOCK_UNLOCK(&rx_connHashTable_lock);
1900 /* if we make it here, there are no interesting packets */
1901 tconn.cid = htonl(0xffffffff); /* means end */
1902 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1905 ap->length = sizeof(struct rx_debugConn);
1906 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1912 * Pass back all the peer structures we have available
1915 case RX_DEBUGI_GETPEER:{
1917 register struct rx_peer *tp;
1918 struct rx_debugPeer tpeer;
1921 tl = sizeof(struct rx_debugPeer) - ap->length;
1923 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1927 memset((char *)&tpeer, 0, sizeof(tpeer));
1928 for (i = 0; i < rx_hashTableSize; i++) {
1929 #if !defined(KERNEL)
1930 /* the time complexity of the algorithm used here
1931 * exponentially increases with the number of peers.
1933 * Yielding after processing each hash table entry
1934 * and dropping rx_peerHashTable_lock
1935 * also increases the risk that we will miss a new
1936 * entry - but we are willing to live with this
1937 * limitation since this is meant for debugging only
1939 #ifdef AFS_PTHREAD_ENV
1945 RWLOCK_RDLOCK(&rx_peerHashTable_lock);
1946 /* XXX should copy out, then unlock and byteswap */
1947 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1948 if (tin.index-- <= 0) {
1949 tpeer.host = tp->host;
1950 tpeer.port = tp->port;
1951 tpeer.ifMTU = htons(tp->ifMTU);
1952 tpeer.idleWhen = htonl(tp->idleWhen);
1953 tpeer.refCount = htons(tp->refCount);
1954 tpeer.burstSize = tp->burstSize;
1955 tpeer.burst = tp->burst;
1956 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1957 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1958 tpeer.rtt = htonl(tp->rtt);
1959 tpeer.rtt_dev = htonl(tp->rtt_dev);
1960 tpeer.timeout.sec = htonl(tp->timeout.sec);
1961 tpeer.timeout.usec = htonl(tp->timeout.usec);
1962 tpeer.nSent = htonl(tp->nSent);
1963 tpeer.reSends = htonl(tp->reSends);
1964 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1965 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1966 tpeer.rateFlag = htonl(tp->rateFlag);
1967 tpeer.natMTU = htons(tp->natMTU);
1968 tpeer.maxMTU = htons(tp->maxMTU);
1969 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1970 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1971 tpeer.MTU = htons(tp->MTU);
1972 tpeer.cwind = htons(tp->cwind);
1973 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1974 tpeer.congestSeq = htons(tp->congestSeq);
1975 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1976 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1977 tpeer.bytesReceived.high =
1978 htonl(tp->bytesReceived.high);
1979 tpeer.bytesReceived.low =
1980 htonl(tp->bytesReceived.low);
1982 RWLOCK_UNLOCK(&rx_peerHashTable_lock);
1983 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1986 ap->length = sizeof(struct rx_debugPeer);
1987 rxi_SendDebugPacket(ap, asocket, ahost, aport,
1993 RWLOCK_UNLOCK(&rx_peerHashTable_lock);
1995 /* if we make it here, there are no interesting packets */
1996 tpeer.host = htonl(0xffffffff); /* means end */
1997 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
2000 ap->length = sizeof(struct rx_debugPeer);
2001 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2006 case RX_DEBUGI_RXSTATS:{
2010 tl = sizeof(rx_stats) - ap->length;
2012 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
2016 /* Since it's all int32s, convert to network order with a loop. */
2017 MUTEX_ENTER(&rx_stats_mutex);
2018 s = (afs_int32 *) & rx_stats;
2019 for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
2020 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
2023 ap->length = sizeof(rx_stats);
2024 MUTEX_EXIT(&rx_stats_mutex);
2025 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2031 /* error response packet */
2032 tin.type = htonl(RX_DEBUGI_BADTYPE);
2033 tin.index = tin.type;
2034 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
2036 ap->length = sizeof(struct rx_debugIn);
2037 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
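/*
 * Illustrative sketch (not part of the original file): the request body the
 * handler above expects is just a struct rx_debugIn with both fields in
 * network byte order.  A debugging client would fill one in like this and
 * send it in a packet of type RX_PACKET_TYPE_DEBUG with the
 * RX_CLIENT_INITIATED flag set.  example_BuildStatsRequest is a
 * hypothetical name.
 */
static void
example_BuildStatsRequest(struct rx_debugIn *req)
{
    req->type = htonl(RX_DEBUGI_GETSTATS);
    req->index = htonl(0);
}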
2045 rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
2046 afs_int32 ahost, short aport, int istack)
2051 * Only respond to client-initiated version requests, and
2052 * clear that flag in the response.
2054 if (ap->header.flags & RX_CLIENT_INITIATED) {
2057 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2058 rxi_EncodePacketHeader(ap);
2059 memset(buf, 0, sizeof(buf));
2060 strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2061 rx_packetwrite(ap, 0, 65, buf);
2064 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2072 /* send a debug packet back to the sender */
2074 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2075 afs_int32 ahost, short aport, afs_int32 istack)
2077 struct sockaddr_in taddr;
2083 int waslocked = ISAFS_GLOCK();
2086 taddr.sin_family = AF_INET;
2087 taddr.sin_port = aport;
2088 taddr.sin_addr.s_addr = ahost;
2089 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2090 taddr.sin_len = sizeof(struct sockaddr_in);
2093 /* We need to trim the niovecs. */
2094 nbytes = apacket->length;
2095 for (i = 1; i < apacket->niovecs; i++) {
2096 if (nbytes <= apacket->wirevec[i].iov_len) {
2097 savelen = apacket->wirevec[i].iov_len;
2098 saven = apacket->niovecs;
2099 apacket->wirevec[i].iov_len = nbytes;
2100 apacket->niovecs = i + 1; /* so condition fails because i == niovecs */
2102 nbytes -= apacket->wirevec[i].iov_len;
2105 #ifdef RX_KERNEL_TRACE
2106 if (ICL_SETACTIVE(afs_iclSetp)) {
2109 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2110 "before osi_NetSend()");
2118 /* debug packets are not reliably delivered, hence the cast below. */
2119 (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2120 apacket->length + RX_HEADER_SIZE, istack);
2122 #ifdef RX_KERNEL_TRACE
2123 if (ICL_SETACTIVE(afs_iclSetp)) {
2125 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2126 "after osi_NetSend()");
2135 if (saven) { /* means we truncated the packet above. */
2136 apacket->wirevec[i - 1].iov_len = savelen;
2137 apacket->niovecs = saven;
2142 /* Send the packet to appropriate destination for the specified
2143 * call. The header is first encoded and placed in the packet.
2146 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2147 struct rx_packet *p, int istack)
2153 struct sockaddr_in addr;
2154 register struct rx_peer *peer = conn->peer;
2157 char deliveryType = 'S';
2159 /* The address we're sending the packet to */
2160 memset(&addr, 0, sizeof(addr));
2161 addr.sin_family = AF_INET;
2162 addr.sin_port = peer->port;
2163 addr.sin_addr.s_addr = peer->host;
2165 /* This stuff should be revamped, I think, so that most, if not
2166 * all, of the header stuff is always added here. We could
2167 * probably do away with the encode/decode routines. XXXXX */
2169 /* Stamp each packet with a unique serial number. The serial
2170 * number is maintained on a connection basis because some types
2171 * of security may be based on the serial number of the packet,
2172 * and security is handled on a per authenticated-connection
2174 /* Pre-increment, to guarantee no zero serial number; a zero
2175 * serial number means the packet was never sent. */
2176 MUTEX_ENTER(&conn->conn_data_lock);
2177 p->header.serial = ++conn->serial;
2178 MUTEX_EXIT(&conn->conn_data_lock);
2179 /* This is so we can adjust retransmit time-outs better in the face of
2180 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2182 if (p->firstSerial == 0) {
2183 p->firstSerial = p->header.serial;
2186 /* If an output tracer function is defined, call it with the packet and
2187 * network address. Note this function may modify its arguments. */
2188 if (rx_almostSent) {
2189 int drop = (*rx_almostSent) (p, &addr);
2190 /* drop packet if return value is non-zero? */
2192 deliveryType = 'D'; /* Drop the packet */
2196 /* Get network byte order header */
2197 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2198 * touch ALL the fields */
2200 /* Send the packet out on the same socket that related packets are being
2204 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2207 /* Possibly drop this packet, for testing purposes */
2208 if ((deliveryType == 'D')
2209 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2210 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2211 deliveryType = 'D'; /* Drop the packet */
2213 deliveryType = 'S'; /* Send the packet */
2214 #endif /* RXDEBUG */
2216 /* Loop until the packet is sent. We'd prefer just to use a
2217 * blocking socket, but unfortunately the interface doesn't
2218 * allow us to have the socket block in send mode, and not
2219 * block in receive mode */
2221 waslocked = ISAFS_GLOCK();
2222 #ifdef RX_KERNEL_TRACE
2223 if (ICL_SETACTIVE(afs_iclSetp)) {
2226 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2227 "before osi_NetSend()");
2236 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2237 p->length + RX_HEADER_SIZE, istack)) != 0) {
2238 /* send failed, so let's hurry up the resend, eh? */
2239 rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
2240 p->retryTime = p->timeSent; /* resend it very soon */
2241 clock_Addmsec(&(p->retryTime),
2242 10 + (((afs_uint32) p->backoff) << 8));
2243 /* Some systems are nice and tell us right away that we cannot
2244 * reach this recipient by returning an error code.
2245 * So, when this happens let's "down" the host NOW so
2246 * we don't sit around waiting for this host to timeout later.
2250 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2251 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2252 code == -ENETUNREACH
2253 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2254 code == EHOSTUNREACH
2259 call->lastReceiveTime = 0;
2262 #ifdef RX_KERNEL_TRACE
2263 if (ICL_SETACTIVE(afs_iclSetp)) {
2265 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2266 "after osi_NetSend()");
2277 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2279 rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2280 MUTEX_ENTER(&peer->peer_lock);
2281 hadd32(peer->bytesSent, p->length);
2282 MUTEX_EXIT(&peer->peer_lock);
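/*
 * Illustrative sketch (not part of the original file): the "hurry up the
 * resend" path above schedules the retry 10ms plus (backoff << 8)ms in the
 * future, so backoff values of 0, 1 and 2 give delays of roughly 10ms,
 * 266ms and 522ms.  example_ResendDelayMsec is a hypothetical name; it
 * assumes the byte-sized backoff field used by clock_Addmsec() above.
 */
static afs_uint32
example_ResendDelayMsec(u_char backoff)
{
    return 10 + (((afs_uint32) backoff) << 8);
}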
2285 /* Send a list of packets to appropriate destination for the specified
2286 * connection. The headers are first encoded and placed in the packets.
2289 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2290 struct rx_packet **list, int len, int istack)
2292 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2295 struct sockaddr_in addr;
2296 register struct rx_peer *peer = conn->peer;
2298 struct rx_packet *p = NULL;
2299 struct iovec wirevec[RX_MAXIOVECS];
2300 int i, length, code;
2303 struct rx_jumboHeader *jp;
2305 char deliveryType = 'S';
2307 /* The address we're sending the packet to */
2308 addr.sin_family = AF_INET;
2309 addr.sin_port = peer->port;
2310 addr.sin_addr.s_addr = peer->host;
2312 if (len + 1 > RX_MAXIOVECS) {
2313 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2317 * Stamp the packets in this jumbogram with consecutive serial numbers
2319 MUTEX_ENTER(&conn->conn_data_lock);
2320 serial = conn->serial;
2321 conn->serial += len;
2322 MUTEX_EXIT(&conn->conn_data_lock);
2325 /* This stuff should be revamped, I think, so that most, if not
2326 * all, of the header stuff is always added here. We could
2327 * probably do away with the encode/decode routines. XXXXX */
2330 length = RX_HEADER_SIZE;
2331 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2332 wirevec[0].iov_len = RX_HEADER_SIZE;
2333 for (i = 0; i < len; i++) {
2336 /* The whole 3.5 jumbogram scheme relies on packets fitting
2337 * in a single packet buffer. */
2338 if (p->niovecs > 2) {
2339 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2342 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2345 if (p->length != RX_JUMBOBUFFERSIZE) {
2346 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2348 p->header.flags |= RX_JUMBO_PACKET;
2349 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2350 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2352 wirevec[i + 1].iov_len = p->length;
2353 length += p->length;
2355 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2357 /* Convert jumbo packet header to network byte order */
2358 temp = (afs_uint32) (p->header.flags) << 24;
2359 temp |= (afs_uint32) (p->header.spare);
2360 *(afs_uint32 *) jp = htonl(temp);
2362 jp = (struct rx_jumboHeader *)
2363 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2365 /* Stamp each packet with a unique serial number. The serial
2366 * number is maintained on a connection basis because some types
2367 * of security may be based on the serial number of the packet,
2368 * and security is handled on a per authenticated-connection
2370 /* Pre-increment, to guarantee no zero serial number; a zero
2371 * serial number means the packet was never sent. */
2372 p->header.serial = ++serial;
2373 /* This is so we can adjust retransmit time-outs better in the face of
2374 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2376 if (p->firstSerial == 0) {
2377 p->firstSerial = p->header.serial;
2380 /* If an output tracer function is defined, call it with the packet and
2381 * network address. Note this function may modify its arguments. */
2382 if (rx_almostSent) {
2383 int drop = (*rx_almostSent) (p, &addr);
2384 /* drop packet if return value is non-zero? */
2386 deliveryType = 'D'; /* Drop the packet */
2390 /* Get network byte order header */
2391 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2392 * touch ALL the fields */
2395 /* Send the packet out on the same socket that related packets are being
2399 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2402 /* Possibly drop this packet, for testing purposes */
2403 if ((deliveryType == 'D')
2404 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2405 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2406 deliveryType = 'D'; /* Drop the packet */
2408 deliveryType = 'S'; /* Send the packet */
2409 #endif /* RXDEBUG */
2411 /* Loop until the packet is sent. We'd prefer just to use a
2412 * blocking socket, but unfortunately the interface doesn't
2413 * allow us to have the socket block in send mode, and not
2414 * block in receive mode */
2415 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2416 waslocked = ISAFS_GLOCK();
2417 if (!istack && waslocked)
2421 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2423 /* send failed, so let's hurry up the resend, eh? */
2424 rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
2425 for (i = 0; i < len; i++) {
2427 p->retryTime = p->timeSent; /* resend it very soon */
2428 clock_Addmsec(&(p->retryTime),
2429 10 + (((afs_uint32) p->backoff) << 8));
2431 /* Some systems are nice and tell us right away that we cannot
2432 * reach this recipient by returning an error code.
2433 * So, when this happens, let's "down" the host NOW so
2434 * we don't sit around waiting for this host to time out later. */
2438 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2439 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2440 code == -ENETUNREACH
2441 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2442 code == EHOSTUNREACH
2447 call->lastReceiveTime = 0;
2449 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2450 if (!istack && waslocked)
2458 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2461 rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2462 MUTEX_ENTER(&peer->peer_lock);
2463 hadd32(peer->bytesSent, p->length);
2464 MUTEX_EXIT(&peer->peer_lock);
2468 /* Send a "special" packet to the peer connection. If call is
2469 * specified, then the packet is directed to a specific call channel
2470 * associated with the connection, otherwise it is directed to the
2471 * connection only. Uses optionalPacket if it is supplied, rather than
2472 * allocating a new packet buffer. Nbytes is the length of the data
2473 * portion of the packet. If data is non-null, nbytes of data are
2474 * copied into the packet. Type is the type of the packet, as defined
2475 * in rx.h. Bug: there's a lot of duplication between this and other
2476 * routines. This needs to be cleaned up. */
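/* Usage sketch (illustrative, modelled on the abort paths in rx.c):
 * sending a call abort looks roughly like
 *
 *     error = htonl(call->error);
 *     (void) rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
 *                            RX_PACKET_TYPE_ABORT, (char *)&error,
 *                            sizeof(error), istack);
 *
 * Passing a null optionalPacket makes this routine allocate and free its
 * own packet and return NULL; passing a packet reuses and returns it. */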
2478 rxi_SendSpecial(register struct rx_call *call,
2479 register struct rx_connection *conn,
2480 struct rx_packet *optionalPacket, int type, char *data,
2481 int nbytes, int istack)
2483 /* Some of the following stuff should be common code for all
2484 * packet sends (it's repeated elsewhere) */
2485 register struct rx_packet *p;
2487 int savelen = 0, saven = 0;
2488 int channel, callNumber;
2490 channel = call->channel;
2491 callNumber = *call->callNumber;
2492 /* BUSY packets refer to the next call on this connection */
2493 if (type == RX_PACKET_TYPE_BUSY) {
2502 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2504 osi_Panic("rxi_SendSpecial failure");
2511 p->header.serviceId = conn->serviceId;
2512 p->header.securityIndex = conn->securityIndex;
2513 p->header.cid = (conn->cid | channel);
2514 p->header.callNumber = callNumber;
2516 p->header.epoch = conn->epoch;
2517 p->header.type = type;
2518 p->header.flags = 0;
2519 if (conn->type == RX_CLIENT_CONNECTION)
2520 p->header.flags |= RX_CLIENT_INITIATED;
2522 rx_packetwrite(p, 0, nbytes, data);
2524 for (i = 1; i < p->niovecs; i++) {
2525 if (nbytes <= p->wirevec[i].iov_len) {
2526 savelen = p->wirevec[i].iov_len;
2528 p->wirevec[i].iov_len = nbytes;
2529 p->niovecs = i + 1; /* so condition fails because i == niovecs */
2531 nbytes -= p->wirevec[i].iov_len;
2535 rxi_Send(call, p, istack);
2537 rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2538 if (saven) { /* means we truncated the packet above. We probably don't */
2539 /* really need to do this, but it seems safer this way, given that */
2540 /* sneaky optionalPacket... */
2541 p->wirevec[i - 1].iov_len = savelen;
2544 if (!optionalPacket)
2546 return optionalPacket;
2550 /* Encode the packet's header (from the struct header in the packet to
2551 * the network byte order representation used in the packet's wire format,
2552 * which is what is actually sent out on the wire) */
2554 rxi_EncodePacketHeader(register struct rx_packet *p)
2556 register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2558 memset((char *)buf, 0, RX_HEADER_SIZE);
2559 *buf++ = htonl(p->header.epoch);
2560 *buf++ = htonl(p->header.cid);
2561 *buf++ = htonl(p->header.callNumber);
2562 *buf++ = htonl(p->header.seq);
2563 *buf++ = htonl(p->header.serial);
2564 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2565 | (((afs_uint32) p->header.flags) << 16)
2566 | (p->header.userStatus << 8) | p->header.securityIndex);
2567 /* Note: top 16 bits of this next word were reserved */
2568 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
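/* The seven 32-bit words written above make up the RX_HEADER_SIZE (28-byte)
 * wire header:
 *   word 0: epoch
 *   word 1: cid (connection id | channel)
 *   word 2: callNumber
 *   word 3: seq
 *   word 4: serial
 *   word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6: spare<<16 | serviceId  (the spare half is what security modules
 *           use, e.g. as the header checksum noted in the decoder below)
 * rxi_DecodePacketHeader performs the exact inverse of this mapping. */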
2571 /* Decode the packet's header (from net byte order to a struct header) */
2573 rxi_DecodePacketHeader(register struct rx_packet *p)
2575 register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2578 p->header.epoch = ntohl(*buf);
2580 p->header.cid = ntohl(*buf);
2582 p->header.callNumber = ntohl(*buf);
2584 p->header.seq = ntohl(*buf);
2586 p->header.serial = ntohl(*buf);
2592 /* C will truncate byte fields to bytes for me */
2593 p->header.type = temp >> 24;
2594 p->header.flags = temp >> 16;
2595 p->header.userStatus = temp >> 8;
2596 p->header.securityIndex = temp >> 0;
2601 p->header.serviceId = (temp & 0xffff);
2602 p->header.spare = temp >> 16;
2603 /* Note: top 16 bits of this last word are the security checksum */
2607 rxi_PrepareSendPacket(register struct rx_call *call,
2608 register struct rx_packet *p, register int last)
2610 register struct rx_connection *conn = call->conn;
2612 ssize_t len; /* len must be a signed type; it can go negative */
2614 p->flags &= ~RX_PKTFLAG_ACKED;
2615 p->header.cid = (conn->cid | call->channel);
2616 p->header.serviceId = conn->serviceId;
2617 p->header.securityIndex = conn->securityIndex;
2619 /* No data packets on call 0. Where do these come from? */
2620 if (*call->callNumber == 0)
2621 *call->callNumber = 1;
2623 p->header.callNumber = *call->callNumber;
2624 p->header.seq = call->tnext++;
2625 p->header.epoch = conn->epoch;
2626 p->header.type = RX_PACKET_TYPE_DATA;
2627 p->header.flags = 0;
2628 p->header.spare = 0;
2629 if (conn->type == RX_CLIENT_CONNECTION)
2630 p->header.flags |= RX_CLIENT_INITIATED;
2633 p->header.flags |= RX_LAST_PACKET;
2635 clock_Zero(&p->retryTime); /* Never yet transmitted */
2636 clock_Zero(&p->firstSent); /* Never yet transmitted */
2637 p->header.serial = 0; /* Another way of saying never transmitted... */
2640 /* Now that we're sure this is the last data on the call, make sure
2641 * that the "length" and the sum of the iov_lens match. */
2642 len = p->length + call->conn->securityHeaderSize;
2644 for (i = 1; i < p->niovecs && len > 0; i++) {
2645 len -= p->wirevec[i].iov_len;
2648 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2649 } else if (i < p->niovecs) {
2650 /* Free any extra elements in the wirevec */
2651 #if defined(RX_ENABLE_TSFPQ)
2652 rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2653 #else /* !RX_ENABLE_TSFPQ */
2654 MUTEX_ENTER(&rx_freePktQ_lock);
2655 rxi_FreeDataBufsNoLock(p, i);
2656 MUTEX_EXIT(&rx_freePktQ_lock);
2657 #endif /* !RX_ENABLE_TSFPQ */
2661 p->wirevec[i - 1].iov_len += len;
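/* Worked example: if p->length + securityHeaderSize is 100 and wirevec[1]
 * is a full 1412-byte buffer (illustrative size), the loop exits with
 * i == 2 and len == -1312; buffers from index 2 up are freed above, and the
 * "+= len" just above shrinks wirevec[1].iov_len to exactly 100, so the
 * iovec lengths again sum to the amount of data actually being sent. */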
2662 RXS_PreparePacket(conn->securityObject, call, p);
2665 /* Given an interface MTU size, calculate an adjusted MTU size that
2666 * will make efficient use of the RX buffers when the peer is sending
2667 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
2669 rxi_AdjustIfMTU(int mtu)
2674 if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
2676 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2677 if (mtu <= adjMTU) {
2684 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2685 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
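/* Example, assuming the usual sizes (RX_HEADER_SIZE 28, RX_JUMBOBUFFERSIZE
 * 1412, RX_JUMBOHEADERSIZE 4): adjMTU is 1444.  An interface MTU of 1500
 * has only 56 bytes to spare beyond adjMTU -- not enough for another
 * 1416-byte jumbo fragment -- so 1444 is returned; an MTU of 2900 has room
 * for one extra fragment, giving 1444 + 1416 = 2860. */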
2688 /* Given an interface MTU size, and the peer's advertised max receive
2689 * size, calculate an adjusted maxMTU size that makes efficient use
2690 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2692 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2694 int maxMTU = mtu * rxi_nSendFrags;
2695 maxMTU = MIN(maxMTU, peerMaxMTU);
2696 return rxi_AdjustIfMTU(maxMTU);
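/* Example (same assumed sizes as above): mtu 1444, rxi_nSendFrags 3 and a
 * peer advertising 65535 gives maxMTU = 4332, which rxi_AdjustIfMTU trims
 * to 1444 + 2*1416 = 4276 -- a header-plus-jumbo first fragment and two
 * more whole jumbo buffers. */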
2699 /* Given a packet size, figure out how many datagram packets will fit.
2700 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2701 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2702 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2704 rxi_AdjustDgramPackets(int frags, int mtu)
2707 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2710 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2711 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2712 /* subtract the size of the first and last packets */
2713 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2717 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
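/* Example (UDP_HDR_SIZE assumed to be 28 and the RX_MAX_PACKET_SIZE clamp
 * assumed not to apply): rxi_AdjustDgramPackets(3, 1444) gives
 * maxMTU = 3*(1444+28) - 28 = 4388; subtracting the first/last-packet
 * overhead 28 + 2*1412 + 4 = 2856 leaves 1532, and 1532/1416 = 1, so
 * 2 + 1 = 3 packets fit in a three-fragment datagram. */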