 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html

#include <afsconfig.h>
#include "afs/param.h"
#include <afs/param.h>

#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"

/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
static struct rx_packet *rx_mallocedP = 0;
static afs_uint32 rx_packet_id = 0;
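/* rx_mallocedP chains every packet ever allocated (RXDEBUG_PACKET builds)
 * so debugging code can walk all packets; rx_packet_id hands out a unique
 * id per packet.  See the allocation loops in rxi_MorePackets* below. */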
extern char cml_version_number[];

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport,
				afs_int32 istack);

static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
				   afs_uint32 first,
				   struct rx_queue * q);
#ifdef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
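/* Note: this is the slow path.  The fast-path wrapper in rx_packet.h
 * (rx_GetInt32) normally reads straight out of the first data iovec and
 * should fall back here only when the offset lands past wirevec[1].
 * Illustrative use, assuming a prepared packet p:
 *
 *     afs_int32 v = rx_SlowGetInt32(p, 3 * sizeof(afs_int32));
 */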
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
	l += packet->wirevec[i].iov_len;

 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	l += packet->wirevec[i].iov_len;

 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	l += packet->wirevec[i].iov_len;

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
    while ((resid > 0) && (i < packet->niovecs)) {
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	l += packet->wirevec[i].iov_len;

    return (resid ? (r - resid) : r);

 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy into this iovec.
     * offset only applies to the first iovec.
    while ((resid > 0) && (i <= RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	l += packet->wirevec[i].iov_len;

    return (resid ? (r - resid) : r);
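/* The rx_packetread()/rx_packetwrite() wrappers (used by the debug
 * handlers later in this file) are the usual entry points; they are
 * expected to fall through to these Slow routines only when a request
 * cannot be satisfied from a single iovec. */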
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
    struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
	RX_PACKET_IOV_FULLINIT(p);

#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
	MUTEX_ENTER(&rx_freePktQ_lock);
	transfer = MAX(transfer, rx_TSFPQGlobSize);
	if (transfer > rx_nFreePackets) {
	    /* alloc enough for us, plus a few globs for other threads */
	    rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);

	RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

	MUTEX_EXIT(&rx_freePktQ_lock);

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
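/* TSFPQ discipline: a thread checks packets out of its own free queue
 * (rx_ts_info->_FPQ) and only takes rx_freePktQ_lock when that runs
 * short, pulling a batch from the global pool (GTOL2) and growing the
 * pool if even the global queue cannot cover the request. */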
#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);

	for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
	     num_pkts--, overq++);

	    rxi_NeedMorePackets = TRUE;
	    if (rx_stats_active) {
		case RX_PACKET_CLASS_RECEIVE:
		    rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
		case RX_PACKET_CLASS_SEND:
		    rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
		case RX_PACKET_CLASS_SPECIAL:
		    rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
		case RX_PACKET_CLASS_RECV_CBUF:
		    rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
		case RX_PACKET_CLASS_SEND_CBUF:
		    rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);

	if (rx_nFreePackets < num_pkts)
	    num_pkts = rx_nFreePackets;

	    rxi_NeedMorePackets = TRUE;

    if (rx_nFreePackets < num_pkts) {
	rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));

    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
	 i++, c = queue_Next(c, rx_packet)) {

    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);

    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

 * Free a packet currently used as a continuation buffer
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

	for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);

	for (queue_Scan(q, c, nc, rx_packet)) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);

    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */

	MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    struct rx_packet *p, *np;

    osi_Assert(num_pkts >= 0);

	for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);

	for (queue_Scan(q, p, np, rx_packet)) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);

	queue_SpliceAppend(q, &cbs);

    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has already been shortened and made ready for
 * sending when we suddenly discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;

	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Returns 0 or less on success, otherwise
 * the number of bytes (>0) it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
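/* Usage sketch (illustrative only): make room for 1000 more bytes
 * before writing into packet p:
 *
 *     int shortfall = rxi_AllocDataBuf(p, 1000, RX_PACKET_CLASS_SEND_CBUF);
 *     if (shortfall > 0)
 *         ...only (1000 - shortfall) extra bytes were attached...
 */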
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
    if ((nv + p->niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - p->niovecs;

    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
	p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	p->wirevec[i].iov_len = RX_CBUFFERSIZE;

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);

/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);
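    /* RX_TS_FPQ_COMPUTE_LIMITS re-derives the per-thread free queue
     * limits (e.g. rx_TSFPQLocalMax) from the new rx_nPackets total. */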
    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_NeedMorePackets = FALSE;

	MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	p->flags |= RX_PKTFLAG_FREE;

	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

	(num_keep_local < apackets)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
	rxi_NeedMorePackets = FALSE;

	MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
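    /* i.e. for every four packets requested, allocate enough additional
     * packets (each donating its buffer space as continuation buffers)
     * for one of the four to grow to a maximal jumbogram. */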
    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

	apackets -= apackets / 4;
	osi_Assert(apackets > 0);

    memset((char *)p, 0, getme);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	p->flags |= RX_PKTFLAG_FREE;

	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nFreePackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);
#endif /* RX_ENABLE_TSFPQ */
    rxi_NeedMorePackets = FALSE;

rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));

#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	if (num_keep_local < rx_ts_info->_FPQ.len) {
	    xfer = rx_ts_info->_FPQ.len - num_keep_local;
	    RX_TS_FPQ_LTOG2(rx_ts_info, xfer);

	    xfer = num_keep_local - rx_ts_info->_FPQ.len;
	    if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
		xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
	    if (rx_nFreePackets < xfer) {
		rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));

	    RX_TS_FPQ_GTOL2(rx_ts_info, xfer);

	MUTEX_EXIT(&rx_freePktQ_lock);

rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */

/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_initSendWindow);

/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	RX_TS_FPQ_LTOG(rx_ts_info);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %lx\n", (unsigned long)p));

    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */

	MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

 * free continuation buffers off a packet into a queue
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *   number of continuation buffers freed
#ifndef RX_ENABLE_TSFPQ
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
    struct rx_packet * cb;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
	iov = &p->wirevec[first];
	    osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
	cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
	RX_FPQ_MARK_FREE(cb);

 * free packet continuation buffers into the global free packet pool
 * [IN] p      -- packet from which to free continuation buffers
 * [IN] first  -- iovec offset of first continuation buffer to free
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	    osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));

#ifdef RX_ENABLE_TSFPQ
 * free packet continuation buffers into the thread-local free pool
 * [IN] p             -- packet from which continuation buffers will be freed
 * [IN] first         -- iovec offset of first continuation buffer to free
 *                       any value less than 2, the min number of iovecs,
 *                       is treated as if it is 2.
 * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 *                       global free pool before returning
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	    osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */

	MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	iov->iov_len = RX_CBUFFERSIZE;
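/* The receive path shrinks the iov_len fields to describe only the bytes
 * actually present, so the full RX_CBUFFERSIZE lengths must be put back
 * before the packet can be posted for another read. */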
#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;

	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */

	RX_TS_INFO_GET(rx_ts_info);
	for (; iov < end; iov++) {
		osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	    RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));

	if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	    MUTEX_ENTER(&rx_freePktQ_lock);

	    RX_TS_FPQ_LTOG(rx_ts_info);
	    rxi_PacketsUnWait();

	    MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;

	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */

	MUTEX_ENTER(&rx_freePktQ_lock);

	for (; iov < end; iov++) {
		osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	    rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));

	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is accounted for separately; this matches the way the
 * length field is normally used. */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SEND:
		rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SPECIAL:
		rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);

	return (struct rx_packet *)0;

    if (rx_stats_active)
	rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

	if (queue_IsEmpty(&rx_freePacketQueue))
	    osi_Panic("rxi_AllocPacket error");

	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(4 * rx_initSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SEND:
		rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SPECIAL:
		rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);

	return (struct rx_packet *)0;

    if (rx_stats_active)
	rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);

    if (queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");

    if (queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(4 * rx_initSendWindow);

    p = queue_First(&rx_freePacketQueue, rx_packet);

    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketTSFPQ(int class, int pull_global)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
	rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(4 * rx_initSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

	MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
rxi_AllocSendPacket(struct rx_call *call, int want)
    struct rx_packet *p = (struct rx_packet *)0;

    mud = call->MTU - RX_HEADER_SIZE;
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
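    /* delta is the space the security layer may claim out of each packet
     * (header plus maximal trailer); a packet whose whole length would be
     * eaten by delta is useless for data (see the checks below). */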
#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
	want = MIN(want, mud);

	if ((unsigned)want > p->length)
	    (void)rxi_AllocDataBuf(p, (want - p->length),
				   RX_PACKET_CLASS_SEND_CBUF);

	if ((unsigned)p->length > mud)

	if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);

	    want = MIN(want, mud);

	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);

	    if ((unsigned)p->length > mud)

	    if (delta >= p->length) {

	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
	osi_rxSleep(&rx_waitingForPackets);

	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
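	/* We get here once woken (rxi_PacketsUnWait() on the freeing side
	 * performs the wakeup); loop around and retry the allocation. */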
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0

/* count the number of used FDs */
    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);

#endif /* AFS_NT40_ENV */

#define CountFDs(amax) amax

#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    afs_int32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
	tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;
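    /* Sanity-check what recvmsg returned: anything shorter than the rx
     * header or longer than we asked for is rejected as bogus below. */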
    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes < 0 && errno == EWOULDBLOCK) {
	    if (rx_stats_active)
		rx_MutexIncrement(rx_stats.noPacketOnRead, rx_stats_mutex);
	} else if (nbytes <= 0) {
	    if (rx_stats_active) {
		MUTEX_ENTER(&rx_stats_mutex);
		rx_stats.bogusPacketOnRead++;
		rx_stats.bogusHost = from.sin_addr.s_addr;
		MUTEX_EXIT(&rx_stats_mutex);

	    dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
		 ntohs(from.sin_port), nbytes));

    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
	     && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;

	dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
	     p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
	     p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,

#ifdef RX_TRIMDATABUFS
	rxi_TrimDataBufs(p, 1);

	/* Extract packet header. */
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
	    struct rx_peer *peer;
	    if (rx_stats_active)
		rx_MutexIncrement(rx_stats.packetsRead[p->header.type - 1], rx_stats_mutex);
	     * Try to look up this peer structure.  If it doesn't exist,
	     * don't create a new one -
	     * we don't keep count of the bytes sent/received if a peer
	     * structure doesn't already exist.
	     * The peer/connection cleanup code assumes that there is 1 peer
	     * per connection.  If we actually created a peer structure here
	     * and this packet was an rxdebug packet, the peer structure would
	     * never be cleaned up.
	    peer = rxi_FindPeer(*host, *port, 0, 0);
	    /* Since this may not be associated with a connection,
	     * it may have no refCount, meaning we could race with
	    if (peer && (peer->refCount > 0)) {
		MUTEX_ENTER(&peer->peer_lock);
		hadd32(peer->bytesReceived, p->length);
		MUTEX_EXIT(&peer->peer_lock);

#ifdef RX_TRIMDATABUFS
	/* Free any empty packet buffers at the end of this packet */
	rxi_TrimDataBufs(p, 1);

#endif /* !KERNEL || UKERNEL */

/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
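/* Rough wire picture of a jumbogram (illustrative):
 *
 *   [rx header][RX_JUMBOBUFFERSIZE data][4-byte jumbo header]
 *              [RX_JUMBOBUFFERSIZE data][jumbo header] ... [last data]
 *
 * Each abbreviated jumbo header carries the flags and spare/cksum for
 * the packet that follows it. */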
rxi_SplitJumboPacket(struct rx_packet *p, afs_int32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));

    niov = p->niovecs - 2;
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));

    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;

    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);

#elif !defined(UKERNEL)
 * message receipt is done in rxk_input or rx_put.
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
cpytoc(mblk_t * mp, int off, int len, char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));

	    t = iovs[i].iov_len;

	memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e)  cpytoiovec(a, b, c, d, e)

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

    if (m->m_len <= off) {

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;

    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

	t = MIN(l1, MIN(l2, (unsigned int)len));

	p1 = mtod(m, caddr_t);

	p2 = iovs[i].iov_base;
	l2 = iovs[i].iov_len;

#endif /* AFS_SUN5_ENV */

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;
    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /*KERNEL && !UKERNEL */
/* send a response to a debug packet */
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
		       afs_int32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;

     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;

	    /* get basic stats */
	    memset((char *)&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.nPackets = htonl(rx_nPackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_nWaiting);
	    tstat.nWaited = htonl(rx_nWaited);
	    queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
	    ap->length = sizeof(struct rx_debugStats);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    rx_computelen(ap, ap->length);

    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);

	    tl = sizeof(struct rx_debugConn) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset((char *)&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV

		MUTEX_ENTER(&rx_connHashTable_lock);
		/* We might be slightly out of step since we are not
		 * locking each call, but this is only debugging output.
		for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		    if ((all || rxi_IsConnInteresting(tc))
			&& tin.index-- <= 0) {
			tconn.host = tc->peer->host;
			tconn.port = tc->peer->port;
			tconn.cid = htonl(tc->cid);
			tconn.epoch = htonl(tc->epoch);
			tconn.serial = htonl(tc->serial);
			for (j = 0; j < RX_MAXCALLS; j++) {
			    tconn.callNumber[j] = htonl(tc->callNumber[j]);
			    if ((tcall = tc->call[j])) {
				tconn.callState[j] = tcall->state;
				tconn.callMode[j] = tcall->mode;
				tconn.callFlags[j] = tcall->flags;
				if (queue_IsNotEmpty(&tcall->rq))
				    tconn.callOther[j] |= RX_OTHER_IN;
				if (queue_IsNotEmpty(&tcall->tq))
				    tconn.callOther[j] |= RX_OTHER_OUT;
				tconn.callState[j] = RX_STATE_NOTINIT;

			tconn.natMTU = htonl(tc->peer->natMTU);
			tconn.error = htonl(tc->error);
			tconn.flags = tc->flags;
			tconn.type = tc->type;
			tconn.securityIndex = tc->securityIndex;
			if (tc->securityObject) {
			    RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			    DOHTONL(packetsReceived);
			    DOHTONL(packetsSent);
			    DOHTONL(bytesReceived);
				 sizeof(tconn.secStats.spares) /
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)

			MUTEX_EXIT(&rx_connHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
			ap->length = sizeof(struct rx_debugConn);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,

	    MUTEX_EXIT(&rx_connHashTable_lock);
	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

	 * Pass back all the peer structures we have available
    case RX_DEBUGI_GETPEER:{
	    struct rx_debugPeer tpeer;

	    tl = sizeof(struct rx_debugPeer) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset((char *)&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of peers.
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock
		 * also increases the risk that we will miss a new
		 * entry - but we are willing to live with this
		 * limitation since this is meant for debugging only
#ifdef AFS_PTHREAD_ENV

		MUTEX_ENTER(&rx_peerHashTable_lock);
		for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		    if (tin.index-- <= 0) {
			tpeer.host = tp->host;
			tpeer.port = tp->port;
			tpeer.ifMTU = htons(tp->ifMTU);
			tpeer.idleWhen = htonl(tp->idleWhen);
			tpeer.refCount = htons(tp->refCount);
			tpeer.burstSize = tp->burstSize;
			tpeer.burst = tp->burst;
			tpeer.burstWait.sec = htonl(tp->burstWait.sec);
			tpeer.burstWait.usec = htonl(tp->burstWait.usec);
			tpeer.rtt = htonl(tp->rtt);
			tpeer.rtt_dev = htonl(tp->rtt_dev);
			tpeer.timeout.sec = htonl(tp->timeout.sec);
			tpeer.timeout.usec = htonl(tp->timeout.usec);
			tpeer.nSent = htonl(tp->nSent);
			tpeer.reSends = htonl(tp->reSends);
			tpeer.inPacketSkew = htonl(tp->inPacketSkew);
			tpeer.outPacketSkew = htonl(tp->outPacketSkew);
			tpeer.rateFlag = htonl(tp->rateFlag);
			tpeer.natMTU = htons(tp->natMTU);
			tpeer.maxMTU = htons(tp->maxMTU);
			tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
			tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
			tpeer.MTU = htons(tp->MTU);
			tpeer.cwind = htons(tp->cwind);
			tpeer.nDgramPackets = htons(tp->nDgramPackets);
			tpeer.congestSeq = htons(tp->congestSeq);
			tpeer.bytesSent.high = htonl(tp->bytesSent.high);
			tpeer.bytesSent.low = htonl(tp->bytesSent.low);
			tpeer.bytesReceived.high =
			    htonl(tp->bytesReceived.high);
			tpeer.bytesReceived.low =
			    htonl(tp->bytesReceived.low);

			MUTEX_EXIT(&rx_peerHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
			ap->length = sizeof(struct rx_debugPeer);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,

	    MUTEX_EXIT(&rx_peerHashTable_lock);
	    /* if we make it here, there are no interesting packets */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

    case RX_DEBUGI_RXSTATS:{
	    tl = sizeof(rx_stats) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    /* Since it's all int32s, convert to network order with a loop. */
	    if (rx_stats_active)
		MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) & rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

	    ap->length = sizeof(rx_stats);
	    if (rx_stats_active)
		MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);

	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
			 afs_int32 ahost, short aport, int istack)
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);
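	/* 64 bytes of version string plus a terminating NUL; all 65
	 * bytes go out on the wire. */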
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_int32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;

    int waslocked = ISAFS_GLOCK();

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */

	    nbytes -= apacket->wirevec[i].iov_len;
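	    /* Shrinking the final iovec (and niovecs) makes the send
	     * below transmit exactly apacket->length bytes; the saved
	     * values are restored after the send. */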
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");

    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");

    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
/* Send the packet to the appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;

    char deliveryType = 'S';

    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;

    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	    deliveryType = 'D';	/* Drop the packet */

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
	   RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

    /* Loop until the packet is sent.  We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
    waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");

	osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
		    p->length + RX_HEADER_SIZE, istack)) != 0) {
	/* send failed, so let's hurry up the resend, eh? */
	if (rx_stats_active)
	    rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
	p->retryTime = p->timeSent;	/* resend it very soon */
	clock_Addmsec(&(p->retryTime),
		      10 + (((afs_uint32) p->backoff) << 8));
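	/* i.e. schedule the resend for 10 + (p->backoff << 8) milliseconds
	 * after the original send time. */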
	/* Some systems are nice and tell us right away that we cannot
	 * reach this recipient by returning an error code.
	 * So, when this happens let's "down" the host NOW so
	 * we don't sit around waiting for this host to timeout later.
	    (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
#elif defined(AFS_LINUX20_ENV)
	    code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
	    code == EHOSTUNREACH
	    call->lastReceiveTime = 0;

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));

    if (rx_stats_active)
	rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
/* Send a list of packets to the appropriate destination for the specified
 * connection.  The headers are first encoded and placed in the packets.
rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
		   struct rx_packet **list, int len, int istack)
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length, code;

    struct rx_jumboHeader *jp;

    char deliveryType = 'S';

    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    if (len + 1 > RX_MAXIOVECS) {
	osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");

     * Stamp the packets in this jumbogram with consecutive serial numbers
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    MUTEX_EXIT(&conn->conn_data_lock);

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    length = RX_HEADER_SIZE;
    wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0; i < len; i++) {

	/* The whole 3.5 jumbogram scheme relies on packets fitting
	 * in a single packet buffer. */
	if (p->niovecs > 2) {
	    osi_Panic("rxi_SendPacketList, niovecs > 2\n");

	/* Set the RX_JUMBO_PACKET flags in all but the last packets
	if (p->length != RX_JUMBOBUFFERSIZE) {
	    osi_Panic("rxi_SendPacketList, length != jumbo size\n");

	    p->header.flags |= RX_JUMBO_PACKET;
	    length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	    wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

	    wirevec[i + 1].iov_len = p->length;
	    length += p->length;

	wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);

	/* Convert jumbo packet header to network byte order */
	temp = (afs_uint32) (p->header.flags) << 24;
	temp |= (afs_uint32) (p->header.spare);
	*(afs_uint32 *) jp = htonl(temp);
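	/* Jumbo header wire format: flags in the top byte, the 16-bit
	 * spare/cksum in the low half; rxi_SplitJumboPacket above does
	 * the matching decode on receive. */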
2399 jp = (struct rx_jumboHeader *)
2400 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2402 /* Stamp each packet with a unique serial number. The serial
2403 * number is maintained on a connection basis because some types
2404 * of security may be based on the serial number of the packet,
2405 * and security is handled on a per authenticated-connection
2407 /* Pre-increment, to guarantee no zero serial number; a zero
2408 * serial number means the packet was never sent. */
2409 p->header.serial = ++serial;
2410 /* This is so we can adjust retransmit time-outs better in the face of
2411 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2413 if (p->firstSerial == 0) {
2414 p->firstSerial = p->header.serial;
2417 /* If an output tracer function is defined, call it with the packet and
2418 * network address. Note this function may modify its arguments. */
2419 if (rx_almostSent) {
2420 int drop = (*rx_almostSent) (p, &addr);
2421 /* drop packet if return value is non-zero? */
2423 deliveryType = 'D'; /* Drop the packet */
2427 /* Get network byte order header */
2428 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2429 * touch ALL the fields */
2432 /* Send the packet out on the same socket that related packets are being
2436 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
    } else {
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */

        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        waslocked = ISAFS_GLOCK();
        if (!istack && waslocked)
            AFS_GUNLOCK();
#endif
        if ((code =
             osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
                         istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            if (rx_stats_active)
                rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
            for (i = 0; i < len; i++) {
                p = list[i];
                p->retryTime = p->timeSent;     /* resend it very soon */
                clock_Addmsec(&(p->retryTime),
                              10 + (((afs_uint32) p->backoff) << 8));
            }
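            /*
             * For reference: the delay computed above is
             * 10 + (backoff << 8) milliseconds past timeSent, so backoff
             * values of 0, 1, and 2 schedule the retry roughly 10, 266,
             * and 522 ms out.
             */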
            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later.
             */
            if (call &&
#ifdef AFS_NT40_ENV
                ((code == -1 && WSAGetLastError() == WSAEHOSTUNREACH)
                 || (code == -WSAEHOSTUNREACH))
#elif defined(AFS_LINUX20_ENV)
                code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
                code == EHOSTUNREACH
#else
                0
#endif
                )
                call->lastReceiveTime = 0;
        }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        if (!istack && waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    if (rx_stats_active)
        rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}

/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(struct rx_call *call,
                struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
        p->length = nbytes;
    else
        nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);

    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1; /* so condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {                /* means we truncated the packet above.  We probably don't */
        /* really need to do this, but it seems safer this way, given that */
        /* sneaky optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}
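
/* A typical use of rxi_SendSpecial, sketched here for illustration (the
 * error value is an example; callers pass their own): send a
 * connection-level abort carrying a network-order error code, letting the
 * routine allocate and free the packet itself.  Because optionalPacket is
 * null, the return value is null as well:
 *
 *      afs_int32 error = htonl(RX_PROTOCOL_ERROR);
 *      (void)rxi_SendSpecial((struct rx_call *)0, conn,
 *                            (struct rx_packet *)0, RX_PACKET_TYPE_ABORT,
 *                            (char *)&error, sizeof(error), istack);
 */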

/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */

    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
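
/* For reference, the seven-word (RX_HEADER_SIZE byte) wire header written
 * above, derived directly from the encode routine:
 *
 *      word 0:  epoch
 *      word 1:  cid (connection id | channel)
 *      word 2:  callNumber
 *      word 3:  seq
 *      word 4:  serial
 *      word 5:  type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *      word 6:  spare<<16 | serviceId
 *
 * All words are in network byte order; on the wire the top half of word 6
 * carries the security checksum (see the note in the decode routine). */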

/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;

    temp = ntohl(*buf);
    buf++;

    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf);
    buf++;

    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}

void
rxi_PrepareSendPacket(struct rx_call *call,
                      struct rx_packet *p, int last)
{
    struct rx_connection *conn = call->conn;
    int i;
    ssize_t len;                /* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);  /* Never yet transmitted */
    clock_Zero(&p->firstSent);  /* Never yet transmitted */
    p->header.serial = 0;       /* Another way of saying never transmitted... */

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
    } else if (i < p->niovecs) {
        /* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
        rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
#else /* !RX_ENABLE_TSFPQ */
        MUTEX_ENTER(&rx_freePktQ_lock);
        rxi_FreeDataBufsNoLock(p, i);
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */
        p->niovecs = i;
    }
    if (len)
        p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
}
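
/* Note on the length fixup at the end of rxi_PrepareSendPacket: after the
 * loop, len holds the (non-positive) difference between the bytes the data
 * iovecs provide and the bytes the packet actually carries, so adding it to
 * the last used iovec trims that iovec to end exactly at p->length plus the
 * security header.  With illustrative numbers: for a payload plus security
 * header of 1000 bytes spread over data iovecs of 960 and 1024 bytes, the
 * loop exits with len == -984 and the second iovec is trimmed to 40 bytes. */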

/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
        return mtu;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
        return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
        return 0;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
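
/* A worked example of rxi_AdjustIfMTU, assuming the usual constant values
 * (RX_HEADER_SIZE 28, RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4, giving
 * adjMTU == 1444): for an interface MTU of 1500, the leftover 56 bytes hold
 * no additional 1416-byte fragment, so the function returns 1444.  An
 * Ethernet-class MTU is thus rounded down to exactly one full jumbogram
 * fragment. */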

/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}

/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
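
/* A worked example of rxi_AdjustDgramPackets, again assuming the usual
 * constants (RX_HEADER_SIZE 28, RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4,
 * UDP_HDR_SIZE 28 for IP plus UDP) and an RX_MAX_PACKET_SIZE large enough not
 * to clamp: with frags == 3 and mtu == 1444, maxMTU = 3 * (1444 + 28) - 28 =
 * 4388; subtracting 28 + 2 * 1412 + 4 = 2856 for the first and last packets
 * leaves 1532, and 1532 / 1416 == 1, so the function returns 2 + 1 = 3
 * packets per datagram. */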

#ifdef AFS_NT40_ENV
/*
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where the packet leakage is.
 */
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
    DWORD zilch;
    struct rx_packet *p;
    char output[2048];

    MUTEX_ENTER(&rx_freePktQ_lock);
    sprintf(output, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);

    for (p = rx_mallocedP; p; p = p->allNextp) {
        sprintf(output, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, retryTime=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, backoff=%u, length=%u  header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
                cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec, p->retryTime.sec, p->retryTime.usec,
                p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->backoff, (afs_uint32)p->length,
                p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
                (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
                (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
    }

    sprintf(output, "%s - End dumping all Rx Packets\r\n", cookie);
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RXDEBUG_PACKET */
    return 0;
}
#endif /* AFS_NT40_ENV */