/*
 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

#include <afsconfig.h>
#ifdef KERNEL
#include "afs/param.h"
#else
#include <afs/param.h>
#endif
#ifdef KERNEL
#if defined(UKERNEL)
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#include "rx/rx_atomic.h"
#include "rx/rx_internal.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#endif
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx_atomic.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#include "rx_internal.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#else /* KERNEL */
#include "sys/types.h"
#if defined(AFS_NT40_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include "rx_xmit_nt.h"
#else
#include <sys/socket.h>
#include <netinet/in.h>
#endif
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"
#endif /* KERNEL */
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
static struct rx_packet *rx_mallocedP = 0;
#ifdef RXDEBUG_PACKET
static afs_uint32 rx_packet_id = 0;
#endif

extern char cml_version_number[];

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_uint32 ahost, short aport,
				afs_int32 istack);

#ifdef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
#else
static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
				   afs_uint32 first,
				   struct rx_queue * q);
#endif
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */
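
/* Rule 1 is easiest to see with a concrete example.  The sketch below is
 * purely illustrative (example_AppendTrailer is not part of rx): it appends
 * a hypothetical 8-byte trailer into the slack space that rule 1 guarantees
 * past the final iovec's iov_len.  The read path later in this file relies
 * on at least RX_EXTRABUFFERSIZE bytes of such slack after the last buffer. */
static int
example_AppendTrailer(struct rx_packet *p, const char trailer[8])
{
    struct iovec *last = &p->wirevec[p->niovecs - 1];

    if (8 > RX_EXTRABUFFERSIZE)
	return -1;		/* trailer would not fit in the slack */

    memcpy((char *)last->iov_base + last->iov_len, trailer, 8);
    last->iov_len += 8;		/* expose the trailer, per rule 1 */
    p->length += 8;
    return 0;
}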
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of
 *        the word size.
 *        offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    return
		*((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
				 (offset - l)));
	}
	l += packet->wirevec[i].iov_len;
    }

    return 0;
}
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the word size.
 *        offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	    return 0;
	}
	l += packet->wirevec[i].iov_len;
    }

    return 0;
}
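
/* Usage sketch (illustrative only, not part of rx): these slow-path
 * accessors walk wirevec[1..niovecs-1] (wirevec[0] holds the wire header),
 * so a word written at a word-aligned offset reads back the same value no
 * matter which continuation buffer it landed in. */
static void
example_WordRoundTrip(struct rx_packet *pkt, size_t offset)
{
    afs_int32 in = 0x12345678, out;

    rx_SlowPutInt32(pkt, offset, in);
    out = rx_SlowGetInt32(pkt, offset);
    osi_Assert(in == out);
}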
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the
 *        word size.
 *        offset is an integral multiple of the word size.
 * Packet Invariants:
 *        all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
afs_int32
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
		  char *out)
{
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i < packet->niovecs)) {
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	r -= j;
	out += j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (r ? (resid - r) : resid);
}
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the
 *        word size.
 *        offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
{
    unsigned int i, j, l, o, r;
    char *b;

    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > o) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy into this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i <= RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
		break;

	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	memcpy(b, in, j);
	r -= j;
	in += j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (r ? (resid - r) : resid);
}
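
/* Usage sketch (illustrative only, not part of rx): both copy helpers
 * return the number of bytes actually moved, so a short return signals
 * truncation.  The write side grows the packet with continuation buffers
 * on demand via rxi_AllocDataBuf. */
static int
example_CopyRoundTrip(struct rx_packet *pkt)
{
    static char msg[] = "payload";
    char check[sizeof(msg)];
    int len = sizeof(msg);

    if (rx_SlowWritePacket(pkt, 0, len, msg) != len)
	return -1;		/* out of iovecs/cbufs: truncated */
    if (rx_SlowReadPacket(pkt, 0, len, check) != len)
	return -1;		/* packet shorter than expected */
    return memcmp(msg, check, len) ? -1 : 0;
}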
int
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
	RX_PACKET_IOV_FULLINIT(p);
    }

    return num_pkts;
}
#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
	MUTEX_ENTER(&rx_freePktQ_lock);
	transfer = MAX(transfer, rx_TSFPQGlobSize);
	if (transfer > rx_nFreePackets) {
	    /* alloc enough for us, plus a few globs for other threads */
	    rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);

	RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

	MUTEX_EXIT(&rx_freePktQ_lock);

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
	 num_pkts--, overq++);

	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SEND:
		rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SPECIAL:
		rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);

    if (rx_nFreePackets < num_pkts)
	num_pkts = rx_nFreePackets;

	rxi_NeedMorePackets = TRUE;

    if (rx_nFreePackets < num_pkts) {
	rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));

    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
	 i++, c = queue_Next(c, rx_packet)) {

    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);
    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
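
/* Design note (sketch, not rx API): both AllocPacketBufs variants above are
 * instances of a two-level free list.  The TSFPQ build allocates from a
 * per-thread cache without locking and takes rx_freePktQ_lock only to move
 * whole batches between the cache and the global pool (the GTOL/LTOG
 * macros).  The pattern, with hypothetical names: */
struct example_ts_cache { struct rx_queue q; int len; };

static struct rx_packet *
example_TSAlloc(struct example_ts_cache *cache)
{
    struct rx_packet *p;

    if (cache->len == 0) {	/* local cache empty: refill in bulk */
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* ... move a batch of packets global -> local here ... */
	MUTEX_EXIT(&rx_freePktQ_lock);
    }
    p = queue_First(&cache->q, rx_packet);	/* lock-free fast path */
    queue_Remove(p);
    cache->len--;
    return p;
}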
/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

	for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);

	for (queue_Scan(q, c, nc, rx_packet)) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);

    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    struct rx_queue cbs;
    struct rx_packet *p, *np;
    int qlen = 0;

    osi_Assert(num_pkts >= 0);

	for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);

	for (queue_Scan(q, p, np, rx_packet)) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);

	queue_SpliceAppend(q, &cbs);

    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has already been shortened and set up, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;

    if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	p->wirevec[i].iov_len += nb;

/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes <= 0 if successful, otherwise
 * return the number of bytes (> 0) which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
    int i, nv;
    struct rx_queue q;
    struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
	nv++;
    if ((nv + p->niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - p->niovecs;

    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
	queue_Remove(cb);
	p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    }

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
    p->niovecs += nv;

    return nb;
}
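
/* Worked example (illustrative): the cbuf arithmetic above is a ceiling
 * division capped by the iovec budget.  With a 1024-byte RX_CBUFFERSIZE
 * (value illustrative; see rx_packet.h), nb = 2500 gives nv = 3, and the
 * function then returns 2500 - 3*1024 < 0, i.e. success.  Isolated: */
static int
example_CbufsNeeded(int nb, int niovecs)
{
    int nv = nb / RX_CBUFFERSIZE;	/* whole buffers */
    if ((nv * RX_CBUFFERSIZE) < nb)
	nv++;				/* round up the partial buffer */
    if ((nv + niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - niovecs;	/* cap at the iovec budget */
    return nv;
}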
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_NeedMorePackets = FALSE;

	MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;
#endif
	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

    if (flush_global &&
	(num_keep_local < apackets)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
	rxi_NeedMorePackets = FALSE;

	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    apackets -= apackets / 4;
    osi_Assert(apackets > 0);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;
#endif
	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;

rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));

#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	if (num_keep_local < rx_ts_info->_FPQ.len) {
	    xfer = rx_ts_info->_FPQ.len - num_keep_local;
	    RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
	} else {
	    xfer = num_keep_local - rx_ts_info->_FPQ.len;
	    if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
		xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
	    if (rx_nFreePackets < xfer) {
		rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
	    }
	    RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
	}
	MUTEX_EXIT(&rx_freePktQ_lock);
    }

rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */

/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_maxSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list. */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page
   out, they must be stored so that packets which are adjacent in memory
   are adjacent in the free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	RX_TS_FPQ_LTOG(rx_ts_info);
    }
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	MUTEX_EXIT(&rx_freePktQ_lock);
    }
#endif /* RX_ENABLE_TSFPQ */

/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *
 * returns:
 *   number of continuation buffers freed
 */
#ifndef RX_ENABLE_TSFPQ
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
    struct iovec *iov;
    struct rx_packet * cb;
    int count = 0;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
	cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
	RX_FPQ_MARK_FREE(cb);
	queue_Append(q, cb);
    }

/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p      -- packet from which to free continuation buffers
 * [IN] first  -- iovec offset of first continuation buffer to free
 */
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    struct iovec *iov;

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p             -- packet from which continuation buffers will be freed
 * [IN] first         -- iovec offset of first continuation buffer to free
 *                       any value less than 2, the min number of iovecs,
 *                       is treated as if it is 2.
 * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 *                       global free pool before returning
 */
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    struct iovec *iov;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	MUTEX_EXIT(&rx_freePktQ_lock);
    }
#endif /* RX_ENABLE_TSFPQ */
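
/* All three freeing paths above depend on RX_CBUF_TO_PACKET to map a
 * continuation buffer's iov_base back to the rx_packet that donated it.
 * It works because a cbuf is just the localdata area of another packet,
 * so the owning packet sits at a fixed negative offset, in the style of a
 * container_of macro.  A sketch of the equivalent arithmetic (illustrative;
 * the real macro lives in rx_packet.h): */
static struct rx_packet *
example_CbufToPacket(void *cbuf, struct rx_packet *ref)
{
    /* "ref" is any packet, used only to measure the localdata offset. */
    size_t off = (char *)&ref->localdata[0] - (char *)ref;
    return (struct rx_packet *)((char *)cbuf - off);
}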
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
void
rxi_RestoreDataBufs(struct rx_packet *p)
{
    unsigned int i;
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	    rxi_nBadIovecs++;
	    p->niovecs = i;
	    break;
	}
	iov->iov_len = RX_CBUFFERSIZE;
    }
}

#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    int length;
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */

    RX_TS_INFO_GET(rx_ts_info);
    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    int length;
    struct iovec *iov, *end;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
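
/* Note (sketch, illustrative helper): because p->length excludes the
 * header, every send path in this file computes the on-wire size as
 * p->length + RX_HEADER_SIZE.  Isolated: */
static int
example_WireSize(struct rx_packet *p)
{
    return p->length + RX_HEADER_SIZE;	/* user data + rx header */
}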
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SEND:
		rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SPECIAL:
		rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);

	return (struct rx_packet *)0;

    if (rx_stats_active)
	rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

	if (queue_IsEmpty(&rx_freePacketQueue))
	    osi_Panic("rxi_AllocPacket error");

	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SEND:
		rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SPECIAL:
		rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);

	return (struct rx_packet *)0;

    if (rx_stats_active)
	rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);

    if (queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");

    if (queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(rx_maxSendWindow);

    p = queue_First(&rx_freePacketQueue, rx_packet);

    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketTSFPQ(int class, int pull_global)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
	rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

	MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
rxi_AllocSendPacket(struct rx_call *call, int want)
    struct rx_packet *p = (struct rx_packet *)0;
    int mud;
    unsigned delta;

    mud = call->MTU - RX_HEADER_SIZE;
    delta =
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
	want += delta;
	want = MIN(want, mud);

	if ((unsigned)want > p->length)
	    (void)rxi_AllocDataBuf(p, (want - p->length),
				   RX_PACKET_CLASS_SEND_CBUF);

	if (p->length > mud)
	    p->length = mud;

	if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);

	    want += delta;
	    want = MIN(want, mud);

	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);

	    if (p->length > mud)
		p->length = mud;

	    if (delta >= p->length) {

	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged.
	 */
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
	osi_rxSleep(&rx_waitingForPackets);
#endif
	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
#ifdef AFS_NT40_ENV
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
#else
/* count the number of used FDs */
    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
#endif /* AFS_NT40_ENV */

#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
	       u_short * port)
    struct sockaddr_in from;
    unsigned int nbytes;
    afs_uint32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
    tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    /* Extend the last iovec for padding; this is just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (u_short) (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes < 0 && errno == EWOULDBLOCK) {
	    if (rx_stats_active)
		rx_MutexIncrement(rx_stats.noPacketOnRead, rx_stats_mutex);
	} else if (nbytes <= 0) {
	    if (rx_stats_active) {
		MUTEX_ENTER(&rx_stats_mutex);
		rx_stats.bogusPacketOnRead++;
		rx_stats.bogusHost = from.sin_addr.s_addr;
		MUTEX_EXIT(&rx_stats_mutex);
	    }
	    dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
		 ntohs(from.sin_port), nbytes));
	}
    }
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
	     && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;

	dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
	     p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
	     p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	     p->length));
#ifdef RX_TRIMDATABUFS
	rxi_TrimDataBufs(p, 1);
#endif
    }
    else {
	/* Extract packet header. */
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
	    if (rx_stats_active) {
		struct rx_peer *peer;
		rx_MutexIncrement(rx_stats.packetsRead[p->header.type - 1], rx_stats_mutex);
		/*
		 * Try to look up this peer structure.  If it doesn't exist,
		 * don't create a new one -
		 * we don't keep count of the bytes sent/received if a peer
		 * structure doesn't already exist.
		 *
		 * The peer/connection cleanup code assumes that there is 1 peer
		 * per connection.  If we actually created a peer structure here
		 * and this packet was an rxdebug packet, the peer structure would
		 * never be cleaned up.
		 */
		peer = rxi_FindPeer(*host, *port, 0, 0);
		/* Since this may not be associated with a connection,
		 * it may have no refCount, meaning we could race with
		 * ReapConnections. */
		if (peer && (peer->refCount > 0)) {
		    MUTEX_ENTER(&peer->peer_lock);
		    hadd32(peer->bytesReceived, p->length);
		    MUTEX_EXIT(&peer->peer_lock);
		}
	    }
	}

#ifdef RX_TRIMDATABUFS
	/* Free any empty packet buffers at the end of this packet */
	rxi_TrimDataBufs(p, 1);
#endif
    }
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
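
/* Laid out on the wire, a jumbogram under this scheme is one RX header
 * followed by fixed-size slots: each slot carries RX_JUMBOBUFFERSIZE bytes
 * of data plus an RX_JUMBOHEADERSIZE abbreviated header for the next slot,
 * with only the final slot variably sized.  A hedged helper (illustrative,
 * not part of rx) showing where sub-packet n's data begins: */
static size_t
example_JumboDataOffset(int n)
{
    /* n == 0 is the first sub-packet */
    return RX_HEADER_SIZE
	+ (size_t)n * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
}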
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
		     int first)
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    struct iovec *iov;
    int niov, i;
    int length;
    afs_uint32 temp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
	return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
	return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
cpytoc(mblk_t * mp, int off, int len, char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
	   int niovs)
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	t = iovs[i].iov_len;
	memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e)  cpytoiovec(a, b, c, d, e)

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    caddr_t p1, p2;
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

    if (m->m_len <= off) {

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;

    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    t = MIN(l1, MIN(l2, (unsigned int)len));

    p1 = mtod(m, caddr_t);

    p2 = iovs[i].iov_base;
    l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     mblk_t *amb;
#else
     struct mbuf *amb;
#endif
     void (*free) ();
     struct rx_packet *phandle;
     int hdr_len, data_len;

    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
		 phandle->niovecs);
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */

rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
		       afs_uint32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;

	    /* get basic stats */
	    memset(&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
#endif
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.nPackets = htonl(rx_nPackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
	    tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
	    queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
			tstat.idleThreads);
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
			   (char *)&tstat);
	    ap->length = sizeof(struct rx_debugStats);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    rx_computelen(ap, ap->length);

	    break;
	}

    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    unsigned int i, j;
	    struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);

	    tl = sizeof(struct rx_debugConn) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset(&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * increases exponentially with the number of connections.
		 */
#ifdef AFS_PTHREAD_ENV
		MUTEX_ENTER(&rx_connHashTable_lock);
		/* We might be slightly out of step since we are not
		 * locking each call, but this is only debugging output.
		 */
		for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		    if ((all || rxi_IsConnInteresting(tc))
			&& tin.index-- <= 0) {
			tconn.host = tc->peer->host;
			tconn.port = tc->peer->port;
			tconn.cid = htonl(tc->cid);
			tconn.epoch = htonl(tc->epoch);
			tconn.serial = htonl(tc->serial);
			for (j = 0; j < RX_MAXCALLS; j++) {
			    tconn.callNumber[j] = htonl(tc->callNumber[j]);
			    if ((tcall = tc->call[j])) {
				tconn.callState[j] = tcall->state;
				tconn.callMode[j] = tcall->mode;
				tconn.callFlags[j] = tcall->flags;
				if (queue_IsNotEmpty(&tcall->rq))
				    tconn.callOther[j] |= RX_OTHER_IN;
				if (queue_IsNotEmpty(&tcall->tq))
				    tconn.callOther[j] |= RX_OTHER_OUT;
			    } else
				tconn.callState[j] = RX_STATE_NOTINIT;
			}

			tconn.natMTU = htonl(tc->peer->natMTU);
			tconn.error = htonl(tc->error);
			tconn.flags = tc->flags;
			tconn.type = tc->type;
			tconn.securityIndex = tc->securityIndex;
			if (tc->securityObject) {
			    RXS_GetStats(tc->securityObject, tc,
					 &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			    DOHTONL(packetsReceived);
			    DOHTONL(packetsSent);
			    DOHTONL(bytesReceived);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.spares) /
				 sizeof(short); i++)
				DOHTONS(spares[i]);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)
				DOHTONL(sparel[i]);
			}

			MUTEX_EXIT(&rx_connHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
				       (char *)&tconn);
			ap->length = sizeof(struct rx_debugConn);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);

		MUTEX_EXIT(&rx_connHashTable_lock);
	    /* if we make it here, there are no interesting connections */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
			   (char *)&tconn);
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

	    break;
	}

	/*
	 * Pass back all the peer structures we have available
	 */
    case RX_DEBUGI_GETPEER:{
	    unsigned int i;
	    struct rx_peer *tp;
	    struct rx_debugPeer tpeer;

	    tl = sizeof(struct rx_debugPeer) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset(&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * increases exponentially with the number of peers.
		 *
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock also increases the
		 * risk that we will miss a new entry - but we are willing
		 * to live with this limitation since this is meant for
		 * debugging only
		 */
#ifdef AFS_PTHREAD_ENV
		MUTEX_ENTER(&rx_peerHashTable_lock);
		for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		    if (tin.index-- <= 0) {
			tp->refCount++;
			MUTEX_EXIT(&rx_peerHashTable_lock);

			MUTEX_ENTER(&tp->peer_lock);
			tpeer.host = tp->host;
			tpeer.port = tp->port;
			tpeer.ifMTU = htons(tp->ifMTU);
			tpeer.idleWhen = htonl(tp->idleWhen);
			tpeer.refCount = htons(tp->refCount);
			tpeer.burstSize = tp->burstSize;
			tpeer.burst = tp->burst;
			tpeer.burstWait.sec = htonl(tp->burstWait.sec);
			tpeer.burstWait.usec = htonl(tp->burstWait.usec);
			tpeer.rtt = htonl(tp->rtt);
			tpeer.rtt_dev = htonl(tp->rtt_dev);
			tpeer.timeout.sec = htonl(tp->timeout.sec);
			tpeer.timeout.usec = htonl(tp->timeout.usec);
			tpeer.nSent = htonl(tp->nSent);
			tpeer.reSends = htonl(tp->reSends);
			tpeer.inPacketSkew = htonl(tp->inPacketSkew);
			tpeer.outPacketSkew = htonl(tp->outPacketSkew);
			tpeer.rateFlag = htonl(tp->rateFlag);
			tpeer.natMTU = htons(tp->natMTU);
			tpeer.maxMTU = htons(tp->maxMTU);
			tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
			tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
			tpeer.MTU = htons(tp->MTU);
			tpeer.cwind = htons(tp->cwind);
			tpeer.nDgramPackets = htons(tp->nDgramPackets);
			tpeer.congestSeq = htons(tp->congestSeq);
			tpeer.bytesSent.high = htonl(tp->bytesSent.high);
			tpeer.bytesSent.low = htonl(tp->bytesSent.low);
			tpeer.bytesReceived.high =
			    htonl(tp->bytesReceived.high);
			tpeer.bytesReceived.low =
			    htonl(tp->bytesReceived.low);
			MUTEX_EXIT(&tp->peer_lock);

			MUTEX_ENTER(&rx_peerHashTable_lock);
			tp->refCount--;
			MUTEX_EXIT(&rx_peerHashTable_lock);

			rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
				       (char *)&tpeer);
			ap->length = sizeof(struct rx_debugPeer);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);

		MUTEX_EXIT(&rx_peerHashTable_lock);
	    /* if we make it here, there are no interesting peers */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
			   (char *)&tpeer);
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

	    break;
	}

    case RX_DEBUGI_RXSTATS:{
	    unsigned int i;
	    afs_int32 *s;

	    tl = sizeof(rx_stats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    /* Since it's all int32s convert to network order with a loop. */
	    if (rx_stats_active)
		MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) & rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

	    ap->length = sizeof(rx_stats);
	    if (rx_stats_active)
		MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

	    break;
	}

    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);

	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

	break;
    }
    return ap;
rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
			 afs_uint32 ahost, short aport, int istack)
    char buf[66];

    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);
	ap->length = 65;
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    }
/* send a debug packet back to the sender */
static void
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_uint32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;
    int saven = 0;
    int waslocked = ISAFS_GLOCK();

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
#endif

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= apacket->wirevec[i].iov_len;
    }

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");
    }
#endif

    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");
    }
#endif

    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
    }
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    char deliveryType = 'S';

    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    if (p->length > conn->peer->maxPacketSize) {
	if ((p->header.type == RX_PACKET_TYPE_ACK) &&
	    (p->header.flags & RX_REQUEST_ACK)) {
	    conn->lastPingSize = p->length;
	    conn->lastPingSizeSer = p->header.serial;
	} else if (p->header.seq != 0) {
	    conn->lastPacketSize = p->length;
	    conn->lastPacketSizeSeq = p->header.seq;
	}
    }
    MUTEX_EXIT(&conn->conn_data_lock);

    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;
    }

    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	if (drop)
	    deliveryType = 'D';	/* Drop the packet */
    }

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket = (conn->type ==
	      RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
	waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "before osi_NetSend()");
	}
#endif
	if ((code =
	     osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
			 p->length + RX_HEADER_SIZE, istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    if (rx_stats_active)
		rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
	    p->retryTime = p->timeSent;	/* resend it very soon */
	    clock_Addmsec(&(p->retryTime),
			  10 + (((afs_uint32) p->backoff) << 8));

	    /* Some systems are nice and tell us right away that we cannot
	     * reach this recipient by returning an error code.
	     * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
#ifdef AFS_NT40_ENV
	    (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
#elif defined(AFS_LINUX20_ENV)
	    code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
	    code == EHOSTUNREACH
#endif
		call->lastReceiveTime = 0;
	}
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "after osi_NetSend()");
	}
#endif

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
	 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
	 p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));

    if (rx_stats_active) {
	rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
	MUTEX_ENTER(&peer->peer_lock);
	hadd32(peer->bytesSent, p->length);
	MUTEX_EXIT(&peer->peer_lock);
    }
2353 /* Send a list of packets to appropriate destination for the specified
2354 * connection. The headers are first encoded and placed in the packets.
2357 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2358 struct rx_packet **list, int len, int istack)
2360 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2363 struct sockaddr_in addr;
2364 struct rx_peer *peer = conn->peer;
2366 struct rx_packet *p = NULL;
2367 struct iovec wirevec[RX_MAXIOVECS];
2368 int i, length, code;
2371 struct rx_jumboHeader *jp;
2373 char deliveryType = 'S';
2375 /* The address we're sending the packet to */
2376 addr.sin_family = AF_INET;
2377 addr.sin_port = peer->port;
2378 addr.sin_addr.s_addr = peer->host;
2380 if (len + 1 > RX_MAXIOVECS) {
2381 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2385 * Stamp the packets in this jumbogram with consecutive serial numbers
2387 MUTEX_ENTER(&conn->conn_data_lock);
2388 serial = conn->serial;
2389 conn->serial += len;
2390 for (i = 0; i < len; i++) {
2392 if (p->length > conn->peer->maxPacketSize) {
2393 /* a ping *or* a sequenced packet can count */
2394 if ((p->length > conn->peer->maxPacketSize)) {
2395 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2396 (p->header.flags & RX_REQUEST_ACK)) &&
2397 ((i == 0) || (p->length >= conn->lastPingSize))) {
2398 conn->lastPingSize = p->length;
2399 conn->lastPingSizeSer = serial + i;
2400 } else if ((p->header.seq != 0) &&
2401 ((i == 0) || (p->length >= conn->lastPacketSize))) {
2402 conn->lastPacketSize = p->length;
2403 conn->lastPacketSizeSeq = p->header.seq;
2408 MUTEX_EXIT(&conn->conn_data_lock);
    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    length = RX_HEADER_SIZE;
    wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0; i < len; i++) {
        p = list[i];

        /* The whole 3.5 jumbogram scheme relies on packets fitting
         * in a single packet buffer. */
        if (p->niovecs > 2) {
            osi_Panic("rxi_SendPacketList, niovecs > 2\n");
        }

        /* Set the RX_JUMBO_PACKET flags in all but the last packet in
         * the list; the flag tells the receiver that another packet
         * follows in the same datagram. */
        if (i < len - 1) {
            if (p->length != RX_JUMBOBUFFERSIZE) {
                osi_Panic("rxi_SendPacketList, length != jumbo size\n");
            }
            p->header.flags |= RX_JUMBO_PACKET;
            length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
            wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
        } else {
            wirevec[i + 1].iov_len = p->length;
            length += p->length;
        }
        wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
        if (i > 0) {
            /* Convert the previous packet's jumbo header to network byte
             * order; it describes the packet that follows it. */
            temp = (afs_uint32) (p->header.flags) << 24;
            temp |= (afs_uint32) (p->header.spare);
            *(afs_uint32 *) jp = htonl(temp);
        }
        jp = (struct rx_jumboHeader *)
            ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
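        /* Wire layout being assembled for an N-packet jumbogram:
         *
         *   [Rx header][data 1][jumbo hdr 1][data 2][jumbo hdr 2]...[data N]
         *
         * where each data area is RX_JUMBOBUFFERSIZE bytes (the last may be
         * shorter) and each jumbo header holds the flags and spare field of
         * the packet after it. */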
        /* Stamp each packet with a unique serial number.  The serial
         * number is maintained on a connection basis because some types
         * of security may be based on the serial number of the packet,
         * and security is handled on a per authenticated-connection
         * basis. */
        /* Pre-increment, to guarantee no zero serial number; a zero
         * serial number means the packet was never sent. */
        p->header.serial = ++serial;
        /* This is so we can adjust retransmit time-outs better in the face
         * of rapidly changing round-trip times.  RTO estimation is not a la
         * Karn. */
        if (p->firstSerial == 0) {
            p->firstSerial = p->header.serial;
        }
#ifdef RXDEBUG
        /* If an output tracer function is defined, call it with the packet
         * and network address.  Note this function may modify its arguments. */
        if (rx_almostSent) {
            int drop = (*rx_almostSent) (p, &addr);
            /* drop the packet if the tracer's return value is non-zero */
            if (drop)
                deliveryType = 'D';    /* Drop the packet */
        }
#endif

        /* Get network byte order header */
        rxi_EncodePacketHeader(p);    /* XXX in the event of rexmit, etc, don't
                                       * need to touch ALL the fields */
    }
    /* Send the packets out on the same socket that related packets are
     * being sent on */
    socket = (conn->type ==
              RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';    /* Drop the packet */
    } else {
        deliveryType = 'S';    /* Send the packet */
#endif /* RXDEBUG */
        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        waslocked = ISAFS_GLOCK();
        if (!istack && waslocked)
            AFS_GUNLOCK();
#endif
        if ((code =
             osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
                         istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            if (rx_stats_active)
                rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
            for (i = 0; i < len; i++) {
                p = list[i];
                p->retryTime = p->timeSent;    /* resend it very soon */
                clock_Addmsec(&(p->retryTime),
                              10 + (((afs_uint32) p->backoff) << 8));
            }
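            /* clock_Addmsec() above schedules the retry 10 ms plus
             * backoff * 256 ms (backoff << 8) after the failed attempt;
             * e.g. a packet with backoff == 2 is retried after roughly
             * 522 ms. */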
            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later.
             */
            if (call
#ifdef AFS_NT40_ENV
                && ((code == -1 && WSAGetLastError() == WSAEHOSTUNREACH)
                    || (code == -WSAEHOSTUNREACH))
#elif defined(AFS_LINUX20_ENV)
                && code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
                && code == EHOSTUNREACH
#else
                && 0
#endif
                )
                call->lastReceiveTime = 0;
        }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        if (!istack && waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
         ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
         p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif

    if (rx_stats_active) {
        rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
        MUTEX_ENTER(&peer->peer_lock);
        hadd32(peer->bytesSent, p->length);
        MUTEX_EXIT(&peer->peer_lock);
    }
}
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(struct rx_call *call,
                struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    struct rx_packet *p;
    int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
        p->length = nbytes;
    else
        nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);

    /* Trim the iovec chain so that exactly nbytes are sent; the original
     * lengths are saved so they can be restored after the send. */
    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1;    /* so condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *) 0, conn, p, istack);
    if (saven) {    /* means we truncated the packet above.  We probably
                     * don't really need to do this, but it seems safer this
                     * way, given that sneaky optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}
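/* Typical use, for illustration only (this call pattern appears in rx.c,
 * e.g. when sending a connection-level abort; "error" and "packet" are the
 * caller's variables):
 *
 *     error = htonl(error);
 *     rxi_SendSpecial((struct rx_call *) 0, conn, packet,
 *                     RX_PACKET_TYPE_ABORT, (char *) &error,
 *                     sizeof(error), istack);
 *
 * When optionalPacket is supplied it is returned to the caller; otherwise
 * the temporary packet is freed and NULL is returned. */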
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);    /* MTUXXX */

    memset(buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
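/* On-the-wire header layout produced above (RX_HEADER_SIZE is 28 bytes;
 * every field is big-endian):
 *
 *     word 0: epoch
 *     word 1: cid (connection id | channel)
 *     word 2: callNumber
 *     word 3: seq
 *     word 4: serial
 *     word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *     word 6: spare<<16 | serviceId
 *
 * rxi_DecodePacketHeader() below is the inverse; on receive, the top half
 * of word 6 carries the security checksum. */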
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);    /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;
    temp = ntohl(*buf);
    buf++;
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;
    temp = ntohl(*buf);
    buf++;
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
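#if 0
/* Hedged example, not part of the original file: a quick self-test that
 * encoding followed by decoding round-trips every header field.  Assumes
 * only that p's first iovec holds at least RX_HEADER_SIZE bytes. */
static int
rxi_CheckHeaderRoundTrip(struct rx_packet *p)
{
    struct rx_header saved = p->header;

    rxi_EncodePacketHeader(p);
    rxi_DecodePacketHeader(p);
    return memcmp(&saved, &p->header, sizeof(saved)) == 0;
}
#endif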
void
rxi_PrepareSendPacket(struct rx_call *call,
                      struct rx_packet *p, int last)
{
    struct rx_connection *conn = call->conn;
    int i;
    afs_int32 len;    /* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);    /* Never yet transmitted */
    clock_Zero(&p->firstSent);    /* Never yet transmitted */
    p->header.serial = 0;    /* Another way of saying never transmitted... */

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");    /* MTUXXX */
    } else if (i < p->niovecs) {
        /* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
        rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
#else /* !RX_ENABLE_TSFPQ */
        MUTEX_ENTER(&rx_freePktQ_lock);
        rxi_FreeDataBufsNoLock(p, i);
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */
        p->niovecs = i;
    }
    /* len is now zero or negative; adding it trims the last iovec so the
     * iovec sum matches the packet length exactly. */
    p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
}
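/* Worked example of the trim above: with p->length + securityHeaderSize of
 * 100 bytes and a 1412-byte first data buffer (the usual
 * RX_JUMBOBUFFERSIZE), the loop exits after one pass with i == 2 and
 * len == -1312, so wirevec[1].iov_len is cut from 1412 down to 100 and any
 * iovecs beyond index 1 are released. */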
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
        return mtu;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
        return mtu;
    }
    mtu -= adjMTU;
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
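/* Worked example, assuming the usual constants (RX_HEADER_SIZE 28,
 * RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4): for an Ethernet-class
 * mtu of 1500, adjMTU is 28 + 1412 + 4 = 1444.  The remaining 56 bytes
 * cannot hold another 1416-byte jumbo fragment, so frags is 0 and the
 * MTU is rounded down to 1444. */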
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;

    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
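/* Worked example with the same assumed constants plus UDP_HDR_SIZE 8: for
 * frags = 3 and mtu = 1444, maxMTU = 3 * 1452 - 8 = 4348 (well under
 * RX_MAX_PACKET_SIZE).  Subtracting the first and last packets leaves
 * 4348 - 2856 = 1492 bytes, room for one more 1416-byte middle fragment,
 * so the function returns 2 + 1 = 3 packets. */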
/*
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where the packet leakage is.
 */
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
    struct rx_packet *p;
#ifdef AFS_NT40_ENV
    int zilch;
    char output[2048];
#define RXDPRINTF sprintf
#define RXDPRINTOUT output
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif

    MUTEX_ENTER(&rx_freePktQ_lock);
    RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    for (p = rx_mallocedP; p; p = p->allNextp) {
        RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, retryTime=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, backoff=%u, length=%u  header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
                  cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec, p->retryTime.sec, p->retryTime.usec,
                  p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->backoff, (afs_uint32)p->length,
                  p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
                  (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
                  (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
#ifdef AFS_NT40_ENV
        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }
    RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RXDEBUG_PACKET */
    return 0;
}
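/* Usage sketch, for illustration only (the file name is hypothetical):
 *
 *     FILE *fp = fopen("rx_packets.log", "w");
 *     if (fp) {
 *         rx_DumpPackets(fp, "leak-check");
 *         fclose(fp);
 *     }
 *
 * On Windows builds outputFile is actually treated as a file HANDLE and
 * written with WriteFile, as the AFS_NT40_ENV branches above show. */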