 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include "afs/param.h"
#include <afs/param.h>
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
static struct rx_packet *rx_mallocedP = 0;
static afs_uint32 rx_packet_id = 0;

extern char cml_version_number[];

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_uint32 ahost, short aport,
#ifdef RX_ENABLE_TSFPQ
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
				   struct rx_queue * q);
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, the packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
	l += packet->wirevec[i].iov_len;
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	l += packet->wirevec[i].iov_len;
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	l += packet->wirevec[i].iov_len;

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    while ((r > 0) && (i < packet->niovecs)) {
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	l += packet->wirevec[i].iov_len;

    return (r ? (resid - r) : resid);
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    unsigned int i, j, l, o, r;

    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > o) {
	l += packet->wirevec[i].iov_len;

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    while ((r > 0) && (i <= RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */

	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	l += packet->wirevec[i].iov_len;

    return (r ? (resid - r) : resid);
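/* Illustrative sketch, not part of the original source: copying a buffer
 * into a packet and back out through the slow paths above.  Both routines
 * return the number of bytes actually transferred, and rx_SlowWritePacket
 * grows the packet with continuation buffers as a side effect when it runs
 * out of room.  RX_PACKET_EXAMPLES is a hypothetical guard. */
#ifdef RX_PACKET_EXAMPLES
static int
example_packet_copy(struct rx_packet *p)
{
    char in[100], out[100];
    int n = (int)sizeof(in);

    memset(in, 'x', sizeof(in));
    if (rx_SlowWritePacket(p, 0, n, in) != n)
	return -1;	/* could not come up with continuation buffers */
    if (rx_SlowReadPacket(p, 0, n, out) != n)
	return -1;
    return memcmp(in, out, sizeof(in));	/* 0 on success */
}
#endif /* RX_PACKET_EXAMPLES */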
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
    struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
	RX_PACKET_IOV_FULLINIT(p);
#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
	MUTEX_ENTER(&rx_freePktQ_lock);
	transfer = MAX(transfer, rx_TSFPQGlobSize);
	if (transfer > rx_nFreePackets) {
	    /* alloc enough for us, plus a few globs for other threads */
	    rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);

	RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

	MUTEX_EXIT(&rx_freePktQ_lock);

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
	 num_pkts--, overq++);

	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
		break;
	    }
	}

    if (rx_nFreePackets < num_pkts)
	num_pkts = rx_nFreePackets;

	rxi_NeedMorePackets = TRUE;

    if (rx_nFreePackets < num_pkts) {
	rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));

    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
	 i++, c = queue_Next(c, rx_packet)) {

    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);

    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
 * Free a packet currently used as a continuation buffer
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

	for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);
	for (queue_Scan(q, c, nc, rx_packet)) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);

    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */

	MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    struct rx_packet *p, *np;

    osi_Assert(num_pkts >= 0);

	for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	for (queue_Scan(q, p, np, rx_packet)) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);

    queue_SpliceAppend(q, &cbs);

    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
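/* Illustrative sketch, not part of the original source: the batch
 * interfaces above operate on rx_queue chains.  rxi_AllocPackets() may
 * return fewer packets than requested, and rxi_FreePackets(0, &q) recounts
 * the queue itself when the caller does not know its length.
 * RX_PACKET_EXAMPLES is a hypothetical guard. */
#ifdef RX_PACKET_EXAMPLES
static void
example_batch_alloc(void)
{
    struct rx_queue q;
    int got;

    queue_Init(&q);
    got = rxi_AllocPackets(RX_PACKET_CLASS_SEND, 8, &q);
    /* ... use the 'got' packets chained on q ... */
    rxi_FreePackets(got, &q);
}
#endif /* RX_PACKET_EXAMPLES */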
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
    if ((nv + p->niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - p->niovecs;

    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
	p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	p->wirevec[i].iov_len = RX_CBUFFERSIZE;

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
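/* Illustrative sketch, not part of the original source: per the contract
 * above, rxi_AllocDataBuf() returns the number of bytes it could NOT
 * provide, so any value <= 0 means the request was fully satisfied.
 * RX_PACKET_EXAMPLES is a hypothetical guard. */
#ifdef RX_PACKET_EXAMPLES
static int
example_grow_packet(struct rx_packet *p, int nbytes)
{
    int shortfall = rxi_AllocDataBuf(p, nbytes, RX_PACKET_CLASS_SEND_CBUF);
    return (shortfall <= 0) ? 0 : -1;	/* -1: pool could not cover nbytes */
}
#endif /* RX_PACKET_EXAMPLES */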
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */

    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_NeedMorePackets = FALSE;

	MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;

	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

    if (flush_global &&
	(num_keep_local < apackets)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
	rxi_NeedMorePackets = FALSE;

	MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
	apackets -= apackets / 4;
	osi_Assert(apackets > 0);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;

	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;
rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	if (num_keep_local < rx_ts_info->_FPQ.len) {
	    xfer = rx_ts_info->_FPQ.len - num_keep_local;
	    RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
	    xfer = num_keep_local - rx_ts_info->_FPQ.len;
	    if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
		xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
	    if (rx_nFreePackets < xfer) {
		rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
	    RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
	MUTEX_EXIT(&rx_freePktQ_lock);

rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */
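/* Illustrative sketch, not part of the original source: a worker thread
 * that expects a burst of traffic can pre-fault a larger thread-local free
 * pool, then hand everything back before going idle, as
 * rxi_FlushLocalPacketsTSFPQ() does above.  The second argument to
 * rxi_AdjustLocalPacketsTSFPQ() permits overcommitting past
 * rx_TSFPQLocalMax.  RX_PACKET_EXAMPLES is a hypothetical guard. */
#if defined(RX_ENABLE_TSFPQ) && defined(RX_PACKET_EXAMPLES)
static void
example_tsfpq_tuning(void)
{
    rxi_AdjustLocalPacketsTSFPQ(64, 0);	/* keep ~64 packets locally, no overcommit */
    /* ... burst of send/receive work ... */
    rxi_FlushLocalPacketsTSFPQ();	/* return the surplus to the global pool */
}
#endif /* RX_ENABLE_TSFPQ && RX_PACKET_EXAMPLES */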
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_maxSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	RX_TS_FPQ_LTOG(rx_ts_info);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */

	MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
 * free continuation buffers off a packet into a queue
 *
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *
 * returns:
 *   number of continuation buffers freed
 */
#ifndef RX_ENABLE_TSFPQ
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
    struct rx_packet * cb;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
	iov = &p->wirevec[first];
	    osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
	cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
	RX_FPQ_MARK_FREE(cb);
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p      -- packet from which to free continuation buffers
 * [IN] first  -- iovec offset of first continuation buffer to free
 */
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	    osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p            -- packet from which continuation buffers will be freed
 * [IN] first        -- iovec offset of first continuation buffer to free
 *                      any value less than 2, the min number of iovecs,
 *                      is treated as if it is 2.
 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
 *                      global free pool before returning
 */
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	    osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */

	MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	iov->iov_len = RX_CBUFFERSIZE;
#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;

	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */

    RX_TS_INFO_GET(rx_ts_info);
    for (; iov < end; iov++) {
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;

	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end; iov++) {
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));

    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary; besides, this is the way the
 * length field is usually used. */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
		break;
	    }
	}
	return (struct rx_packet *)0;

    if (rx_stats_active)
	rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

#ifdef KERNEL
	if (queue_IsEmpty(&rx_freePacketQueue))
	    osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);
#endif /* KERNEL */

	RX_TS_FPQ_GTOL(rx_ts_info);

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
		break;
	    }
	}
	return (struct rx_packet *)0;

    if (rx_stats_active)
	rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);

#ifdef KERNEL
    if (queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
    if (queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(rx_maxSendWindow);
#endif /* KERNEL */

    p = queue_First(&rx_freePacketQueue, rx_packet);

    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketTSFPQ(int class, int pull_global)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
	rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

	MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
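/* Illustrative sketch, not part of the original source: the simplest
 * correct pairing of the allocators above.  As noted before
 * rxi_AllocPacketNoLock(), p->length counts only the data area and never
 * the RX_HEADER_SIZE bytes of wire header held in wirevec[0].
 * RX_PACKET_EXAMPLES is a hypothetical guard. */
#ifdef RX_PACKET_EXAMPLES
static void
example_alloc_free(void)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
    if (p) {
	/* p->length here covers the built-in data area only */
	rxi_FreePacket(p);	/* p must not be on any queue at this point */
    }
}
#endif /* RX_PACKET_EXAMPLES */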
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
rxi_AllocSendPacket(struct rx_call *call, int want)
    struct rx_packet *p = (struct rx_packet *)0;

    mud = call->MTU - RX_HEADER_SIZE;
    delta =
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
	want = MIN(want, mud);

	if ((unsigned)want > p->length)
	    (void)rxi_AllocDataBuf(p, (want - p->length),
				   RX_PACKET_CLASS_SEND_CBUF);

	if (p->length > mud)

	if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);

	    want = MIN(want, mud);

	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);

	    if (p->length > mud)

	    if (delta >= p->length) {

	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we
	 * just wait. */
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
	osi_rxSleep(&rx_waitingForPackets);
#endif
	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
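/* Illustrative sketch, not part of the original source: rxi_AllocSendPacket()
 * must be entered with the call locked; it may drop and retake call->lock
 * while it sleeps for packets, as the loop above shows, and it returns NULL
 * only once call->error is set.  RX_PACKET_EXAMPLES is a hypothetical
 * guard. */
#ifdef RX_PACKET_EXAMPLES
static struct rx_packet *
example_get_send_packet(struct rx_call *call, int nbytes)
{
    struct rx_packet *p;

    MUTEX_ENTER(&call->lock);
    p = rxi_AllocSendPacket(call, nbytes);
    MUTEX_EXIT(&call->lock);
    return p;
}
#endif /* RX_PACKET_EXAMPLES */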
#ifdef AFS_NT40_ENV
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
#else
/* count the number of used FDs */
    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
#endif /* AFS_NT40_ENV */
#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)
/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    unsigned int nbytes;
    afs_uint32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
	tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);

    /* Extend the last iovec for padding; this just makes sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;
    p->length = (u_short) (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes < 0 && errno == EWOULDBLOCK) {
	    if (rx_stats_active)
		rx_MutexIncrement(rx_stats.noPacketOnRead, rx_stats_mutex);
	} else if (nbytes <= 0) {
	    if (rx_stats_active) {
		MUTEX_ENTER(&rx_stats_mutex);
		rx_stats.bogusPacketOnRead++;
		rx_stats.bogusHost = from.sin_addr.s_addr;
		MUTEX_EXIT(&rx_stats_mutex);

	    dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
		 ntohs(from.sin_port), nbytes));

    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
	     && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;

	dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
	     p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
	     p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
#ifdef RX_TRIMDATABUFS
	rxi_TrimDataBufs(p, 1);

    /* Extract packet header. */
    rxi_DecodePacketHeader(p);

    *host = from.sin_addr.s_addr;
    *port = from.sin_port;
    if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
	if (rx_stats_active) {
	    struct rx_peer *peer;
	    rx_MutexIncrement(rx_stats.packetsRead[p->header.type - 1], rx_stats_mutex);
	    /*
	     * Try to look up this peer structure.  If it doesn't exist,
	     * don't create a new one -
	     * we don't keep count of the bytes sent/received if a peer
	     * structure doesn't already exist.
	     *
	     * The peer/connection cleanup code assumes that there is 1 peer
	     * per connection.  If we actually created a peer structure here
	     * and this packet was an rxdebug packet, the peer structure would
	     * never be cleaned up.
	     */
	    peer = rxi_FindPeer(*host, *port, 0, 0);
	    /* Since this may not be associated with a connection,
	     * it may have no refCount, meaning we could race with
	     * ReapConnections
	     */
	    if (peer && (peer->refCount > 0)) {
		MUTEX_ENTER(&peer->peer_lock);
		hadd32(peer->bytesReceived, p->length);
		MUTEX_EXIT(&peer->peer_lock);

#ifdef RX_TRIMDATABUFS
    /* Free any empty packet buffers at the end of this packet */
    rxi_TrimDataBufs(p, 1);

#endif /* !KERNEL || UKERNEL */
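/* Illustrative sketch, not part of the original source: the padding trick
 * used by rxi_ReadPacket() above, in isolation.  Because the rx header
 * carries no length field, the final iovec is temporarily extended into
 * the slack that follows localdata; a datagram that spills into the slack
 * (nbytes > tlen) must be oversized and is rejected.  The caller restores
 * the saved length afterwards.  RX_PACKET_EXAMPLES is a hypothetical
 * guard. */
#ifdef RX_PACKET_EXAMPLES
static afs_uint32
example_extend_last_iovec(struct rx_packet *p)
{
    afs_uint32 savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
    return savelen;		/* restore into iov_len after the recvmsg */
}
#endif /* RX_PACKET_EXAMPLES */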
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));

    niov = p->niovecs - 2;
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));

    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;

    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
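/* Illustrative sketch, not part of the original source: on the wire an
 * AFS 3.5 jumbogram laid out as split above looks like
 *
 *   rx header | JUMBOBUFFERSIZE data | 4-byte jumbo header | ... | tail data
 *
 * so the abbreviated header that precedes sub-packet n (n >= 1) sits at a
 * fixed offset within the data area.  RX_PACKET_EXAMPLES is a hypothetical
 * guard. */
#ifdef RX_PACKET_EXAMPLES
static int
example_jumbo_subheader_offset(int n)
{
    /* data offset of the jumbo header read by the nth split (n >= 1) */
    return n * RX_JUMBOBUFFERSIZE + (n - 1) * RX_JUMBOHEADERSIZE;
}
#endif /* RX_PACKET_EXAMPLES */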
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
 * message receipt is done in rxk_input or rx_put.
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
cpytoc(mblk_t * mp, int off, int len, char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	t = iovs[i].iov_len;
	memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);

#define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

    if (m->m_len <= off) {

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;

    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    t = MIN(l1, MIN(l2, (unsigned int)len));
    p1 = mtod(m, caddr_t);
    p2 = iovs[i].iov_base;
    l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;
    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /*KERNEL && !UKERNEL */
/* send a response to a debug packet */
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
		       afs_uint32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;

	    /* get basic stats */
	    memset(&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.nPackets = htonl(rx_nPackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_nWaiting);
	    tstat.nWaited = htonl(rx_nWaited);
	    queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
	    ap->length = sizeof(struct rx_debugStats);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    rx_computelen(ap, ap->length);
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);

	    tl = sizeof(struct rx_debugConn) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset(&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * increases exponentially with the number of connections.
		 */
#ifdef AFS_PTHREAD_ENV
		MUTEX_ENTER(&rx_connHashTable_lock);
		/* We might be slightly out of step since we are not
		 * locking each call, but this is only debugging output.
		 */
		for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		    if ((all || rxi_IsConnInteresting(tc))
			&& tin.index-- <= 0) {
			tconn.host = tc->peer->host;
			tconn.port = tc->peer->port;
			tconn.cid = htonl(tc->cid);
			tconn.epoch = htonl(tc->epoch);
			tconn.serial = htonl(tc->serial);
			for (j = 0; j < RX_MAXCALLS; j++) {
			    tconn.callNumber[j] = htonl(tc->callNumber[j]);
			    if ((tcall = tc->call[j])) {
				tconn.callState[j] = tcall->state;
				tconn.callMode[j] = tcall->mode;
				tconn.callFlags[j] = tcall->flags;
				if (queue_IsNotEmpty(&tcall->rq))
				    tconn.callOther[j] |= RX_OTHER_IN;
				if (queue_IsNotEmpty(&tcall->tq))
				    tconn.callOther[j] |= RX_OTHER_OUT;
				tconn.callState[j] = RX_STATE_NOTINIT;

			tconn.natMTU = htonl(tc->peer->natMTU);
			tconn.error = htonl(tc->error);
			tconn.flags = tc->flags;
			tconn.type = tc->type;
			tconn.securityIndex = tc->securityIndex;
			if (tc->securityObject) {
			    RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			    DOHTONL(packetsReceived);
			    DOHTONL(packetsSent);
			    DOHTONL(bytesReceived);
				 sizeof(tconn.secStats.spares) /
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)

			MUTEX_EXIT(&rx_connHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
			ap->length = sizeof(struct rx_debugConn);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,

		MUTEX_EXIT(&rx_connHashTable_lock);

	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    /*
	     * Pass back all the peer structures we have available
	     */
    case RX_DEBUGI_GETPEER:{
	    struct rx_debugPeer tpeer;

	    tl = sizeof(struct rx_debugPeer) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset(&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * increases exponentially with the number of peers.
		 *
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock also increases
		 * the risk that we will miss a new entry - but we are
		 * willing to live with this limitation since this is
		 * meant for debugging only
		 */
#ifdef AFS_PTHREAD_ENV
1985 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1986 if (tin.index-- <= 0) {
1988 MUTEX_EXIT(&rx_peerHashTable_lock);
1990 MUTEX_ENTER(&tp->peer_lock);
1991 tpeer.host = tp->host;
1992 tpeer.port = tp->port;
1993 tpeer.ifMTU = htons(tp->ifMTU);
1994 tpeer.idleWhen = htonl(tp->idleWhen);
1995 tpeer.refCount = htons(tp->refCount);
1996 tpeer.burstSize = tp->burstSize;
1997 tpeer.burst = tp->burst;
1998 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1999 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
2000 tpeer.rtt = htonl(tp->rtt);
2001 tpeer.rtt_dev = htonl(tp->rtt_dev);
2002 tpeer.timeout.sec = htonl(tp->timeout.sec);
2003 tpeer.timeout.usec = htonl(tp->timeout.usec);
2004 tpeer.nSent = htonl(tp->nSent);
2005 tpeer.reSends = htonl(tp->reSends);
2006 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
2007 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
2008 tpeer.rateFlag = htonl(tp->rateFlag);
2009 tpeer.natMTU = htons(tp->natMTU);
2010 tpeer.maxMTU = htons(tp->maxMTU);
2011 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
2012 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
2013 tpeer.MTU = htons(tp->MTU);
2014 tpeer.cwind = htons(tp->cwind);
2015 tpeer.nDgramPackets = htons(tp->nDgramPackets);
2016 tpeer.congestSeq = htons(tp->congestSeq);
2017 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
2018 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
2019 tpeer.bytesReceived.high =
2020 htonl(tp->bytesReceived.high);
2021 tpeer.bytesReceived.low =
2022 htonl(tp->bytesReceived.low);
2023 MUTEX_EXIT(&tp->peer_lock);
2025 MUTEX_ENTER(&rx_peerHashTable_lock);
2027 MUTEX_EXIT(&rx_peerHashTable_lock);
2029 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
2032 ap->length = sizeof(struct rx_debugPeer);
2033 rxi_SendDebugPacket(ap, asocket, ahost, aport,
2039 MUTEX_EXIT(&rx_peerHashTable_lock);
2041 /* if we make it here, there are no interesting packets */
2042 tpeer.host = htonl(0xffffffff); /* means end */
2043 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
2046 ap->length = sizeof(struct rx_debugPeer);
2047 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    case RX_DEBUGI_RXSTATS:{
	    tl = sizeof(rx_stats) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    /* Since it's all int32s, convert to network order with a loop. */
	    if (rx_stats_active)
		MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) & rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

	    ap->length = sizeof(rx_stats);
	    if (rx_stats_active)
		MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);

	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
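/* Illustrative sketch, not part of the original source: a debug request in
 * the shape the handler above expects -- a struct rx_debugIn with both
 * fields in network byte order, carried in a packet whose header has the
 * RX_CLIENT_INITIATED flag set and has been encoded before sending.
 * RX_PACKET_EXAMPLES is a hypothetical guard. */
#ifdef RX_PACKET_EXAMPLES
static void
example_build_debug_request(struct rx_packet *ap)
{
    struct rx_debugIn tin;

    tin.type = htonl(RX_DEBUGI_GETSTATS);
    tin.index = htonl(0);
    rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    ap->length = sizeof(struct rx_debugIn);
    ap->header.flags |= RX_CLIENT_INITIATED;
    rxi_EncodePacketHeader(ap);
}
#endif /* RX_PACKET_EXAMPLES */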
rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
			 afs_uint32 ahost, short aport, int istack)
    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);

	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_uint32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;

    int waslocked = ISAFS_GLOCK();

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	nbytes -= apacket->wirevec[i].iov_len;

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");

    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");

    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    char deliveryType = 'S';

    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    if (p->length > conn->peer->maxPacketSize) {
	if ((p->header.type == RX_PACKET_TYPE_ACK) &&
	    (p->header.flags & RX_REQUEST_ACK)) {
	    conn->lastPingSize = p->length;
	    conn->lastPingSizeSer = p->header.serial;
	} else if (p->header.seq != 0) {
	    conn->lastPacketSize = p->length;
	    conn->lastPacketSizeSeq = p->header.seq;
    MUTEX_EXIT(&conn->conn_data_lock);

    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;

    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	    deliveryType = 'D';	/* Drop the packet */

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * received on */
    socket = (conn->type ==
	      RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
	waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "before osi_NetSend()");

	if ((code =
	     osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
			 p->length + RX_HEADER_SIZE, istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    if (rx_stats_active)
		rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
	    p->retryTime = p->timeSent;	/* resend it very soon */
	    clock_Addmsec(&(p->retryTime),
			  10 + (((afs_uint32) p->backoff) << 8));
	    /* Some systems are nice and tell us right away that we cannot
	     * reach this recipient by returning an error code.
	     * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
#ifdef AFS_NT40_ENV
		(code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
#elif defined(AFS_LINUX20_ENV)
		code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
		code == EHOSTUNREACH
		call->lastReceiveTime = 0;

#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "after osi_NetSend()");

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
	 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
	 p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));

    if (rx_stats_active) {
	rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
	MUTEX_ENTER(&peer->peer_lock);
	hadd32(peer->bytesSent, p->length);
	MUTEX_EXIT(&peer->peer_lock);
/* Send a list of packets to appropriate destination for the specified
 * connection.  The headers are first encoded and placed in the packets.
 */
rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
		   struct rx_packet **list, int len, int istack)
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length, code;
    struct rx_jumboHeader *jp;
    char deliveryType = 'S';

    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    if (len + 1 > RX_MAXIOVECS) {
	osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");

    /*
     * Stamp the packets in this jumbogram with consecutive serial numbers
     */
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    for (i = 0; i < len; i++) {
	if (p->length > conn->peer->maxPacketSize) {
	    /* a ping *or* a sequenced packet can count */
	    if ((p->length > conn->peer->maxPacketSize)) {
		if (((p->header.type == RX_PACKET_TYPE_ACK) &&
		     (p->header.flags & RX_REQUEST_ACK)) &&
		    ((i == 0) || (p->length >= conn->lastPingSize))) {
		    conn->lastPingSize = p->length;
		    conn->lastPingSizeSer = serial + i;
		} else if ((p->header.seq != 0) &&
			   ((i == 0) || (p->length >= conn->lastPacketSize))) {
		    conn->lastPacketSize = p->length;
		    conn->lastPacketSizeSeq = p->header.seq;
    MUTEX_EXIT(&conn->conn_data_lock);
2405 /* This stuff should be revamped, I think, so that most, if not
2406 * all, of the header stuff is always added here. We could
2407 * probably do away with the encode/decode routines. XXXXX */
2410 length = RX_HEADER_SIZE;
2411 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2412 wirevec[0].iov_len = RX_HEADER_SIZE;
2413 for (i = 0; i < len; i++) {
2416 /* The whole 3.5 jumbogram scheme relies on packets fitting
2417 * in a single packet buffer. */
2418 if (p->niovecs > 2) {
2419 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2422 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2425 if (p->length != RX_JUMBOBUFFERSIZE) {
2426 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2428 p->header.flags |= RX_JUMBO_PACKET;
2429 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2430 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2432 wirevec[i + 1].iov_len = p->length;
2433 length += p->length;
2435 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2437 /* Convert jumbo packet header to network byte order */
2438 temp = (afs_uint32) (p->header.flags) << 24;
2439 temp |= (afs_uint32) (p->header.spare);
2440 *(afs_uint32 *) jp = htonl(temp);
2442 jp = (struct rx_jumboHeader *)
2443 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2445 /* Stamp each packet with a unique serial number. The serial
2446 * number is maintained on a connection basis because some types
2447 * of security may be based on the serial number of the packet,
2448 * and security is handled on a per authenticated-connection
2450 /* Pre-increment, to guarantee no zero serial number; a zero
2451 * serial number means the packet was never sent. */
2452 p->header.serial = ++serial;
2453 /* This is so we can adjust retransmit time-outs better in the face of
2454 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2456 if (p->firstSerial == 0) {
2457 p->firstSerial = p->header.serial;
2460 /* If an output tracer function is defined, call it with the packet and
2461 * network address. Note this function may modify its arguments. */
2462 if (rx_almostSent) {
2463 int drop = (*rx_almostSent) (p, &addr);
2464 /* Drop the packet if the tracer returned non-zero */
2465 if (drop)
2466 deliveryType = 'D'; /* Drop the packet */
2470 /* Get network byte order header */
2471 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2472 * touch ALL the fields */
2475 /* Send the packet out on the same socket that related packets are being
2476 * sent on */
2477 socket =
2478 (conn->type ==
2479 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2482 /* Possibly drop this packet, for testing purposes */
2483 if ((deliveryType == 'D')
2484 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2485 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2486 deliveryType = 'D'; /* Drop the packet */
2487 } else {
2488 deliveryType = 'S'; /* Send the packet */
2489 #endif /* RXDEBUG */
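/*
* Editorial note: in RXDEBUG builds, setting the global
* rx_intentionallyDroppedPacketsPer100 to, say, 10 marks roughly 10% of
* outgoing packets 'D' here, and those packets skip the osi_NetSend
* below entirely, which is a convenient way to exercise the retransmit
* path. Packets that do go out are traced with 'S' by the dpf at the
* end of this routine.
*/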
2491 /* Loop until the packet is sent. We'd prefer just to use a
2492 * blocking socket, but unfortunately the interface doesn't
2493 * allow us to have the socket block in send mode, and not
2494 * block in receive mode */
2495 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2496 waslocked = ISAFS_GLOCK();
2497 if (!istack && waslocked)
2498 AFS_GUNLOCK();
2499 #endif
2500 if ((code =
2501 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2502 istack)) != 0) {
2503 /* send failed, so let's hurry up the resend, eh? */
2504 if (rx_stats_active)
2505 rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
2506 for (i = 0; i < len; i++) {
2507 p = list[i];
2508 p->retryTime = p->timeSent; /* resend it very soon */
2509 clock_Addmsec(&(p->retryTime),
2510 10 + (((afs_uint32) p->backoff) << 8));
2512 /* Some systems are nice and tell us right away that we cannot
2513 * reach this recipient by returning an error code.
2514 * So, when this happens let's "down" the host NOW so
2515 * we don't sit around waiting for this host to timeout later.
2516 */
2517 if (call &&
2518 #ifdef AFS_NT40_ENV
2519 (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
2520 #elif defined(AFS_LINUX20_ENV)
2521 code == -ENETUNREACH
2522 #elif defined(AFS_DARWIN_ENV)
2523 code == EHOSTUNREACH
2524 #else
2525 0
2526 #endif
2527 )
2528 call->lastReceiveTime = 0;
2530 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2531 if (!istack && waslocked)
2532 AFS_GLOCK();
2533 #endif
2539 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d",
2540 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2541 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2542 p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2545 if (rx_stats_active) {
2546 rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2547 MUTEX_ENTER(&peer->peer_lock);
2548 hadd32(peer->bytesSent, p->length);
2549 MUTEX_EXIT(&peer->peer_lock);
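/*
* Editorial sketch of the wire layout built above, for a three-packet
* jumbogram (len == 3); only the last packet may be shorter than
* RX_JUMBOBUFFERSIZE, and all but the last carry RX_JUMBO_PACKET:
*
*     wirevec[0]: RX_HEADER_SIZE bytes          header of list[0]
*     wirevec[1]: RX_JUMBOBUFFERSIZE            data of list[0]
*                 + RX_JUMBOHEADERSIZE          jumbo header for list[1]
*     wirevec[2]: RX_JUMBOBUFFERSIZE            data of list[1]
*                 + RX_JUMBOHEADERSIZE          jumbo header for list[2]
*     wirevec[3]: list[2]->length               data of list[2]
*
* osi_NetSend() then transmits all len + 1 iovecs as one UDP datagram.
*/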
2554 /* Send a "special" packet to the peer connection. If call is
2555 * specified, then the packet is directed to a specific call channel
2556 * associated with the connection, otherwise it is directed to the
2557 * connection only. Uses optionalPacket if it is supplied, rather than
2558 * allocating a new packet buffer. Nbytes is the length of the data
2559 * portion of the packet. If data is non-null, nbytes of data are
2560 * copied into the packet. Type is the type of the packet, as defined
2561 * in rx.h. Bug: there's a lot of duplication between this and other
2562 * routines. This needs to be cleaned up. */
2563 struct rx_packet *
2564 rxi_SendSpecial(struct rx_call *call,
2565 struct rx_connection *conn,
2566 struct rx_packet *optionalPacket, int type, char *data,
2567 int nbytes, int istack)
2569 /* Some of the following stuff should be common code for all
2570 * packet sends (it's repeated elsewhere) */
2571 struct rx_packet *p;
2572 unsigned int i = 0;
2573 int savelen = 0, saven = 0;
2574 int channel, callNumber;
2575 if (call) {
2576 channel = call->channel;
2577 callNumber = *call->callNumber;
2578 /* BUSY packets refer to the next call on this connection */
2579 if (type == RX_PACKET_TYPE_BUSY) {
2580 callNumber++;
2581 }
2582 } else {
2583 channel = 0;
2584 callNumber = 0;
2585 }
2587 if (!optionalPacket) {
2588 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2589 if (!p)
2590 osi_Panic("rxi_SendSpecial failure");
2591 } else
2592 p = optionalPacket;
2593 if (nbytes != -1)
2594 p->length = nbytes;
2595 else
2596 nbytes = p->length;
2597 p->header.serviceId = conn->serviceId;
2598 p->header.securityIndex = conn->securityIndex;
2599 p->header.cid = (conn->cid | channel);
2600 p->header.callNumber = callNumber;
2601 p->header.seq = 0;
2602 p->header.epoch = conn->epoch;
2603 p->header.type = type;
2604 p->header.flags = 0;
2605 if (conn->type == RX_CLIENT_CONNECTION)
2606 p->header.flags |= RX_CLIENT_INITIATED;
2607 if (data)
2608 rx_packetwrite(p, 0, nbytes, data);
2610 for (i = 1; i < p->niovecs; i++) {
2611 if (nbytes <= p->wirevec[i].iov_len) {
2612 savelen = p->wirevec[i].iov_len;
2613 saven = p->niovecs;
2614 p->wirevec[i].iov_len = nbytes;
2615 p->niovecs = i + 1; /* so condition fails because i == niovecs */
2616 } else
2617 nbytes -= p->wirevec[i].iov_len;
2620 if (call)
2621 rxi_Send(call, p, istack);
2622 else
2623 rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2624 if (saven) { /* means we truncated the packet above. We probably don't */
2625 /* really need to do this, but it seems safer this way, given that */
2626 /* sneaky optionalPacket... */
2627 p->wirevec[i - 1].iov_len = savelen;
2628 p->niovecs = saven;
2630 if (!optionalPacket)
2631 rxi_FreePacket(p);
2632 return optionalPacket;
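/*
* Usage sketch (editorial; it mirrors how callers in rx.c use this
* routine, but treat the details as illustrative rather than
* authoritative): sending a connection-level abort carrying a 32-bit
* error code.
*
*     afs_int32 error = htonl(conn->error);
*     packet = rxi_SendSpecial((struct rx_call *)0, conn, packet,
*                              RX_PACKET_TYPE_ABORT, (char *)&error,
*                              sizeof(error), istack);
*
* With a NULL optionalPacket the routine allocates (and frees) its own
* RX_PACKET_CLASS_SPECIAL buffer and returns NULL; passing a packet in
* reuses it and returns it for further use.
*/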
2636 /* Encode the packet's header (from the struct header in the packet to
2637 * the net byte order representation in the wire representation of the
2638 * packet, which is what is actually sent out on the wire) */
2639 void
2640 rxi_EncodePacketHeader(struct rx_packet *p)
2642 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2644 memset(buf, 0, RX_HEADER_SIZE);
2645 *buf++ = htonl(p->header.epoch);
2646 *buf++ = htonl(p->header.cid);
2647 *buf++ = htonl(p->header.callNumber);
2648 *buf++ = htonl(p->header.seq);
2649 *buf++ = htonl(p->header.serial);
2650 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2651 | (((afs_uint32) p->header.flags) << 16)
2652 | (p->header.userStatus << 8) | p->header.securityIndex);
2653 /* Note: top 16 bits of this next word were reserved */
2654 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
2657 /* Decode the packet's header (from net byte order to a struct header) */
2658 void
2659 rxi_DecodePacketHeader(struct rx_packet *p)
2661 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2662 afs_uint32 temp;
2664 p->header.epoch = ntohl(*buf);
2665 buf++;
2666 p->header.cid = ntohl(*buf);
2667 buf++;
2668 p->header.callNumber = ntohl(*buf);
2669 buf++;
2670 p->header.seq = ntohl(*buf);
2671 buf++;
2672 p->header.serial = ntohl(*buf);
2673 buf++;
2675 temp = ntohl(*buf);
2676 buf++;
2678 /* C will truncate byte fields to bytes for me */
2679 p->header.type = temp >> 24;
2680 p->header.flags = temp >> 16;
2681 p->header.userStatus = temp >> 8;
2682 p->header.securityIndex = temp >> 0;
2684 temp = ntohl(*buf);
2685 buf++;
2687 p->header.serviceId = (temp & 0xffff);
2688 p->header.spare = temp >> 16;
2689 /* Note: top 16 bits of this last word are the security checksum */
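/*
* Worked example (editorial): the encode/decode pair above packs four
* byte-wide header fields into one 32-bit word. Assuming the usual
* values RX_PACKET_TYPE_DATA == 1 and RX_LAST_PACKET == 0x04, the last
* data packet of a call with userStatus 0 and securityIndex 2 encodes as
*
*     (1 << 24) | (0x04 << 16) | (0 << 8) | 2  ==  0x01040002
*
* and decoding recovers type = word >> 24, flags = (word >> 16) & 0xff,
* and so on; the & 0xff happens implicitly when the value is stored
* into the byte-wide header fields, as the comment above notes.
*/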
2692 void
2693 rxi_PrepareSendPacket(struct rx_call *call,
2694 struct rx_packet *p, int last)
2696 struct rx_connection *conn = call->conn;
2697 int i;
2698 afs_int32 len; /* len must be a signed type; it can go negative */
2700 p->flags &= ~RX_PKTFLAG_ACKED;
2701 p->header.cid = (conn->cid | call->channel);
2702 p->header.serviceId = conn->serviceId;
2703 p->header.securityIndex = conn->securityIndex;
2705 /* No data packets on call 0. Where do these come from? */
2706 if (*call->callNumber == 0)
2707 *call->callNumber = 1;
2709 p->header.callNumber = *call->callNumber;
2710 p->header.seq = call->tnext++;
2711 p->header.epoch = conn->epoch;
2712 p->header.type = RX_PACKET_TYPE_DATA;
2713 p->header.flags = 0;
2714 p->header.spare = 0;
2715 if (conn->type == RX_CLIENT_CONNECTION)
2716 p->header.flags |= RX_CLIENT_INITIATED;
2718 if (last)
2719 p->header.flags |= RX_LAST_PACKET;
2721 clock_Zero(&p->retryTime); /* Never yet transmitted */
2722 clock_Zero(&p->firstSent); /* Never yet transmitted */
2723 p->header.serial = 0; /* Another way of saying never transmitted... */
2726 /* Now that we're sure this is the last data on the call, make sure
2727 * that the "length" and the sum of the iov_lens matches. */
2728 len = p->length + call->conn->securityHeaderSize;
2730 for (i = 1; i < p->niovecs && len > 0; i++) {
2731 len -= p->wirevec[i].iov_len;
2734 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2735 } else if (i < p->niovecs) {
2736 /* Free any extra elements in the wirevec */
2737 #if defined(RX_ENABLE_TSFPQ)
2738 rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2739 #else /* !RX_ENABLE_TSFPQ */
2740 MUTEX_ENTER(&rx_freePktQ_lock);
2741 rxi_FreeDataBufsNoLock(p, i);
2742 MUTEX_EXIT(&rx_freePktQ_lock);
2743 #endif /* !RX_ENABLE_TSFPQ */
2744 p->niovecs = i;
2745 }
2747 if (len)
2748 p->wirevec[i - 1].iov_len += len;
2749 RXS_PreparePacket(conn->securityObject, call, p);
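/*
* Worked example (editorial): suppose p->length is 1000, the security
* header is 16 bytes, and the packet owns wirevec[1..3] of
* RX_JUMBOBUFFERSIZE (1412) bytes each. len starts at 1016; the loop
* exits with i == 2 and len == 1016 - 1412 == -396, so the buffers of
* wirevec[2] and wirevec[3] are returned to the free pool, niovecs
* drops to 2, and the final adjustment shrinks wirevec[1] to 1016
* bytes; the iov_lens now sum exactly to header + security + data.
*/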
2752 /* Given an interface MTU size, calculate an adjusted MTU size that
2753 * will make efficient use of the RX buffers when the peer is sending
2754 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
2755 int
2756 rxi_AdjustIfMTU(int mtu)
2758 int adjMTU;
2759 int frags;
2761 if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
2762 return mtu;
2763 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2764 if (mtu <= adjMTU) {
2765 return mtu;
2766 }
2767 mtu -= adjMTU;
2768 if (mtu <= 0) {
2769 return 0;
2770 }
2771 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2772 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
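/*
* Worked example (editorial; assumes the customary constants
* RX_HEADER_SIZE == 28, RX_JUMBOBUFFERSIZE == 1412 and
* RX_JUMBOHEADERSIZE == 4): for a standard 1500-byte Ethernet MTU,
* adjMTU = 28 + 1412 + 4 = 1444. The 56-byte remainder of 1500 - 1444
* holds no whole jumbo buffer (56 / 1416 == 0), so the function returns
* 1444: the largest size that fills Rx packet buffers exactly instead
* of leaving a ragged tail.
*/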
2775 /* Given an interface MTU size, and the peer's advertised max receive
2776 * size, calculate an adjusted maxMTU size that makes efficient use
2777 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2778 int
2779 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2781 int maxMTU = mtu * rxi_nSendFrags;
2782 maxMTU = MIN(maxMTU, peerMaxMTU);
2783 return rxi_AdjustIfMTU(maxMTU);
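/*
* Worked example (editorial, same constant assumptions as above): with
* mtu = 1444, rxi_nSendFrags = 4 and a peer advertising
* peerMaxMTU = 5000, maxMTU = MIN(5776, 5000) = 5000, which
* rxi_AdjustIfMTU() then rounds down to 1444 + 2 * 1416 = 4276, an
* exact multiple of jumbo buffers.
*/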
2786 /* Given a packet size, figure out how many datagram packets will fit.
2787 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2788 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2789 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2790 int
2791 rxi_AdjustDgramPackets(int frags, int mtu)
2793 int maxMTU;
2794 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2795 return 1;
2796 }
2797 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2798 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2799 /* subtract the size of the first and last packets */
2800 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2801 if (maxMTU < 0) {
2802 return 1;
2803 }
2804 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
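/*
* Worked example (editorial; UDP_HDR_SIZE == 28 for IP + UDP, other
* constants as above, and assuming RX_MAX_PACKET_SIZE does not bind):
* frags = 3, mtu = 1444 gives maxMTU = 3 * (1444 + 28) - 28 = 4388;
* subtracting the first/last-packet overhead 28 + 2 * 1412 + 4 = 2856
* leaves 1532, and 2 + 1532 / 1416 = 3, so three 1444-byte fragments
* carry a three-packet jumbogram.
*/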
2809 * This function can be used by the Windows Cache Manager
2810 * to dump the list of all rx packets so that we can determine
2811 * where the packet leakage is.
2813 int rx_DumpPackets(FILE *outputFile, char *cookie)
2815 #ifdef RXDEBUG_PACKET
2816 struct rx_packet *p;
2817 #ifdef AFS_NT40_ENV
2818 int zilch;
2819 char output[2048];
2820 #define RXDPRINTF sprintf
2821 #define RXDPRINTOUT output
2822 #else
2823 #define RXDPRINTF fprintf
2824 #define RXDPRINTOUT outputFile
2825 #endif
2827 NETPRI;
2828 MUTEX_ENTER(&rx_freePktQ_lock);
2829 RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
2830 #ifdef AFS_NT40_ENV
2831 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2832 #endif
2834 for (p = rx_mallocedP; p; p = p->allNextp) {
2835 RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, retryTime=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, backoff=%u, length=%u header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
2836 cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec, p->retryTime.sec, p->retryTime.usec,
2837 p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->backoff, (afs_uint32)p->length,
2838 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
2839 (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
2840 (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
2841 #ifdef AFS_NT40_ENV
2842 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2843 #endif
2846 RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
2847 #ifdef AFS_NT40_ENV
2848 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2849 #endif
2851 MUTEX_EXIT(&rx_freePktQ_lock);
2852 USERPRI;
2853 #endif /* RXDEBUG_PACKET */
2854 return 0;
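/*
* Usage sketch (editorial; the file name and cookie are hypothetical):
*
*     FILE *fp = fopen("rx_packets.log", "w");
*     if (fp != NULL) {
*         rx_DumpPackets(fp, "after-mount");
*         fclose(fp);
*     }
*
* The cookie string tags every line of a dump, so successive dumps can
* be diffed to see which packets persist and leak.
*/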