/*
 * Copyright 2000, International Business Machines Corporation and others.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

#include <afsconfig.h>
#ifdef KERNEL
#include "afs/param.h"
#else
#include <afs/param.h>
#endif
#if defined(UKERNEL)
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#endif
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"          /* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
static struct rx_packet *rx_mallocedP = 0;
static afs_uint32 rx_packet_id = 0;

extern char cml_version_number[];

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_uint32 ahost, short aport,
                                afs_int32 istack);

#ifdef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
#else
static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
                                   afs_uint32 first,
                                   struct rx_queue * q);
#endif
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */

/* get a word-aligned afs_int32 out of a packet.
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
    unsigned int i, l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            return
                *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                                 (offset - l)));
        }
        l += packet->wirevec[i].iov_len;
    }

    return 0;
}
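/*
 * Illustrative note (added; not from the original source): the wire
 * header lives in wirevec[0], so the scan starts at iovec 1 and offset
 * is relative to the start of user data.  For example, with
 * wirevec[1].iov_len == 4 and wirevec[2].iov_len == 8, offset 8 skips
 * iovec 1 (l becomes 4) and reads the word at byte 4 of iovec 2.
 */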
/* put a word-aligned afs_int32 into a packet.
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
{
    unsigned int i, l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                             (offset - l))) = data;
            return 0;
        }
        l += packet->wirevec[i].iov_len;
    }

    return 0;
}
/* read some bytes out of a packet into a user buffer.
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
int
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
                  char *out)
{
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            break;
        }
        l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i < packet->niovecs)) {
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        out += j;
        r -= j;
        l += packet->wirevec[i].iov_len;
        offset = l;
        i++;
    }

    return (r ? (resid - r) : resid);
}
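/*
 * Illustrative usage (added; not from the original source): callers
 * normally reach this through the rx_packetread() wrapper, which falls
 * back to this slow path when the requested range spans iovecs, e.g.:
 *
 *     struct rx_debugIn tin;
 *     rx_packetread(ap, 0, sizeof(tin), (char *)&tin);
 *
 * The return value is the number of bytes actually copied, which is
 * less than resid if the packet ran out of data first.
 */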
/* write some bytes from a user buffer into a packet.
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
int
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
{
    unsigned int i, j, l, o, r;
    char *b;

    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > o) {
            break;
        }
        l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i <= RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)     /* ++niovecs as a side-effect */
                break;

        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        memcpy(b, in, j);
        in += j;
        r -= j;
        l += packet->wirevec[i].iov_len;
        offset = l;
        i++;
    }

    return (r ? (resid - r) : resid);
}
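/*
 * Illustrative note (added; not from the original source): unlike the
 * read path, the write path can grow the packet on demand.  When the
 * write runs past the last iovec, rxi_AllocDataBuf() hooks additional
 * RX_CBUFFERSIZE continuation buffers onto wirevec[] (bumping niovecs
 * as a side effect), so a large rx_packetwrite() on a freshly
 * allocated packet allocates however many cbufs are needed.
 */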
int
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
        RX_PACKET_IOV_FULLINIT(p);
    }

    return num_pkts;
}
#ifdef RX_ENABLE_TSFPQ
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_ts_info_t * rx_ts_info;
    int transfer;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
    if (transfer > 0) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        transfer = MAX(transfer, rx_TSFPQGlobSize);
        if (transfer > rx_nFreePackets) {
            /* alloc enough for us, plus a few globs for other threads */
            rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
        }

        RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

        MUTEX_EXIT(&rx_freePktQ_lock);
    }

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_packet *c;
    int i, overq = 0;

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
         num_pkts--, overq++);

    if (overq) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            switch (class) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_SEND:
                rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_SPECIAL:
                rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
                break;
            }
        }
    }

#ifdef KERNEL
    if (rx_nFreePackets < num_pkts)
        num_pkts = rx_nFreePackets;

    if (!num_pkts)
        rxi_NeedMorePackets = TRUE;
#else /* KERNEL */
    if (rx_nFreePackets < num_pkts) {
        rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));
    }
#endif /* KERNEL */

    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
         i < num_pkts;
         i++, c = queue_Next(c, rx_packet)) {
        RX_FPQ_MARK_USED(c);
    }

    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);

    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);

    return num_pkts;
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct rx_queue * q)
{
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

    if (!num_pkts) {
        for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        }
    } else {
        for (queue_Scan(q, c, nc, rx_packet)) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        }
    }

    if (num_pkts) {
        RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
    }

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct rx_queue *q)
{
    struct rx_queue cbs;
    struct rx_packet *p, *np;
    int qlen = 0;

    osi_Assert(num_pkts >= 0);
    queue_Init(&cbs);

    if (!num_pkts) {
        for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
            }
            RX_FPQ_MARK_FREE(p);
        }
    } else {
        for (queue_Scan(q, p, np, rx_packet)) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
            }
            RX_FPQ_MARK_FREE(p);
        }
    }

    if (qlen) {
        queue_SpliceAppend(q, &cbs);
        qlen += num_pkts;
    } else
        qlen = num_pkts;

    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);

    return num_pkts;
}
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
int
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
{
    int i;

    i = p->niovecs - 1;
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
            return 0;
        }
    } else {
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
            return 0;
        }
    }

    return 0;
}
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
    int i, nv;
    struct rx_queue q;
    struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
        nv++;
    if ((nv + p->niovecs) > RX_MAXWVECS)
        nv = RX_MAXWVECS - p->niovecs;
    if (nv < 1)
        return nb;

    /* allocate buffers */
    queue_Init(&q);
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
        queue_Remove(cb);
        p->wirevec[i].iov_base = (caddr_t) cb->localdata;
        p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    }

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
    p->niovecs += nv;

    return nb;
}
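/*
 * Worked example (added; not from the original source): with a
 * hypothetical RX_CBUFFERSIZE of 1000 bytes, a request for nb == 2500
 * computes nv = 2, bumps it to 3 for the 500-byte remainder, hooks 3
 * cbufs onto the packet, and returns 2500 - 3000 == -500, i.e. success
 * with room to spare.
 */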
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
    osi_Assert(p);

    PIN(p, getme);              /* XXXXX */
    memset(p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        p->niovecs = 2;

        RX_TS_FPQ_CHECKIN(rx_ts_info, p);

        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        rx_mallocedP = p;
        MUTEX_EXIT(&rx_freePktQ_lock);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_NeedMorePackets = FALSE;
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
    osi_Assert(p);

    PIN(p, getme);              /* XXXXX */
    memset(p, 0, getme);
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
#endif
        p->niovecs = 2;

        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        rx_mallocedP = p;
    }

    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
{
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);              /* XXXXX */
    memset(p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        p->niovecs = 2;

        RX_TS_FPQ_CHECKIN(rx_ts_info, p);

        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        rx_mallocedP = p;
        MUTEX_EXIT(&rx_freePktQ_lock);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (flush_global &&
        (num_keep_local < apackets)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
        rxi_NeedMorePackets = FALSE;
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
void
rxi_MorePacketsNoLock(int apackets)
{
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;
    int getme;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    do {
        getme = apackets * sizeof(struct rx_packet);
        p = (struct rx_packet *)osi_Alloc(getme);
        if (p == NULL) {
            apackets -= apackets / 4;
            osi_Assert(apackets > 0);
        }
    } while (p == NULL);
    memset(p, 0, getme);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
#endif
        p->niovecs = 2;

        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        rx_mallocedP = p;
    }

    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();
}
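/*
 * Worked example (added; not from the original source): if
 * (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE works out
 * to 5 continuation buffers per maximal jumbogram, a request for 100
 * packets grows to 100 + 25 * 5 == 225 buffers, so a quarter of the
 * requested packets can each claim 5 cbufs while the rest remain
 * single-buffer packets.
 */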
void
rxi_FreeAllPackets(void)
{
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
}
#ifdef RX_ENABLE_TSFPQ
void
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
{
    struct rx_ts_info_t * rx_ts_info;
    int xfer;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (num_keep_local < rx_ts_info->_FPQ.len) {
            xfer = rx_ts_info->_FPQ.len - num_keep_local;
            RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
        } else {
            xfer = num_keep_local - rx_ts_info->_FPQ.len;
            if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
                xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
            if (rx_nFreePackets < xfer) {
                rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
            }
            RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
        }
        MUTEX_EXIT(&rx_freePktQ_lock);
    }
}

void
rxi_FlushLocalPacketsTSFPQ(void)
{
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
}
#endif /* RX_ENABLE_TSFPQ */
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
void
rx_CheckPackets(void)
{
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_maxSendWindow);
    }
}

/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for afs_int32: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        RX_TS_FPQ_LTOG(rx_ts_info);
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_FPQ_MARK_FREE(p);
    rx_nFreePackets++;
    queue_Append(&rx_freePacketQueue, p);
}
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
{
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *
 * returns:
 *   number of continuation buffers freed
 */
#ifndef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
{
    struct iovec *iov;
    struct rx_packet * cb;
    int count = 0;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
        iov = &p->wirevec[first];
        if (!iov->iov_base)
            osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
        cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
        RX_FPQ_MARK_FREE(cb);
        queue_Append(q, cb);
    }

    return count;
}
#endif
/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p      -- packet from which to free continuation buffers
 * [IN] first  -- iovec offset of first continuation buffer to free
 */
int
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
{
    struct iovec *iov;

    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
        if (!iov->iov_base)
            osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    return 0;
}
#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p             -- packet from which continuation buffers will be freed
 * [IN] first         -- iovec offset of first continuation buffer to free
 *                       any value less than 2, the min number of iovecs,
 *                       is treated as if it is 2.
 * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 *                       global free pool before returning
 */
static int
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
{
    struct iovec *iov;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
        if (!iov->iov_base)
            osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
        RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }

    return 0;
}
#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
void
rxi_RestoreDataBufs(struct rx_packet *p)
{
    unsigned int i;
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
            rxi_nBadIovecs++;
            p->niovecs = i;
            break;
        }
        iov->iov_len = RX_CBUFFERSIZE;
    }
}
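/*
 * Illustrative note (added; not from the original source): continuation
 * buffers are the localdata areas of other rx_packet structures, so
 * RX_CBUF_TO_PACKET() maps an iov_base back to its owning packet, e.g.:
 *
 *     cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
 *
 * Receive-path code shrinks iov_len to the bytes actually read, which
 * is why the lengths must be reset here before the packet is reused.
 */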
#ifdef RX_ENABLE_TSFPQ
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;

    if (first != 1)
        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
        if (!iov->iov_base)
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov >= end)
        return;

    RX_TS_INFO_GET(rx_ts_info);
    for (; iov < end; iov++) {
        if (!iov->iov_base)
            osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
        RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
        p->niovecs--;
    }
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;

    if (first != 1)
        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
        if (!iov->iov_base)
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov >= end)
        return;

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end; iov++) {
        if (!iov->iov_base)
            osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
        p->niovecs--;
    }
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacket(struct rx_packet *p)
{
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_FreePacket(struct rx_packet *p)
{
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
#endif /* RX_ENABLE_TSFPQ */
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            switch (class) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_SEND:
                rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_SPECIAL:
                rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
                break;
            }
        }
        return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    if (rx_stats_active)
        rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
#ifdef KERNEL
        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);
#endif /* KERNEL */

        RX_TS_FPQ_GTOL(rx_ts_info);
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
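/*
 * Illustrative note (added; not from the original source): p->length
 * counts user data only.  The bytes actually handed to the wire are
 * p->length + RX_HEADER_SIZE, which is exactly what the send paths
 * below pass to osi_NetSend().
 */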
#else /* RX_ENABLE_TSFPQ */
struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    struct rx_packet *p;

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            switch (class) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_SEND:
                rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_SPECIAL:
                rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
                break;
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
                break;
            }
        }
        return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    if (rx_stats_active)
        rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);

#ifdef KERNEL
    if (queue_IsEmpty(&rx_freePacketQueue))
        osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
    if (queue_IsEmpty(&rx_freePacketQueue))
        rxi_MorePacketsNoLock(rx_maxSendWindow);
#endif /* KERNEL */

    rx_nFreePackets--;
    p = queue_First(&rx_freePacketQueue, rx_packet);
    queue_Remove(p);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacketTSFPQ(int class, int pull_global)
{
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
        rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);

        RX_TS_FPQ_GTOL(rx_ts_info);

        MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
        return NULL;
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
    return p;
}
#else /* RX_ENABLE_TSFPQ */
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);

    return p;
}
#endif /* RX_ENABLE_TSFPQ */
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
struct rx_packet *
rxi_AllocSendPacket(struct rx_call *call, int want)
{
    struct rx_packet *p = (struct rx_packet *)0;
    int mud;
    unsigned delta;

    mud = call->MTU - RX_HEADER_SIZE;
    delta =
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
        want += delta;
        want = MIN(want, mud);

        if ((unsigned)want > p->length)
            (void)rxi_AllocDataBuf(p, (want - p->length),
                                   RX_PACKET_CLASS_SEND_CBUF);

        if (p->length > mud)
            p->length = mud;

        if (delta >= p->length) {
            rxi_FreePacket(p);
            p = NULL;
        } else {
            p->length -= delta;
        }
        return p;
    }
#endif /* RX_ENABLE_TSFPQ */
    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);

            want += delta;
            want = MIN(want, mud);

            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);

            if (p->length > mud)
                p->length = mud;

            if (delta >= p->length) {
                rxi_FreePacket(p);
                p = NULL;
            } else {
                p->length -= delta;
            }
            break;
        }

        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
         * keep waiting. */
        call->flags |= RX_CALL_WAIT_PACKETS;
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
        osi_rxSleep(&rx_waitingForPackets);
#endif

        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
    }

    return p;
}
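/*
 * Worked example (added; not from the original source): with a
 * hypothetical MTU of 1500, mud = 1500 - RX_HEADER_SIZE.  If the
 * security layer needs delta = 24 bytes of header plus trailer, a
 * caller asking for want = 2000 bytes is clamped to mud, padded out
 * with cbufs if the base packet is smaller, and finally has delta
 * subtracted so the usable payload never overflows the wire MTU.
 */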
#ifndef KERNEL
#ifdef AFS_NT40_ENV
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
#else
/* count the number of used FDs */
static int
CountFDs(int amax)
{
    struct stat tstat;
    int i, code;
    int count = 0;

    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
        if (code == 0)
            count++;
    }
    return count;
}
#endif /* AFS_NT40_ENV */
#else /* KERNEL */
#define CountFDs(amax) amax
#endif /* KERNEL */
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
int
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
               u_short * port)
{
    struct sockaddr_in from;
    struct msghdr msg;
    int nbytes;
    afs_int32 rlen;
    afs_uint32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);    /* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;     /* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
                                 * it once in order to avoid races.  */
    tlen = rlen - tlen;         /* left space in the packet */
    if (tlen > 0)
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (u_short)(nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {      /* Bogus packet */
        if (nbytes < 0 && errno == EWOULDBLOCK) {
            if (rx_stats_active)
                rx_MutexIncrement(rx_stats.noPacketOnRead, rx_stats_mutex);
        } else if (nbytes <= 0) {
            if (rx_stats_active) {
                MUTEX_ENTER(&rx_stats_mutex);
                rx_stats.bogusPacketOnRead++;
                rx_stats.bogusHost = from.sin_addr.s_addr;
                MUTEX_EXIT(&rx_stats_mutex);
            }
            dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
                 ntohs(from.sin_port), nbytes));
        }
        return 0;
    }
#ifdef RXDEBUG
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
             && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;

        dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
             p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
             p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
             p->length));
#ifdef RX_TRIMDATABUFS
        rxi_TrimDataBufs(p, 1);
#endif
        return 0;
    }
#endif /* RXDEBUG */
    else {
        /* Extract packet header. */
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
            struct rx_peer *peer;
            if (rx_stats_active)
                rx_MutexIncrement(rx_stats.packetsRead[p->header.type - 1], rx_stats_mutex);
            /*
             * Try to look up this peer structure.  If it doesn't exist,
             * don't create a new one -
             * we don't keep count of the bytes sent/received if a peer
             * structure doesn't already exist.
             *
             * The peer/connection cleanup code assumes that there is 1 peer
             * per connection.  If we actually created a peer structure here
             * and this packet was an rxdebug packet, the peer structure would
             * never be cleaned up.
             */
            peer = rxi_FindPeer(*host, *port, 0, 0);
            /* Since this may not be associated with a connection,
             * it may have no refCount, meaning we could race with
             * the peer reaper.
             */
            if (peer && (peer->refCount > 0)) {
                MUTEX_ENTER(&peer->peer_lock);
                hadd32(peer->bytesReceived, p->length);
                MUTEX_EXIT(&peer->peer_lock);
            }
        }

#ifdef RX_TRIMDATABUFS
        /* Free any empty packet buffers at the end of this packet */
        rxi_TrimDataBufs(p, 1);
#endif
        return 1;
    }
}
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
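/*
 * Illustrative layout (added; not from the original source) for a
 * two-packet jumbogram:
 *
 *   [rx_header][RX_JUMBOBUFFERSIZE data][4-byte rx_jumboHeader]
 *              [second packet's data ...]
 *
 * The abbreviated header carries only flags and a checksum/spare for
 * the following packet; everything else is inherited from the first
 * packet's full header.
 */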
struct rx_packet *
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
                     int first)
{
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    int niov, i;
    struct iovec *iov;
    int length;
    afs_uint32 temp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
        return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
        return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    p->niovecs = 2;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

    return np;
}
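/*
 * Illustrative note (added; not from the original source): if the jumbo
 * packet arrived with serial 100 and seq 7, the split-off packet gets
 * serial 101 and seq 8, while its flags and spare come from the
 * abbreviated jumbo header, so the final fragment in the chain no
 * longer carries RX_JUMBO_PACKET.
 */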
/* Send a udp datagram */
int
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
{
    struct msghdr msg;
    int ret;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);

    return ret;
}
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
static int
cpytoc(mblk_t * mp, int off, int len, char *cp)
{
    int n;

    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
            return -1;
        }
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);
        cp += n;
        len -= n;
    }
    return (len);
}

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
static int
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
           int niovs)
{
    int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
            return -1;
        }
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        len -= n;
        while (n) {
            if (!t) {
                o = 0;
                i++;
                t = iovs[i].iov_len;
            }
            m = MIN(n, t);
            memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
            mp->b_rptr += m;
            o += m;
            t -= m;
            n -= m;
        }
    }
    return (len);
}

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#else
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
static int
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
{
    caddr_t p1, p2;
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");      /* MTUXXX probably don't need this check */

    while (off && m)
        if (m->m_len <= off) {
            off -= m->m_len;
            m = m->m_next;
            continue;
        } else
            break;

    if (m == NULL)
        return len;

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    i = 0;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    while (len) {
        t = MIN(l1, MIN(l2, (unsigned int)len));
        memcpy(p2, p1, t);
        p1 += t;
        p2 += t;
        l1 -= t;
        l2 -= t;
        len -= t;
        if (!l1) {
            m = m->m_next;
            if (!m)
                break;
            p1 = mtod(m, caddr_t);
            l1 = m->m_len;
        }
        if (!l2) {
            if (++i >= niovs)
                break;
            p2 = iovs[i].iov_base;
            l2 = iovs[i].iov_len;
        }
    }

    return len;
}
#endif
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
int
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     mblk_t *amb;
#else
     struct mbuf *amb;
#endif
     void (*free) ();
     struct rx_packet *phandle;
     int hdr_len, data_len;
{
    int code;

    code =
        m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
                     phandle->niovecs);
    (*free) (amb);

    return code;
}
#endif /*KERNEL && !UKERNEL */
/* send a response to a debug packet */

struct rx_packet *
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
                       afs_uint32 ahost, short aport, int istack)
{
    struct rx_debugIn tin;
    afs_int32 tl;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
    } else {
        return ap;
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;

            /* get basic stats */
            memset(&tstat, 0, sizeof(tstat));   /* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
#endif
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.nPackets = htonl(rx_nPackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_nWaiting);
            tstat.nWaited = htonl(rx_nWaited);
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
                        tstat.idleThreads);
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);
            tl = sizeof(struct rx_debugStats) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            if (tl <= 0) {
                rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
                               (char *)&tstat);
                ap->length = sizeof(struct rx_debugStats);
                rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
                rx_computelen(ap, ap->length);
            }
            break;
        }
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            unsigned int i, j;
            struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);

            tl = sizeof(struct rx_debugConn) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            memset(&tconn, 0, sizeof(tconn));   /* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
                 */
#ifdef AFS_PTHREAD_ENV
                pthread_yield();
#else
                (void)IOMGR_Poll();
#endif
#endif
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                 */
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                            } else
                                tconn.callState[j] = RX_STATE_NOTINIT;
                        }

                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
                                         &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(flags);
                            DOHTONL(expires);
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                            DOHTONL(bytesSent);
                            for (i = 0;
                                 i <
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(short); i++)
                                DOHTONS(spares[i]);
                            for (i = 0;
                                 i <
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)
                                DOHTONL(sparel[i]);
                        }

                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                                       (char *)&tconn);
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                                            istack);
                        return ap;
                    }
                }
                MUTEX_EXIT(&rx_connHashTable_lock);
            }
            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);      /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                           (char *)&tconn);
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

            break;
        }
        /*
         * Pass back all the peer structures we have available
         */
    case RX_DEBUGI_GETPEER:{
            unsigned int i;
            struct rx_peer *tp;
            struct rx_debugPeer tpeer;

            tl = sizeof(struct rx_debugPeer) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            memset(&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 *
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock also increases
                 * the risk that we will miss a new entry - but we are
                 * willing to live with this limitation since this is
                 * meant for debugging only
                 */
#ifdef AFS_PTHREAD_ENV
                pthread_yield();
#else
                (void)IOMGR_Poll();
#endif
#endif
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        tp->refCount++;
                        MUTEX_EXIT(&rx_peerHashTable_lock);

                        MUTEX_ENTER(&tp->peer_lock);
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = tp->burstSize;
                        tpeer.burst = tp->burst;
                        tpeer.burstWait.sec = htonl(tp->burstWait.sec);
                        tpeer.burstWait.usec = htonl(tp->burstWait.usec);
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.timeout.sec = htonl(tp->timeout.sec);
                        tpeer.timeout.usec = htonl(tp->timeout.usec);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.inPacketSkew = htonl(tp->inPacketSkew);
                        tpeer.outPacketSkew = htonl(tp->outPacketSkew);
                        tpeer.rateFlag = htonl(tp->rateFlag);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high = htonl(tp->bytesSent.high);
                        tpeer.bytesSent.low = htonl(tp->bytesSent.low);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived.high);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived.low);
                        MUTEX_EXIT(&tp->peer_lock);

                        MUTEX_ENTER(&rx_peerHashTable_lock);
                        tp->refCount--;
                        MUTEX_EXIT(&rx_peerHashTable_lock);

                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                                       (char *)&tpeer);
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                                            istack);
                        return ap;
                    }
                }
                MUTEX_EXIT(&rx_peerHashTable_lock);
            }
            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff);     /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                           (char *)&tpeer);
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

            break;
        }
    case RX_DEBUGI_RXSTATS:{
            unsigned int i;
            afs_int32 *s;

            tl = sizeof(rx_stats) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            /* Since it's all int32s, convert to network order with a loop. */
            if (rx_stats_active)
                MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) & rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

            ap->length = sizeof(rx_stats);
            if (rx_stats_active)
                MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

            break;
        }

    default:
        /* error response packet */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

        break;
    }
    return ap;
}
struct rx_packet *
rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
                         afs_uint32 ahost, short aport, int istack)
{
    char buf[66];

    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
        memset(buf, 0, sizeof(buf));
        strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
        rx_packetwrite(ap, 0, 65, buf);
        ap->length = 65;
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    }

    return ap;
}
/* send a debug packet back to the sender */
static void
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                    afs_uint32 ahost, short aport, afs_int32 istack)
{
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;
    int saven = 0;
#ifdef KERNEL
    int waslocked = ISAFS_GLOCK();
#endif

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
#endif

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
        if (nbytes <= apacket->wirevec[i].iov_len) {
            savelen = apacket->wirevec[i].iov_len;
            saven = apacket->niovecs;
            apacket->wirevec[i].iov_len = nbytes;
            apacket->niovecs = i + 1;   /* so condition fails because i == niovecs */
        } else
            nbytes -= apacket->wirevec[i].iov_len;
    }

#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        if (!waslocked)
            AFS_GLOCK();
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
        AFS_GUNLOCK();
    }
#else
    if (waslocked)
        AFS_GUNLOCK();
#endif
#endif

    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
                      apacket->length + RX_HEADER_SIZE, istack);

#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        AFS_GLOCK();
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
        if (!waslocked)
            AFS_GUNLOCK();
    }
#else
    if (waslocked)
        AFS_GLOCK();
#endif
#endif

    if (saven) {                /* means we truncated the packet above. */
        apacket->wirevec[i - 1].iov_len = savelen;
        apacket->niovecs = saven;
    }
}
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
void
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
               struct rx_packet *p, int istack)
{
#ifdef KERNEL
    int waslocked;
#endif
    int code;
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    osi_socket socket;
    char deliveryType = 'S';

    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    if (p->length > conn->peer->maxPacketSize) {
        if ((p->header.type == RX_PACKET_TYPE_ACK) &&
            (p->header.flags & RX_REQUEST_ACK)) {
            conn->lastPingSize = p->length;
            conn->lastPingSizeSer = p->header.serial;
        } else if (p->header.seq != 0) {
            conn->lastPacketSize = p->length;
            conn->lastPacketSizeSeq = p->header.seq;
        }
    }
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
        p->firstSerial = p->header.serial;
    }

    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
        int drop = (*rx_almostSent) (p, &addr);
        /* drop packet if return value is non-zero? */
        if (drop)
            deliveryType = 'D'; /* Drop the packet */
    }

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
                                 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
        (conn->type ==
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
    } else {
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */

        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#ifdef KERNEL
        waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
        if (ICL_SETACTIVE(afs_iclSetp)) {
            if (!waslocked)
                AFS_GLOCK();
            afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                       "before osi_NetSend()");
            AFS_GUNLOCK();
        }
#else
        if (waslocked)
            AFS_GUNLOCK();
#endif
#endif
        if ((code =
             osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
                         p->length + RX_HEADER_SIZE, istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            if (rx_stats_active)
                rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
            p->retryTime = p->timeSent; /* resend it very soon */
            clock_Addmsec(&(p->retryTime),
                          10 + (((afs_uint32) p->backoff) << 8));
            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later.
             */
            if (
#ifdef AFS_NT40_ENV
                (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
#elif defined(AFS_LINUX20_ENV)
                code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
                code == EHOSTUNREACH
#else
                0
#endif
                && call)
                call->lastReceiveTime = 0;
        }
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
        if (ICL_SETACTIVE(afs_iclSetp)) {
            AFS_GLOCK();
            afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                       "after osi_NetSend()");
            if (!waslocked)
                AFS_GUNLOCK();
        }
#else
        if (waslocked)
            AFS_GLOCK();
#endif
#endif
#ifdef RXDEBUG
    }
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
         ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
         p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    if (rx_stats_active)
        rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
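/*
 * Worked example (added; not from the original source): on a send
 * failure the retry time is pulled in to timeSent plus
 * 10 + (backoff << 8) milliseconds, so a packet on its third backoff
 * step (backoff == 3) is rescheduled roughly 778 ms after it was sent.
 */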
2346 /* Send a list of packets to appropriate destination for the specified
2347 * connection. The headers are first encoded and placed in the packets.
2350 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2351 struct rx_packet **list, int len, int istack)
2353 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2356 struct sockaddr_in addr;
2357 struct rx_peer *peer = conn->peer;
2359 struct rx_packet *p = NULL;
2360 struct iovec wirevec[RX_MAXIOVECS];
2361 int i, length, code;
2364 struct rx_jumboHeader *jp;
2366 char deliveryType = 'S';
2368 /* The address we're sending the packet to */
2369 addr.sin_family = AF_INET;
2370 addr.sin_port = peer->port;
2371 addr.sin_addr.s_addr = peer->host;
2373 if (len + 1 > RX_MAXIOVECS) {
2374 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2378 * Stamp the packets in this jumbogram with consecutive serial numbers
2380 MUTEX_ENTER(&conn->conn_data_lock);
2381 serial = conn->serial;
2382 conn->serial += len;
2383 for (i = 0; i < len; i++) {
2385 if (p->length > conn->peer->maxPacketSize) {
2386 /* a ping *or* a sequenced packet can count */
2387 if ((p->length > conn->peer->maxPacketSize)) {
2388 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2389 (p->header.flags & RX_REQUEST_ACK)) &&
2390 ((i == 0) || (p->length >= conn->lastPingSize))) {
2391 conn->lastPingSize = p->length;
2392 conn->lastPingSizeSer = serial + i;
2393 } else if ((p->header.seq != 0) &&
2394 ((i == 0) || (p->length >= conn->lastPacketSize))) {
2395 conn->lastPacketSize = p->length;
2396 conn->lastPacketSizeSeq = p->header.seq;
2401 MUTEX_EXIT(&conn->conn_data_lock);
2404 /* This stuff should be revamped, I think, so that most, if not
2405 * all, of the header stuff is always added here. We could
2406 * probably do away with the encode/decode routines. XXXXX */
2409 length = RX_HEADER_SIZE;
2410 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2411 wirevec[0].iov_len = RX_HEADER_SIZE;
2412 for (i = 0; i < len; i++) {
2415 /* The whole 3.5 jumbogram scheme relies on packets fitting
2416 * in a single packet buffer. */
2417 if (p->niovecs > 2) {
2418 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2421 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2424 if (p->length != RX_JUMBOBUFFERSIZE) {
2425 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2427 p->header.flags |= RX_JUMBO_PACKET;
2428 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2429 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2431 wirevec[i + 1].iov_len = p->length;
2432 length += p->length;
2434 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2436 /* Convert jumbo packet header to network byte order */
2437 temp = (afs_uint32) (p->header.flags) << 24;
2438 temp |= (afs_uint32) (p->header.spare);
2439 *(afs_uint32 *) jp = htonl(temp);
2441 jp = (struct rx_jumboHeader *)
2442 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2444 /* Stamp each packet with a unique serial number. The serial
2445 * number is maintained on a connection basis because some types
2446 * of security may be based on the serial number of the packet,
2447 * and security is handled on a per authenticated-connection
2449 /* Pre-increment, to guarantee no zero serial number; a zero
2450 * serial number means the packet was never sent. */
2451 p->header.serial = ++serial;
2452 /* This is so we can adjust retransmit time-outs better in the face of
2453 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2455 if (p->firstSerial == 0) {
2456 p->firstSerial = p->header.serial;
2459 /* If an output tracer function is defined, call it with the packet and
2460 * network address. Note this function may modify its arguments. */
2461 if (rx_almostSent) {
2462 int drop = (*rx_almostSent) (p, &addr);
2463 /* a non-zero return value asks us to drop the packet */
2465 deliveryType = 'D'; /* Drop the packet */
2469 /* Get network byte order header */
2470 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2471 * touch ALL the fields */
2474 /* Send the packet out on the same socket that related packets are being
2478 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2481 /* Possibly drop this packet, for testing purposes */
2482 if ((deliveryType == 'D')
2483 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2484 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2485 deliveryType = 'D'; /* Drop the packet */
2487 deliveryType = 'S'; /* Send the packet */
2488 #endif /* RXDEBUG */
2490 /* Loop until the packet is sent. We'd prefer just to use a
2491 * blocking socket, but unfortunately the interface doesn't
2492 * allow us to have the socket block in send mode, and not
2493 * block in receive mode */
2494 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2495 waslocked = ISAFS_GLOCK();
2496 if (!istack && waslocked)
2500 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2502 /* send failed, so let's hurry up the resend, eh? */
2503 if (rx_stats_active)
2504 rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
2505 for (i = 0; i < len; i++) {
2507 p->retryTime = p->timeSent; /* resend it very soon */
2508 clock_Addmsec(&(p->retryTime),
2509 10 + (((afs_uint32) p->backoff) << 8));
2511 /* Some systems tell us right away, by returning an error code,
2512 * that the recipient cannot be reached.
2513 * When that happens, mark the host "down" NOW so
2514 * we don't sit around waiting for it to time out later.
2518 (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
2519 #elif defined(AFS_LINUX20_ENV)
2520 code == -ENETUNREACH
2521 #elif defined(AFS_DARWIN_ENV)
2522 code == EHOSTUNREACH
2527 call->lastReceiveTime = 0;
2529 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2530 if (!istack && waslocked)
2538 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d",
2539 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2540 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2541 p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2544 if (rx_stats_active)
2545 rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2546 MUTEX_ENTER(&peer->peer_lock);
2547 hadd32(peer->bytesSent, p->length);
2548 MUTEX_EXIT(&peer->peer_lock);
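/* Illustrative sketch (not part of the build; the RX_PACKET_EXAMPLES guard
 * is hypothetical): an rx_almostSent output tracer, as invoked from the send
 * paths above. A non-zero return asks the sender to mark the packet 'D' and
 * drop it, which is handy for injecting loss in tests. The port filter below
 * is purely illustrative. */
#ifdef RX_PACKET_EXAMPLES
static int
example_AlmostSentHook(struct rx_packet *p, struct sockaddr_in *addr)
{
    /* e.g. drop every ACK destined for port 7001 */
    if (p->header.type == RX_PACKET_TYPE_ACK && addr->sin_port == htons(7001))
        return 1;               /* non-zero: drop the packet */
    return 0;                   /* zero: send normally */
}
/* installed with: rx_almostSent = example_AlmostSentHook; */
#endif /* RX_PACKET_EXAMPLES */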
2552 /* Send a "special" packet to the peer connection. If call is
2553 * specified, then the packet is directed to a specific call channel
2554 * associated with the connection, otherwise it is directed to the
2555 * connection only. Uses optionalPacket if it is supplied, rather than
2556 * allocating a new packet buffer. Nbytes is the length of the data
2557 * portion of the packet. If data is non-null, nbytes of data are
2558 * copied into the packet. Type is the type of the packet, as defined
2559 * in rx.h. Bug: there's a lot of duplication between this and other
2560 * routines. This needs to be cleaned up. */
2562 rxi_SendSpecial(struct rx_call *call,
2563 struct rx_connection *conn,
2564 struct rx_packet *optionalPacket, int type, char *data,
2565 int nbytes, int istack)
2567 /* Some of the following stuff should be common code for all
2568 * packet sends (it's repeated elsewhere) */
2569 struct rx_packet *p;
2571 int savelen = 0, saven = 0;
2572 int channel, callNumber;
2574 channel = call->channel;
2575 callNumber = *call->callNumber;
2576 /* BUSY packets refer to the next call on this connection */
2577 if (type == RX_PACKET_TYPE_BUSY) {
2586 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2588 osi_Panic("rxi_SendSpecial failure");
2595 p->header.serviceId = conn->serviceId;
2596 p->header.securityIndex = conn->securityIndex;
2597 p->header.cid = (conn->cid | channel);
2598 p->header.callNumber = callNumber;
2600 p->header.epoch = conn->epoch;
2601 p->header.type = type;
2602 p->header.flags = 0;
2603 if (conn->type == RX_CLIENT_CONNECTION)
2604 p->header.flags |= RX_CLIENT_INITIATED;
2606 rx_packetwrite(p, 0, nbytes, data);
2608 for (i = 1; i < p->niovecs; i++) {
2609 if (nbytes <= p->wirevec[i].iov_len) {
2610 savelen = p->wirevec[i].iov_len;
2612 p->wirevec[i].iov_len = nbytes;
2613 p->niovecs = i + 1; /* so condition fails because i == niovecs */
2615 nbytes -= p->wirevec[i].iov_len;
2619 rxi_Send(call, p, istack);
2621 rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2622 if (saven) { /* we truncated the packet above, so restore the */
2623 /* original iovec length. Probably unnecessary, but safer, */
2624 /* since the caller's optionalPacket may be reused. */
2625 p->wirevec[i - 1].iov_len = savelen;
2628 if (!optionalPacket)
2630 return optionalPacket;
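/* Illustrative sketch (hypothetical RX_PACKET_EXAMPLES guard): a typical
 * rxi_SendSpecial caller, sending a connection-level abort that carries a
 * 32-bit error code in network byte order. A null call pointer directs the
 * packet at the connection rather than a call channel, and a null
 * optionalPacket makes rxi_SendSpecial allocate (and free) its own buffer. */
#ifdef RX_PACKET_EXAMPLES
static void
example_SendConnAbort(struct rx_connection *conn, afs_int32 error, int istack)
{
    afs_int32 nerror = htonl(error);
    (void)rxi_SendSpecial((struct rx_call *)0, conn, (struct rx_packet *)0,
                          RX_PACKET_TYPE_ABORT, (char *)&nerror,
                          sizeof(nerror), istack);
}
#endif /* RX_PACKET_EXAMPLES */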
2634 /* Encode the packet's header: convert the struct header in the packet
2635 * to the network byte order representation that is actually
2636 * sent out on the wire. */
2638 rxi_EncodePacketHeader(struct rx_packet *p)
2640 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2642 memset(buf, 0, RX_HEADER_SIZE);
2643 *buf++ = htonl(p->header.epoch);
2644 *buf++ = htonl(p->header.cid);
2645 *buf++ = htonl(p->header.callNumber);
2646 *buf++ = htonl(p->header.seq);
2647 *buf++ = htonl(p->header.serial);
2648 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2649 | (((afs_uint32) p->header.flags) << 16)
2650 | (p->header.userStatus << 8) | p->header.securityIndex);
2651 /* Note: top 16 bits of this next word were reserved */
2652 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
2655 /* Decode the packet's header (from net byte order to a struct header) */
2657 rxi_DecodePacketHeader(struct rx_packet *p)
2659 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2662 p->header.epoch = ntohl(*buf);
2664 p->header.cid = ntohl(*buf);
2666 p->header.callNumber = ntohl(*buf);
2668 p->header.seq = ntohl(*buf);
2670 p->header.serial = ntohl(*buf);
2676 /* C will truncate byte fields to bytes for me */
2677 p->header.type = temp >> 24;
2678 p->header.flags = temp >> 16;
2679 p->header.userStatus = temp >> 8;
2680 p->header.securityIndex = temp >> 0;
2685 p->header.serviceId = (temp & 0xffff);
2686 p->header.spare = temp >> 16;
2687 /* Note: top 16 bits of this last word are the security checksum */
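/* Illustrative sketch (hypothetical RX_PACKET_EXAMPLES guard): the encode
 * and decode routines above are inverses over the wire header, so a round
 * trip through both should reproduce the header fields. A minimal
 * self-check, assuming a packet whose wirevec[0] points at its wirehead
 * (the checksum/spare word is excluded, since a security layer may
 * overwrite its top 16 bits): */
#ifdef RX_PACKET_EXAMPLES
static int
example_HeaderRoundTrip(void)
{
    struct rx_packet p;
    struct rx_header saved;

    memset(&p, 0, sizeof(p));
    p.wirevec[0].iov_base = (char *)(&p.wirehead[0]);
    p.wirevec[0].iov_len = RX_HEADER_SIZE;
    p.header.epoch = 0x12345678;
    p.header.cid = 0x9abcdef0;
    p.header.callNumber = 42;
    p.header.seq = 7;
    p.header.serial = 99;
    p.header.type = RX_PACKET_TYPE_DATA;
    p.header.flags = RX_LAST_PACKET;
    p.header.securityIndex = 2;
    p.header.serviceId = 52;

    saved = p.header;
    rxi_EncodePacketHeader(&p);
    memset(&p.header, 0, sizeof(p.header));
    rxi_DecodePacketHeader(&p);
    return (p.header.cid == saved.cid && p.header.seq == saved.seq
            && p.header.serial == saved.serial
            && p.header.serviceId == saved.serviceId);
}
#endif /* RX_PACKET_EXAMPLES */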
2691 rxi_PrepareSendPacket(struct rx_call *call,
2692 struct rx_packet *p, int last)
2694 struct rx_connection *conn = call->conn;
2696 afs_int32 len; /* len must be a signed type; it can go negative */
2698 p->flags &= ~RX_PKTFLAG_ACKED;
2699 p->header.cid = (conn->cid | call->channel);
2700 p->header.serviceId = conn->serviceId;
2701 p->header.securityIndex = conn->securityIndex;
2703 /* No data packets on call 0. Where do these come from? */
2704 if (*call->callNumber == 0)
2705 *call->callNumber = 1;
2707 p->header.callNumber = *call->callNumber;
2708 p->header.seq = call->tnext++;
2709 p->header.epoch = conn->epoch;
2710 p->header.type = RX_PACKET_TYPE_DATA;
2711 p->header.flags = 0;
2712 p->header.spare = 0;
2713 if (conn->type == RX_CLIENT_CONNECTION)
2714 p->header.flags |= RX_CLIENT_INITIATED;
2717 p->header.flags |= RX_LAST_PACKET;
2719 clock_Zero(&p->retryTime); /* Never yet transmitted */
2720 clock_Zero(&p->firstSent); /* Never yet transmitted */
2721 p->header.serial = 0; /* Another way of saying never transmitted... */
2724 /* Now that we're sure this is the last data on the call, make sure
2725 * that the "length" and the sum of the iov_lens matches. */
2726 len = p->length + call->conn->securityHeaderSize;
2728 for (i = 1; i < p->niovecs && len > 0; i++) {
2729 len -= p->wirevec[i].iov_len;
2732 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2733 } else if (i < p->niovecs) {
2734 /* Free any extra elements in the wirevec */
2735 #if defined(RX_ENABLE_TSFPQ)
2736 rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2737 #else /* !RX_ENABLE_TSFPQ */
2738 MUTEX_ENTER(&rx_freePktQ_lock);
2739 rxi_FreeDataBufsNoLock(p, i);
2740 MUTEX_EXIT(&rx_freePktQ_lock);
2741 #endif /* !RX_ENABLE_TSFPQ */
2746 p->wirevec[i - 1].iov_len += len;
2747 RXS_PreparePacket(conn->securityObject, call, p);
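/* Illustrative sketch (hypothetical RX_PACKET_EXAMPLES guard): the length
 * reconciliation loop above in isolation. Walk the data iovecs until 'len'
 * bytes are covered; on success 'len' is <= 0, and adding the (non-positive)
 * remainder to the last iovec used trims the buffers to end exactly at the
 * requested length. */
#ifdef RX_PACKET_EXAMPLES
static int
example_CoverLength(struct iovec *iov, int niov, afs_int32 len)
{
    int i;
    for (i = 1; i < niov && len > 0; i++)
        len -= iov[i].iov_len;
    if (len > 0)
        return -1;              /* iovecs too short for the requested length */
    return i;                   /* first unused iovec index */
}
#endif /* RX_PACKET_EXAMPLES */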
2750 /* Given an interface MTU size, calculate an adjusted MTU size that
2751 * will make efficient use of the RX buffers when the peer is sending
2752 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
2754 rxi_AdjustIfMTU(int mtu)
2759 if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
2761 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2762 if (mtu <= adjMTU) {
2769 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2770 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
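/* Worked example, assuming the usual constants (RX_HEADER_SIZE == 28,
 * RX_JUMBOBUFFERSIZE == 1412, RX_JUMBOHEADERSIZE == 4) and jumbograms
 * enabled: rxi_AdjustIfMTU(1500) computes adjMTU = 28 + 1412 + 4 = 1444;
 * the 56 bytes left over from a 1500-byte MTU cannot hold another
 * 1416-byte jumbo buffer (56 / 1416 == 0), so the adjusted MTU is 1444. */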
2773 /* Given an interface MTU size, and the peer's advertised max receive
2774 * size, calculate an adjusted maxMTU size that makes efficient use
2775 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2777 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2779 int maxMTU = mtu * rxi_nSendFrags;
2780 maxMTU = MIN(maxMTU, peerMaxMTU);
2781 return rxi_AdjustIfMTU(maxMTU);
2784 /* Given a packet size, figure out how many packet buffers will fit in a datagram.
2785 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2786 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2787 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2789 rxi_AdjustDgramPackets(int frags, int mtu)
2792 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2795 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2796 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2797 /* subtract the size of the first and last packets */
2798 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2802 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
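/* Worked example, assuming UDP_HDR_SIZE (IP + UDP headers) == 28 and the
 * constants above: rxi_AdjustDgramPackets(3, 1444) computes
 * maxMTU = 3 * (1444 + 28) - 28 = 4388; subtracting the first/last
 * buffer overhead 28 + 2 * 1412 + 4 = 2856 leaves 1532, and
 * 2 + 1532 / 1416 = 3, so three packet buffers fit in the datagram. */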
2807 * This function can be used by the Windows Cache Manager
2808 * to dump the list of all rx packets so that the source of
2809 * a packet leak can be determined.
2811 int rx_DumpPackets(FILE *outputFile, char *cookie)
2813 #ifdef RXDEBUG_PACKET
2814 struct rx_packet *p;
2818 #define RXDPRINTF sprintf
2819 #define RXDPRINTOUT output
2821 #define RXDPRINTF fprintf
2822 #define RXDPRINTOUT outputFile
2826 MUTEX_ENTER(&rx_freePktQ_lock);
2827 RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
2829 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2832 for (p = rx_mallocedP; p; p = p->allNextp) {
2833 RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, retryTime=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, backoff=%u, length=%u header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
2834 cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec, p->retryTime.sec, p->retryTime.usec,
2835 p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->backoff, (afs_uint32)p->length,
2836 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
2837 (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
2838 (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
2840 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2844 RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
2846 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2849 MUTEX_EXIT(&rx_freePktQ_lock);
2851 #endif /* RXDEBUG_PACKET */
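/* Illustrative usage (hypothetical RX_PACKET_EXAMPLES guard): on a
 * unix-style build rx_DumpPackets writes to a stdio stream, so a leak hunt
 * might look like the sketch below; on Windows the output file is written
 * with WriteFile as above. The path and cookie string are hypothetical. */
#ifdef RX_PACKET_EXAMPLES
static void
example_DumpAllPackets(void)
{
    FILE *fp = fopen("/tmp/rx_packets.txt", "w");
    if (fp != NULL) {
        rx_DumpPackets(fp, "leak-check");
        fclose(fp);
    }
}
#endif /* RX_PACKET_EXAMPLES */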