2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "afs/param.h"
14 #include <afs/param.h>
20 #include "afs/sysincludes.h"
21 #include "afsincludes.h"
22 #include "rx/rx_kcommon.h"
23 #include "rx/rx_clock.h"
24 #include "rx/rx_queue.h"
25 #include "rx/rx_packet.h"
26 #else /* defined(UKERNEL) */
27 #ifdef RX_KERNEL_TRACE
28 #include "../rx/rx_kcommon.h"
31 #ifndef AFS_LINUX20_ENV
34 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
35 #include "afs/sysincludes.h"
37 #if defined(AFS_OBSD_ENV)
41 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
42 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
43 #include "sys/mount.h" /* it gets pulled in by something later anyway */
47 #include "netinet/in.h"
48 #include "afs/afs_osi.h"
49 #include "rx_kmutex.h"
50 #include "rx/rx_clock.h"
51 #include "rx/rx_queue.h"
53 #include <sys/sysmacros.h>
55 #include "rx/rx_packet.h"
56 #endif /* defined(UKERNEL) */
57 #include "rx/rx_globals.h"
59 #include "sys/types.h"
62 #if defined(AFS_NT40_ENV)
65 #define EWOULDBLOCK WSAEWOULDBLOCK
68 #include "rx_xmit_nt.h"
71 #include <sys/socket.h>
72 #include <netinet/in.h>
78 #include <sys/sysmacros.h>
80 #include "rx_packet.h"
81 #include "rx_globals.h"
91 /* rxdb_fileID is used to identify the lock location, along with line#. */
92 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
93 #endif /* RX_LOCKS_DB */
94 static struct rx_packet *rx_mallocedP = 0;
96 static afs_uint32 rx_packet_id = 0;
99 extern char cml_version_number[];
101 static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);
103 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
104 afs_int32 ahost, short aport,
107 #ifdef RX_ENABLE_TSFPQ
109 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
111 static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
113 struct rx_queue * q);
116 /* some rules about packets:
117 * 1. When a packet is allocated, the final iov_buf contains room for
118 * a security trailer, but iov_len masks that fact. If the security
119 * package wants to add the trailer, it may do so, and then extend
120 * iov_len appropriately. For this reason, packet's niovecs and
121 * iov_len fields should be accurate before calling PreparePacket.
125 * all packet buffers (iov_base) are integral multiples of
127 * offset is an integral multiple of the word size.
130 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
134 for (l = 0, i = 1; i < packet->niovecs; i++) {
135 if (l + packet->wirevec[i].iov_len > offset) {
137 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
140 l += packet->wirevec[i].iov_len;
147 * all packet buffers (iov_base) are integral multiples of the word size.
148 * offset is an integral multiple of the word size.
151 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
155 for (l = 0, i = 1; i < packet->niovecs; i++) {
156 if (l + packet->wirevec[i].iov_len > offset) {
157 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
158 (offset - l))) = data;
161 l += packet->wirevec[i].iov_len;
168 * all packet buffers (iov_base) are integral multiples of the
170 * offset is an integral multiple of the word size.
172 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
175 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
178 unsigned int i, j, l, r;
179 for (l = 0, i = 1; i < packet->niovecs; i++) {
180 if (l + packet->wirevec[i].iov_len > offset) {
183 l += packet->wirevec[i].iov_len;
186 /* i is the iovec which contains the first little bit of data in which we
187 * are interested. l is the total length of everything prior to this iovec.
188 * j is the number of bytes we can safely copy out of this iovec.
189 * offset only applies to the first iovec.
192 while ((r > 0) && (i < packet->niovecs)) {
193 j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
194 memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
197 l += packet->wirevec[i].iov_len;
202 return (r ? (resid - r) : resid);
207 * all packet buffers (iov_base) are integral multiples of the
209 * offset is an integral multiple of the word size.
212 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
214 unsigned int i, j, l, o, r;
217 for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
218 if (l + packet->wirevec[i].iov_len > o) {
221 l += packet->wirevec[i].iov_len;
224 /* i is the iovec which contains the first little bit of data in which we
225 * are interested. l is the total length of everything prior to this iovec.
226 * j is the number of bytes we can safely copy out of this iovec.
227 * offset only applies to the first iovec.
230 while ((r > 0) && (i <= RX_MAXWVECS)) {
231 if (i >= packet->niovecs)
232 if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
235 b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
236 j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
240 l += packet->wirevec[i].iov_len;
245 return (r ? (resid - r) : resid);
249 rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
251 struct rx_packet *p, *np;
253 num_pkts = AllocPacketBufs(class, num_pkts, q);
255 for (queue_Scan(q, p, np, rx_packet)) {
256 RX_PACKET_IOV_FULLINIT(p);
262 #ifdef RX_ENABLE_TSFPQ
264 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
266 struct rx_ts_info_t * rx_ts_info;
270 RX_TS_INFO_GET(rx_ts_info);
272 transfer = num_pkts - rx_ts_info->_FPQ.len;
275 MUTEX_ENTER(&rx_freePktQ_lock);
276 transfer = MAX(transfer, rx_TSFPQGlobSize);
277 if (transfer > rx_nFreePackets) {
278 /* alloc enough for us, plus a few globs for other threads */
279 rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
282 RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
284 MUTEX_EXIT(&rx_freePktQ_lock);
288 RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
292 #else /* RX_ENABLE_TSFPQ */
294 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
305 MUTEX_ENTER(&rx_freePktQ_lock);
308 for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
309 num_pkts--, overq++);
312 rxi_NeedMorePackets = TRUE;
313 if (rx_stats_active) {
315 case RX_PACKET_CLASS_RECEIVE:
316 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
318 case RX_PACKET_CLASS_SEND:
319 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
321 case RX_PACKET_CLASS_SPECIAL:
322 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
324 case RX_PACKET_CLASS_RECV_CBUF:
325 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
327 case RX_PACKET_CLASS_SEND_CBUF:
328 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
334 if (rx_nFreePackets < num_pkts)
335 num_pkts = rx_nFreePackets;
338 rxi_NeedMorePackets = TRUE;
342 if (rx_nFreePackets < num_pkts) {
343 rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), 4 * rx_initSendWindow));
347 for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
349 i++, c=queue_Next(c, rx_packet)) {
353 queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);
355 rx_nFreePackets -= num_pkts;
360 MUTEX_EXIT(&rx_freePktQ_lock);
365 #endif /* RX_ENABLE_TSFPQ */
368 * Free a packet currently used as a continuation buffer
370 #ifdef RX_ENABLE_TSFPQ
371 /* num_pkts=0 means queue length is unknown */
373 rxi_FreePackets(int num_pkts, struct rx_queue * q)
375 struct rx_ts_info_t * rx_ts_info;
376 struct rx_packet *c, *nc;
379 osi_Assert(num_pkts >= 0);
380 RX_TS_INFO_GET(rx_ts_info);
383 for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
384 rxi_FreeDataBufsTSFPQ(c, 2, 0);
387 for (queue_Scan(q, c, nc, rx_packet)) {
388 rxi_FreeDataBufsTSFPQ(c, 2, 0);
393 RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
396 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
398 MUTEX_ENTER(&rx_freePktQ_lock);
400 RX_TS_FPQ_LTOG(rx_ts_info);
402 /* Wakeup anyone waiting for packets */
405 MUTEX_EXIT(&rx_freePktQ_lock);
411 #else /* RX_ENABLE_TSFPQ */
412 /* num_pkts=0 means queue length is unknown */
414 rxi_FreePackets(int num_pkts, struct rx_queue *q)
417 struct rx_packet *p, *np;
421 osi_Assert(num_pkts >= 0);
425 for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
426 if (p->niovecs > 2) {
427 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
434 for (queue_Scan(q, p, np, rx_packet)) {
435 if (p->niovecs > 2) {
436 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
443 queue_SpliceAppend(q, &cbs);
449 MUTEX_ENTER(&rx_freePktQ_lock);
451 queue_SpliceAppend(&rx_freePacketQueue, q);
452 rx_nFreePackets += qlen;
454 /* Wakeup anyone waiting for packets */
457 MUTEX_EXIT(&rx_freePktQ_lock);
462 #endif /* RX_ENABLE_TSFPQ */
464 /* this one is kind of awful.
465 * In rxkad, the packet has been all shortened, and everything, ready for
466 * sending. All of a sudden, we discover we need some of that space back.
467 * This isn't terribly general, because it knows that the packets are only
468 * rounded up to the EBS (userdata + security header).
471 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
475 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
476 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
477 p->wirevec[i].iov_len += nb;
481 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
482 p->wirevec[i].iov_len += nb;
490 /* get sufficient space to store nb bytes of data (or more), and hook
491 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
492 * returns the number of bytes >0 which it failed to come up with.
493 * Don't need to worry about locking on packet, since only
494 * one thread can manipulate one at a time. Locking on continution
495 * packets is handled by AllocPacketBufs */
496 /* MTUXXX don't need to go throught the for loop if we can trust niovecs */
498 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
502 struct rx_packet *cb, *ncb;
504 /* compute the number of cbuf's we need */
505 nv = nb / RX_CBUFFERSIZE;
506 if ((nv * RX_CBUFFERSIZE) < nb)
508 if ((nv + p->niovecs) > RX_MAXWVECS)
509 nv = RX_MAXWVECS - p->niovecs;
513 /* allocate buffers */
515 nv = AllocPacketBufs(class, nv, &q);
517 /* setup packet iovs */
518 for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
520 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
521 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
524 nb -= (nv * RX_CBUFFERSIZE);
525 p->length += (nv * RX_CBUFFERSIZE);
531 /* Add more packet buffers */
532 #ifdef RX_ENABLE_TSFPQ
534 rxi_MorePackets(int apackets)
536 struct rx_packet *p, *e;
537 struct rx_ts_info_t * rx_ts_info;
541 getme = apackets * sizeof(struct rx_packet);
542 p = (struct rx_packet *)osi_Alloc(getme);
545 PIN(p, getme); /* XXXXX */
547 RX_TS_INFO_GET(rx_ts_info);
549 RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
550 /* TSFPQ patch also needs to keep track of total packets */
552 MUTEX_ENTER(&rx_packets_mutex);
553 rx_nPackets += apackets;
554 RX_TS_FPQ_COMPUTE_LIMITS;
555 MUTEX_EXIT(&rx_packets_mutex);
557 for (e = p + apackets; p < e; p++) {
558 RX_PACKET_IOV_INIT(p);
561 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
564 MUTEX_ENTER(&rx_freePktQ_lock);
565 #ifdef RXDEBUG_PACKET
566 p->packetId = rx_packet_id++;
567 p->allNextp = rx_mallocedP;
568 #endif /* RXDEBUG_PACKET */
570 MUTEX_EXIT(&rx_freePktQ_lock);
573 rx_ts_info->_FPQ.delta += apackets;
575 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
577 MUTEX_ENTER(&rx_freePktQ_lock);
579 RX_TS_FPQ_LTOG(rx_ts_info);
580 rxi_NeedMorePackets = FALSE;
583 MUTEX_EXIT(&rx_freePktQ_lock);
587 #else /* RX_ENABLE_TSFPQ */
589 rxi_MorePackets(int apackets)
591 struct rx_packet *p, *e;
595 getme = apackets * sizeof(struct rx_packet);
596 p = (struct rx_packet *)osi_Alloc(getme);
599 PIN(p, getme); /* XXXXX */
602 MUTEX_ENTER(&rx_freePktQ_lock);
604 for (e = p + apackets; p < e; p++) {
605 RX_PACKET_IOV_INIT(p);
606 p->flags |= RX_PKTFLAG_FREE;
609 queue_Append(&rx_freePacketQueue, p);
610 #ifdef RXDEBUG_PACKET
611 p->packetId = rx_packet_id++;
612 p->allNextp = rx_mallocedP;
613 #endif /* RXDEBUG_PACKET */
617 rx_nPackets += apackets;
618 rx_nFreePackets += apackets;
619 rxi_NeedMorePackets = FALSE;
622 MUTEX_EXIT(&rx_freePktQ_lock);
625 #endif /* RX_ENABLE_TSFPQ */
627 #ifdef RX_ENABLE_TSFPQ
629 rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
631 struct rx_packet *p, *e;
632 struct rx_ts_info_t * rx_ts_info;
636 getme = apackets * sizeof(struct rx_packet);
637 p = (struct rx_packet *)osi_Alloc(getme);
639 PIN(p, getme); /* XXXXX */
641 RX_TS_INFO_GET(rx_ts_info);
643 RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
644 /* TSFPQ patch also needs to keep track of total packets */
645 MUTEX_ENTER(&rx_packets_mutex);
646 rx_nPackets += apackets;
647 RX_TS_FPQ_COMPUTE_LIMITS;
648 MUTEX_EXIT(&rx_packets_mutex);
650 for (e = p + apackets; p < e; p++) {
651 RX_PACKET_IOV_INIT(p);
653 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
656 MUTEX_ENTER(&rx_freePktQ_lock);
657 #ifdef RXDEBUG_PACKET
658 p->packetId = rx_packet_id++;
659 p->allNextp = rx_mallocedP;
660 #endif /* RXDEBUG_PACKET */
662 MUTEX_EXIT(&rx_freePktQ_lock);
665 rx_ts_info->_FPQ.delta += apackets;
668 (num_keep_local < apackets)) {
670 MUTEX_ENTER(&rx_freePktQ_lock);
672 RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
673 rxi_NeedMorePackets = FALSE;
676 MUTEX_EXIT(&rx_freePktQ_lock);
680 #endif /* RX_ENABLE_TSFPQ */
683 /* Add more packet buffers */
685 rxi_MorePacketsNoLock(int apackets)
687 #ifdef RX_ENABLE_TSFPQ
688 struct rx_ts_info_t * rx_ts_info;
689 #endif /* RX_ENABLE_TSFPQ */
690 struct rx_packet *p, *e;
693 /* allocate enough packets that 1/4 of the packets will be able
694 * to hold maximal amounts of data */
695 apackets += (apackets / 4)
696 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
698 getme = apackets * sizeof(struct rx_packet);
699 p = (struct rx_packet *)osi_Alloc(getme);
701 apackets -= apackets / 4;
702 osi_Assert(apackets > 0);
707 #ifdef RX_ENABLE_TSFPQ
708 RX_TS_INFO_GET(rx_ts_info);
709 RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info,apackets);
710 #endif /* RX_ENABLE_TSFPQ */
712 for (e = p + apackets; p < e; p++) {
713 RX_PACKET_IOV_INIT(p);
714 p->flags |= RX_PKTFLAG_FREE;
717 queue_Append(&rx_freePacketQueue, p);
718 #ifdef RXDEBUG_PACKET
719 p->packetId = rx_packet_id++;
720 p->allNextp = rx_mallocedP;
721 #endif /* RXDEBUG_PACKET */
725 rx_nFreePackets += apackets;
726 MUTEX_ENTER(&rx_packets_mutex);
727 rx_nPackets += apackets;
728 #ifdef RX_ENABLE_TSFPQ
729 RX_TS_FPQ_COMPUTE_LIMITS;
730 #endif /* RX_ENABLE_TSFPQ */
731 MUTEX_EXIT(&rx_packets_mutex);
732 rxi_NeedMorePackets = FALSE;
738 rxi_FreeAllPackets(void)
740 /* must be called at proper interrupt level, etcetera */
741 /* MTUXXX need to free all Packets */
742 osi_Free(rx_mallocedP,
743 (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
744 UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
747 #ifdef RX_ENABLE_TSFPQ
749 rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
751 struct rx_ts_info_t * rx_ts_info;
755 RX_TS_INFO_GET(rx_ts_info);
757 if (num_keep_local != rx_ts_info->_FPQ.len) {
759 MUTEX_ENTER(&rx_freePktQ_lock);
760 if (num_keep_local < rx_ts_info->_FPQ.len) {
761 xfer = rx_ts_info->_FPQ.len - num_keep_local;
762 RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
765 xfer = num_keep_local - rx_ts_info->_FPQ.len;
766 if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
767 xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
768 if (rx_nFreePackets < xfer) {
769 rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
771 RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
773 MUTEX_EXIT(&rx_freePktQ_lock);
779 rxi_FlushLocalPacketsTSFPQ(void)
781 rxi_AdjustLocalPacketsTSFPQ(0, 0);
783 #endif /* RX_ENABLE_TSFPQ */
785 /* Allocate more packets iff we need more continuation buffers */
786 /* In kernel, can't page in memory with interrupts disabled, so we
787 * don't use the event mechanism. */
789 rx_CheckPackets(void)
791 if (rxi_NeedMorePackets) {
792 rxi_MorePackets(rx_maxSendWindow);
796 /* In the packet freeing routine below, the assumption is that
797 we want all of the packets to be used equally frequently, so that we
798 don't get packet buffers paging out. It would be just as valid to
799 assume that we DO want them to page out if not many are being used.
800 In any event, we assume the former, and append the packets to the end
802 /* This explanation is bogus. The free list doesn't remain in any kind of
803 useful order for afs_int32: the packets in use get pretty much randomly scattered
804 across all the pages. In order to permit unused {packets,bufs} to page out, they
805 must be stored so that packets which are adjacent in memory are adjacent in the
806 free list. An array springs rapidly to mind.
809 /* Actually free the packet p. */
810 #ifdef RX_ENABLE_TSFPQ
812 rxi_FreePacketNoLock(struct rx_packet *p)
814 struct rx_ts_info_t * rx_ts_info;
815 dpf(("Free %"AFS_PTR_FMT"\n", p));
817 RX_TS_INFO_GET(rx_ts_info);
818 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
819 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
820 RX_TS_FPQ_LTOG(rx_ts_info);
823 #else /* RX_ENABLE_TSFPQ */
825 rxi_FreePacketNoLock(struct rx_packet *p)
827 dpf(("Free %"AFS_PTR_FMT"\n", p));
831 queue_Append(&rx_freePacketQueue, p);
833 #endif /* RX_ENABLE_TSFPQ */
835 #ifdef RX_ENABLE_TSFPQ
837 rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
839 struct rx_ts_info_t * rx_ts_info;
840 dpf(("Free %"AFS_PTR_FMT"\n", p));
842 RX_TS_INFO_GET(rx_ts_info);
843 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
845 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
847 MUTEX_ENTER(&rx_freePktQ_lock);
849 RX_TS_FPQ_LTOG(rx_ts_info);
851 /* Wakeup anyone waiting for packets */
854 MUTEX_EXIT(&rx_freePktQ_lock);
858 #endif /* RX_ENABLE_TSFPQ */
861 * free continuation buffers off a packet into a queue
863 * [IN] p -- packet from which continuation buffers will be freed
864 * [IN] first -- iovec offset of first continuation buffer to free
865 * [IN] q -- queue into which continuation buffers will be chained
868 * number of continuation buffers freed
870 #ifndef RX_ENABLE_TSFPQ
872 rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
875 struct rx_packet * cb;
878 for (first = MAX(2, first); first < p->niovecs; first++, count++) {
879 iov = &p->wirevec[first];
881 osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
882 cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
883 RX_FPQ_MARK_FREE(cb);
894 * free packet continuation buffers into the global free packet pool
896 * [IN] p -- packet from which to free continuation buffers
897 * [IN] first -- iovec offset of first continuation buffer to free
903 rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
907 for (first = MAX(2, first); first < p->niovecs; first++) {
908 iov = &p->wirevec[first];
910 osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
911 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
919 #ifdef RX_ENABLE_TSFPQ
921 * free packet continuation buffers into the thread-local free pool
923 * [IN] p -- packet from which continuation buffers will be freed
924 * [IN] first -- iovec offset of first continuation buffer to free
925 * any value less than 2, the min number of iovecs,
926 * is treated as if it is 2.
927 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
928 * global free pool before returning
934 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
937 struct rx_ts_info_t * rx_ts_info;
939 RX_TS_INFO_GET(rx_ts_info);
941 for (first = MAX(2, first); first < p->niovecs; first++) {
942 iov = &p->wirevec[first];
944 osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
945 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
950 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
952 MUTEX_ENTER(&rx_freePktQ_lock);
954 RX_TS_FPQ_LTOG(rx_ts_info);
956 /* Wakeup anyone waiting for packets */
959 MUTEX_EXIT(&rx_freePktQ_lock);
964 #endif /* RX_ENABLE_TSFPQ */
966 int rxi_nBadIovecs = 0;
968 /* rxi_RestoreDataBufs
970 * Restore the correct sizes to the iovecs. Called when reusing a packet
971 * for reading off the wire.
974 rxi_RestoreDataBufs(struct rx_packet *p)
977 struct iovec *iov = &p->wirevec[2];
979 RX_PACKET_IOV_INIT(p);
981 for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
982 if (!iov->iov_base) {
987 iov->iov_len = RX_CBUFFERSIZE;
991 #ifdef RX_ENABLE_TSFPQ
993 rxi_TrimDataBufs(struct rx_packet *p, int first)
996 struct iovec *iov, *end;
997 struct rx_ts_info_t * rx_ts_info;
1001 osi_Panic("TrimDataBufs 1: first must be 1");
1003 /* Skip over continuation buffers containing message data */
1004 iov = &p->wirevec[2];
1005 end = iov + (p->niovecs - 2);
1006 length = p->length - p->wirevec[1].iov_len;
1007 for (; iov < end && length > 0; iov++) {
1009 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
1010 length -= iov->iov_len;
1013 /* iov now points to the first empty data buffer. */
1017 RX_TS_INFO_GET(rx_ts_info);
1018 for (; iov < end; iov++) {
1020 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1021 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
1024 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
1026 MUTEX_ENTER(&rx_freePktQ_lock);
1028 RX_TS_FPQ_LTOG(rx_ts_info);
1029 rxi_PacketsUnWait();
1031 MUTEX_EXIT(&rx_freePktQ_lock);
1037 #else /* RX_ENABLE_TSFPQ */
1039 rxi_TrimDataBufs(struct rx_packet *p, int first)
1042 struct iovec *iov, *end;
1046 osi_Panic("TrimDataBufs 1: first must be 1");
1048 /* Skip over continuation buffers containing message data */
1049 iov = &p->wirevec[2];
1050 end = iov + (p->niovecs - 2);
1051 length = p->length - p->wirevec[1].iov_len;
1052 for (; iov < end && length > 0; iov++) {
1054 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
1055 length -= iov->iov_len;
1058 /* iov now points to the first empty data buffer. */
1063 MUTEX_ENTER(&rx_freePktQ_lock);
1065 for (; iov < end; iov++) {
1067 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1068 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
1071 rxi_PacketsUnWait();
1073 MUTEX_EXIT(&rx_freePktQ_lock);
1078 #endif /* RX_ENABLE_TSFPQ */
1080 /* Free the packet p. P is assumed not to be on any queue, i.e.
1081 * remove it yourself first if you call this routine. */
1082 #ifdef RX_ENABLE_TSFPQ
1084 rxi_FreePacket(struct rx_packet *p)
1086 rxi_FreeDataBufsTSFPQ(p, 2, 0);
1087 rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
1089 #else /* RX_ENABLE_TSFPQ */
1091 rxi_FreePacket(struct rx_packet *p)
1096 MUTEX_ENTER(&rx_freePktQ_lock);
1098 rxi_FreeDataBufsNoLock(p, 2);
1099 rxi_FreePacketNoLock(p);
1100 /* Wakeup anyone waiting for packets */
1101 rxi_PacketsUnWait();
1103 MUTEX_EXIT(&rx_freePktQ_lock);
1106 #endif /* RX_ENABLE_TSFPQ */
1108 /* rxi_AllocPacket sets up p->length so it reflects the number of
1109 * bytes in the packet at this point, **not including** the header.
1110 * The header is absolutely necessary, besides, this is the way the
1111 * length field is usually used */
1112 #ifdef RX_ENABLE_TSFPQ
1114 rxi_AllocPacketNoLock(int class)
1116 struct rx_packet *p;
1117 struct rx_ts_info_t * rx_ts_info;
1119 RX_TS_INFO_GET(rx_ts_info);
1122 if (rxi_OverQuota(class)) {
1123 rxi_NeedMorePackets = TRUE;
1124 if (rx_stats_active) {
1126 case RX_PACKET_CLASS_RECEIVE:
1127 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
1129 case RX_PACKET_CLASS_SEND:
1130 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
1132 case RX_PACKET_CLASS_SPECIAL:
1133 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
1135 case RX_PACKET_CLASS_RECV_CBUF:
1136 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
1138 case RX_PACKET_CLASS_SEND_CBUF:
1139 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
1143 return (struct rx_packet *)0;
1147 if (rx_stats_active)
1148 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1149 if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1152 if (queue_IsEmpty(&rx_freePacketQueue))
1153 osi_Panic("rxi_AllocPacket error");
1155 if (queue_IsEmpty(&rx_freePacketQueue))
1156 rxi_MorePacketsNoLock(rx_maxSendWindow);
1160 RX_TS_FPQ_GTOL(rx_ts_info);
1163 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1165 dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1168 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1169 * order to truncate outbound packets. In the near future, may need
1170 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1172 RX_PACKET_IOV_FULLINIT(p);
1175 #else /* RX_ENABLE_TSFPQ */
1177 rxi_AllocPacketNoLock(int class)
1179 struct rx_packet *p;
1182 if (rxi_OverQuota(class)) {
1183 rxi_NeedMorePackets = TRUE;
1184 if (rx_stats_active) {
1186 case RX_PACKET_CLASS_RECEIVE:
1187 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
1189 case RX_PACKET_CLASS_SEND:
1190 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
1192 case RX_PACKET_CLASS_SPECIAL:
1193 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
1195 case RX_PACKET_CLASS_RECV_CBUF:
1196 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
1198 case RX_PACKET_CLASS_SEND_CBUF:
1199 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
1203 return (struct rx_packet *)0;
1207 if (rx_stats_active)
1208 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1211 if (queue_IsEmpty(&rx_freePacketQueue))
1212 osi_Panic("rxi_AllocPacket error");
1214 if (queue_IsEmpty(&rx_freePacketQueue))
1215 rxi_MorePacketsNoLock(rx_maxSendWindow);
1219 p = queue_First(&rx_freePacketQueue, rx_packet);
1221 RX_FPQ_MARK_USED(p);
1223 dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1226 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1227 * order to truncate outbound packets. In the near future, may need
1228 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1230 RX_PACKET_IOV_FULLINIT(p);
1233 #endif /* RX_ENABLE_TSFPQ */
1235 #ifdef RX_ENABLE_TSFPQ
1237 rxi_AllocPacketTSFPQ(int class, int pull_global)
1239 struct rx_packet *p;
1240 struct rx_ts_info_t * rx_ts_info;
1242 RX_TS_INFO_GET(rx_ts_info);
1244 if (rx_stats_active)
1245 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1246 if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
1247 MUTEX_ENTER(&rx_freePktQ_lock);
1249 if (queue_IsEmpty(&rx_freePacketQueue))
1250 rxi_MorePacketsNoLock(rx_maxSendWindow);
1252 RX_TS_FPQ_GTOL(rx_ts_info);
1254 MUTEX_EXIT(&rx_freePktQ_lock);
1255 } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1259 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1261 dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1263 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1264 * order to truncate outbound packets. In the near future, may need
1265 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1267 RX_PACKET_IOV_FULLINIT(p);
1270 #endif /* RX_ENABLE_TSFPQ */
1272 #ifdef RX_ENABLE_TSFPQ
1274 rxi_AllocPacket(int class)
1276 struct rx_packet *p;
1278 p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
1281 #else /* RX_ENABLE_TSFPQ */
1283 rxi_AllocPacket(int class)
1285 struct rx_packet *p;
1287 MUTEX_ENTER(&rx_freePktQ_lock);
1288 p = rxi_AllocPacketNoLock(class);
1289 MUTEX_EXIT(&rx_freePktQ_lock);
1292 #endif /* RX_ENABLE_TSFPQ */
1294 /* This guy comes up with as many buffers as it {takes,can get} given
1295 * the MTU for this call. It also sets the packet length before
1296 * returning. caution: this is often called at NETPRI
1297 * Called with call locked.
1300 rxi_AllocSendPacket(struct rx_call *call, int want)
1302 struct rx_packet *p = (struct rx_packet *)0;
1307 mud = call->MTU - RX_HEADER_SIZE;
1309 rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
1310 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
1312 #ifdef RX_ENABLE_TSFPQ
1313 if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
1315 want = MIN(want, mud);
1317 if ((unsigned)want > p->length)
1318 (void)rxi_AllocDataBuf(p, (want - p->length),
1319 RX_PACKET_CLASS_SEND_CBUF);
1321 if (p->length > mud)
1324 if (delta >= p->length) {
1332 #endif /* RX_ENABLE_TSFPQ */
1334 while (!(call->error)) {
1335 MUTEX_ENTER(&rx_freePktQ_lock);
1336 /* if an error occurred, or we get the packet we want, we're done */
1337 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
1338 MUTEX_EXIT(&rx_freePktQ_lock);
1341 want = MIN(want, mud);
1343 if ((unsigned)want > p->length)
1344 (void)rxi_AllocDataBuf(p, (want - p->length),
1345 RX_PACKET_CLASS_SEND_CBUF);
1347 if (p->length > mud)
1350 if (delta >= p->length) {
1359 /* no error occurred, and we didn't get a packet, so we sleep.
1360 * At this point, we assume that packets will be returned
1361 * sooner or later, as packets are acknowledged, and so we
1364 call->flags |= RX_CALL_WAIT_PACKETS;
1365 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
1366 MUTEX_EXIT(&call->lock);
1367 rx_waitingForPackets = 1;
1369 #ifdef RX_ENABLE_LOCKS
1370 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
1372 osi_rxSleep(&rx_waitingForPackets);
1374 MUTEX_EXIT(&rx_freePktQ_lock);
1375 MUTEX_ENTER(&call->lock);
1376 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
1377 call->flags &= ~RX_CALL_WAIT_PACKETS;
1386 /* Windows does not use file descriptors. */
1387 #define CountFDs(amax) 0
1389 /* count the number of used FDs */
1398 for (i = 0; i < amax; i++) {
1399 code = fstat(i, &tstat);
1405 #endif /* AFS_NT40_ENV */
1408 #define CountFDs(amax) amax
1412 #if !defined(KERNEL) || defined(UKERNEL)
1414 /* This function reads a single packet from the interface into the
1415 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
1416 * (host,port) of the sender are stored in the supplied variables, and
1417 * the data length of the packet is stored in the packet structure.
1418 * The header is decoded. */
1420 rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
1423 struct sockaddr_in from;
1424 unsigned int nbytes;
1426 afs_uint32 tlen, savelen;
1428 rx_computelen(p, tlen);
1429 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
1431 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
1432 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
1433 * it once in order to avoid races. */
1436 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
1444 /* Extend the last iovec for padding, it's just to make sure that the
1445 * read doesn't return more data than we expect, and is done to get around
1446 * our problems caused by the lack of a length field in the rx header.
1447 * Use the extra buffer that follows the localdata in each packet
1449 savelen = p->wirevec[p->niovecs - 1].iov_len;
1450 p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
1452 memset(&msg, 0, sizeof(msg));
1453 msg.msg_name = (char *)&from;
1454 msg.msg_namelen = sizeof(struct sockaddr_in);
1455 msg.msg_iov = p->wirevec;
1456 msg.msg_iovlen = p->niovecs;
1457 nbytes = rxi_Recvmsg(socket, &msg, 0);
1459 /* restore the vec to its correct state */
1460 p->wirevec[p->niovecs - 1].iov_len = savelen;
1462 p->length = (u_short)(nbytes - RX_HEADER_SIZE);
1463 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
1464 if (nbytes < 0 && errno == EWOULDBLOCK) {
1465 if (rx_stats_active)
1466 rx_MutexIncrement(rx_stats.noPacketOnRead, rx_stats_mutex);
1467 } else if (nbytes <= 0) {
1468 if (rx_stats_active) {
1469 MUTEX_ENTER(&rx_stats_mutex);
1470 rx_stats.bogusPacketOnRead++;
1471 rx_stats.bogusHost = from.sin_addr.s_addr;
1472 MUTEX_EXIT(&rx_stats_mutex);
1474 dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
1475 ntohs(from.sin_port), nbytes));
1480 else if ((rx_intentionallyDroppedOnReadPer100 > 0)
1481 && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
1482 rxi_DecodePacketHeader(p);
1484 *host = from.sin_addr.s_addr;
1485 *port = from.sin_port;
1487 dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
1488 p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
1489 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1491 #ifdef RX_TRIMDATABUFS
1492 rxi_TrimDataBufs(p, 1);
1498 /* Extract packet header. */
1499 rxi_DecodePacketHeader(p);
1501 *host = from.sin_addr.s_addr;
1502 *port = from.sin_port;
1503 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
1504 struct rx_peer *peer;
1505 if (rx_stats_active)
1506 rx_MutexIncrement(rx_stats.packetsRead[p->header.type - 1], rx_stats_mutex);
1508 * Try to look up this peer structure. If it doesn't exist,
1509 * don't create a new one -
1510 * we don't keep count of the bytes sent/received if a peer
1511 * structure doesn't already exist.
1513 * The peer/connection cleanup code assumes that there is 1 peer
1514 * per connection. If we actually created a peer structure here
1515 * and this packet was an rxdebug packet, the peer structure would
1516 * never be cleaned up.
1518 peer = rxi_FindPeer(*host, *port, 0, 0);
1519 /* Since this may not be associated with a connection,
1520 * it may have no refCount, meaning we could race with
1523 if (peer && (peer->refCount > 0)) {
1524 MUTEX_ENTER(&peer->peer_lock);
1525 hadd32(peer->bytesReceived, p->length);
1526 MUTEX_EXIT(&peer->peer_lock);
1530 #ifdef RX_TRIMDATABUFS
1531 /* Free any empty packet buffers at the end of this packet */
1532 rxi_TrimDataBufs(p, 1);
1538 #endif /* !KERNEL || UKERNEL */
1540 /* This function splits off the first packet in a jumbo packet.
1541 * As of AFS 3.5, jumbograms contain more than one fixed size
1542 * packet, and the RX_JUMBO_PACKET flag is set in all but the
1543 * last packet header. All packets (except the last) are padded to
1544 * fall on RX_CBUFFERSIZE boundaries.
1545 * HACK: We store the length of the first n-1 packets in the
1546 * last two pad bytes. */
1549 rxi_SplitJumboPacket(struct rx_packet *p, afs_int32 host, short port,
1552 struct rx_packet *np;
1553 struct rx_jumboHeader *jp;
1559 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
1560 * bytes in length. All but the first packet are preceded by
1561 * an abbreviated four byte header. The length of the last packet
1562 * is calculated from the size of the jumbogram. */
1563 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1565 if ((int)p->length < length) {
1566 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
1569 niov = p->niovecs - 2;
1571 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
1574 iov = &p->wirevec[2];
1575 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
1577 /* Get a pointer to the abbreviated packet header */
1578 jp = (struct rx_jumboHeader *)
1579 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
1581 /* Set up the iovecs for the next packet */
1582 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
1583 np->wirevec[0].iov_len = sizeof(struct rx_header);
1584 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
1585 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
1586 np->niovecs = niov + 1;
1587 for (i = 2, iov++; i <= niov; i++, iov++) {
1588 np->wirevec[i] = *iov;
1590 np->length = p->length - length;
1591 p->length = RX_JUMBOBUFFERSIZE;
1594 /* Convert the jumbo packet header to host byte order */
1595 temp = ntohl(*(afs_uint32 *) jp);
1596 jp->flags = (u_char) (temp >> 24);
1597 jp->cksum = (u_short) (temp);
1599 /* Fill in the packet header */
1600 np->header = p->header;
1601 np->header.serial = p->header.serial + 1;
1602 np->header.seq = p->header.seq + 1;
1603 np->header.flags = jp->flags;
1604 np->header.spare = jp->cksum;
1610 /* Send a udp datagram */
1612 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
1613 int length, int istack)
1618 memset(&msg, 0, sizeof(msg));
1620 msg.msg_iovlen = nvecs;
1621 msg.msg_name = addr;
1622 msg.msg_namelen = sizeof(struct sockaddr_in);
1624 ret = rxi_Sendmsg(socket, &msg, 0);
1628 #elif !defined(UKERNEL)
1630 * message receipt is done in rxk_input or rx_put.
1633 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1635 * Copy an mblock to the contiguous area pointed to by cp.
1636 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1637 * but it doesn't really.
1638 * Returns the number of bytes not transferred.
1639 * The message is NOT changed.
1642 cpytoc(mblk_t * mp, int off, int len, char *cp)
1646 for (; mp && len > 0; mp = mp->b_cont) {
1647 if (mp->b_datap->db_type != M_DATA) {
1650 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1651 memcpy(cp, (char *)mp->b_rptr, n);
1659 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1660 * but it doesn't really.
1661 * This sucks, anyway, do it like m_cpy.... below
1664 cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
1669 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1670 if (mp->b_datap->db_type != M_DATA) {
1673 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1679 t = iovs[i].iov_len;
1682 memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1692 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1693 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1695 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1697 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1700 unsigned int l1, l2, i, t;
1702 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1703 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
1706 if (m->m_len <= off) {
1716 p1 = mtod(m, caddr_t) + off;
1717 l1 = m->m_len - off;
1719 p2 = iovs[0].iov_base;
1720 l2 = iovs[0].iov_len;
1723 t = MIN(l1, MIN(l2, (unsigned int)len));
1734 p1 = mtod(m, caddr_t);
1740 p2 = iovs[i].iov_base;
1741 l2 = iovs[i].iov_len;
1749 #endif /* AFS_SUN5_ENV */
1751 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1753 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1754 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1760 struct rx_packet *phandle;
1761 int hdr_len, data_len;
1766 m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1773 #endif /*KERNEL && !UKERNEL */
1776 /* send a response to a debug packet */
1779 rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
1780 afs_int32 ahost, short aport, int istack)
1782 struct rx_debugIn tin;
1784 struct rx_serverQueueEntry *np, *nqe;
1787 * Only respond to client-initiated Rx debug packets,
1788 * and clear the client flag in the response.
1790 if (ap->header.flags & RX_CLIENT_INITIATED) {
1791 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1792 rxi_EncodePacketHeader(ap);
1797 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1798 /* all done with packet, now set length to the truth, so we can
1799 * reuse this packet */
1800 rx_computelen(ap, ap->length);
1802 tin.type = ntohl(tin.type);
1803 tin.index = ntohl(tin.index);
1805 case RX_DEBUGI_GETSTATS:{
1806 struct rx_debugStats tstat;
1808 /* get basic stats */
1809 memset(&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1810 tstat.version = RX_DEBUGI_VERSION;
1811 #ifndef RX_ENABLE_LOCKS
1812 tstat.waitingForPackets = rx_waitingForPackets;
1814 MUTEX_ENTER(&rx_serverPool_lock);
1815 tstat.nFreePackets = htonl(rx_nFreePackets);
1816 tstat.nPackets = htonl(rx_nPackets);
1817 tstat.callsExecuted = htonl(rxi_nCalls);
1818 tstat.packetReclaims = htonl(rx_packetReclaims);
1819 tstat.usedFDs = CountFDs(64);
1820 tstat.nWaiting = htonl(rx_nWaiting);
1821 tstat.nWaited = htonl(rx_nWaited);
1822 queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
1824 MUTEX_EXIT(&rx_serverPool_lock);
1825 tstat.idleThreads = htonl(tstat.idleThreads);
1826 tl = sizeof(struct rx_debugStats) - ap->length;
1828 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1831 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1833 ap->length = sizeof(struct rx_debugStats);
1834 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1835 rx_computelen(ap, ap->length);
1840 case RX_DEBUGI_GETALLCONN:
1841 case RX_DEBUGI_GETCONN:{
1843 struct rx_connection *tc;
1844 struct rx_call *tcall;
1845 struct rx_debugConn tconn;
1846 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1849 tl = sizeof(struct rx_debugConn) - ap->length;
1851 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1855 memset(&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1856 /* get N'th (maybe) "interesting" connection info */
1857 for (i = 0; i < rx_hashTableSize; i++) {
1858 #if !defined(KERNEL)
1859 /* the time complexity of the algorithm used here
1860 * exponentially increses with the number of connections.
1862 #ifdef AFS_PTHREAD_ENV
1868 MUTEX_ENTER(&rx_connHashTable_lock);
1869 /* We might be slightly out of step since we are not
1870 * locking each call, but this is only debugging output.
1872 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
1873 if ((all || rxi_IsConnInteresting(tc))
1874 && tin.index-- <= 0) {
1875 tconn.host = tc->peer->host;
1876 tconn.port = tc->peer->port;
1877 tconn.cid = htonl(tc->cid);
1878 tconn.epoch = htonl(tc->epoch);
1879 tconn.serial = htonl(tc->serial);
1880 for (j = 0; j < RX_MAXCALLS; j++) {
1881 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1882 if ((tcall = tc->call[j])) {
1883 tconn.callState[j] = tcall->state;
1884 tconn.callMode[j] = tcall->mode;
1885 tconn.callFlags[j] = tcall->flags;
1886 if (queue_IsNotEmpty(&tcall->rq))
1887 tconn.callOther[j] |= RX_OTHER_IN;
1888 if (queue_IsNotEmpty(&tcall->tq))
1889 tconn.callOther[j] |= RX_OTHER_OUT;
1891 tconn.callState[j] = RX_STATE_NOTINIT;
1894 tconn.natMTU = htonl(tc->peer->natMTU);
1895 tconn.error = htonl(tc->error);
1896 tconn.flags = tc->flags;
1897 tconn.type = tc->type;
1898 tconn.securityIndex = tc->securityIndex;
1899 if (tc->securityObject) {
1900 RXS_GetStats(tc->securityObject, tc,
1902 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1903 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1906 DOHTONL(packetsReceived);
1907 DOHTONL(packetsSent);
1908 DOHTONL(bytesReceived);
1912 sizeof(tconn.secStats.spares) /
1917 sizeof(tconn.secStats.sparel) /
1918 sizeof(afs_int32); i++)
1922 MUTEX_EXIT(&rx_connHashTable_lock);
1923 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1926 ap->length = sizeof(struct rx_debugConn);
1927 rxi_SendDebugPacket(ap, asocket, ahost, aport,
1933 MUTEX_EXIT(&rx_connHashTable_lock);
1935 /* if we make it here, there are no interesting packets */
1936 tconn.cid = htonl(0xffffffff); /* means end */
1937 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1940 ap->length = sizeof(struct rx_debugConn);
1941 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1947 * Pass back all the peer structures we have available
1950 case RX_DEBUGI_GETPEER:{
1953 struct rx_debugPeer tpeer;
1956 tl = sizeof(struct rx_debugPeer) - ap->length;
1958 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1962 memset(&tpeer, 0, sizeof(tpeer));
1963 for (i = 0; i < rx_hashTableSize; i++) {
1964 #if !defined(KERNEL)
1965 /* the time complexity of the algorithm used here
1966 * exponentially increses with the number of peers.
1968 * Yielding after processing each hash table entry
1969 * and dropping rx_peerHashTable_lock.
1970 * also increases the risk that we will miss a new
1971 * entry - but we are willing to live with this
1972 * limitation since this is meant for debugging only
1974 #ifdef AFS_PTHREAD_ENV
1980 MUTEX_ENTER(&rx_peerHashTable_lock);
1981 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1982 if (tin.index-- <= 0) {
1984 MUTEX_EXIT(&rx_peerHashTable_lock);
1986 MUTEX_ENTER(&tp->peer_lock);
1987 tpeer.host = tp->host;
1988 tpeer.port = tp->port;
1989 tpeer.ifMTU = htons(tp->ifMTU);
1990 tpeer.idleWhen = htonl(tp->idleWhen);
1991 tpeer.refCount = htons(tp->refCount);
1992 tpeer.burstSize = tp->burstSize;
1993 tpeer.burst = tp->burst;
1994 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1995 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1996 tpeer.rtt = htonl(tp->rtt);
1997 tpeer.rtt_dev = htonl(tp->rtt_dev);
1998 tpeer.timeout.sec = htonl(tp->timeout.sec);
1999 tpeer.timeout.usec = htonl(tp->timeout.usec);
2000 tpeer.nSent = htonl(tp->nSent);
2001 tpeer.reSends = htonl(tp->reSends);
2002 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
2003 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
2004 tpeer.rateFlag = htonl(tp->rateFlag);
2005 tpeer.natMTU = htons(tp->natMTU);
2006 tpeer.maxMTU = htons(tp->maxMTU);
2007 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
2008 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
2009 tpeer.MTU = htons(tp->MTU);
2010 tpeer.cwind = htons(tp->cwind);
2011 tpeer.nDgramPackets = htons(tp->nDgramPackets);
2012 tpeer.congestSeq = htons(tp->congestSeq);
2013 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
2014 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
2015 tpeer.bytesReceived.high =
2016 htonl(tp->bytesReceived.high);
2017 tpeer.bytesReceived.low =
2018 htonl(tp->bytesReceived.low);
2019 MUTEX_EXIT(&tp->peer_lock);
2021 MUTEX_ENTER(&rx_peerHashTable_lock);
2023 MUTEX_EXIT(&rx_peerHashTable_lock);
2025 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
2028 ap->length = sizeof(struct rx_debugPeer);
2029 rxi_SendDebugPacket(ap, asocket, ahost, aport,
2035 MUTEX_EXIT(&rx_peerHashTable_lock);
2037 /* if we make it here, there are no interesting packets */
2038 tpeer.host = htonl(0xffffffff); /* means end */
2039 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
2042 ap->length = sizeof(struct rx_debugPeer);
2043 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2048 case RX_DEBUGI_RXSTATS:{
2052 tl = sizeof(rx_stats) - ap->length;
2054 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
2058 /* Since its all int32s convert to network order with a loop. */
2059 if (rx_stats_active)
2060 MUTEX_ENTER(&rx_stats_mutex);
2061 s = (afs_int32 *) & rx_stats;
2062 for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
2063 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
2066 ap->length = sizeof(rx_stats);
2067 if (rx_stats_active)
2068 MUTEX_EXIT(&rx_stats_mutex);
2069 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2075 /* error response packet */
2076 tin.type = htonl(RX_DEBUGI_BADTYPE);
2077 tin.index = tin.type;
2078 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
2080 ap->length = sizeof(struct rx_debugIn);
2081 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2089 rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
2090 afs_int32 ahost, short aport, int istack)
2095 * Only respond to client-initiated version requests, and
2096 * clear that flag in the response.
2098 if (ap->header.flags & RX_CLIENT_INITIATED) {
2101 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2102 rxi_EncodePacketHeader(ap);
2103 memset(buf, 0, sizeof(buf));
2104 strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2105 rx_packetwrite(ap, 0, 65, buf);
2108 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2116 /* send a debug packet back to the sender */
2118 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2119 afs_int32 ahost, short aport, afs_int32 istack)
2121 struct sockaddr_in taddr;
2122 unsigned int i, nbytes, savelen = 0;
2125 int waslocked = ISAFS_GLOCK();
2128 taddr.sin_family = AF_INET;
2129 taddr.sin_port = aport;
2130 taddr.sin_addr.s_addr = ahost;
2131 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2132 taddr.sin_len = sizeof(struct sockaddr_in);
2135 /* We need to trim the niovecs. */
2136 nbytes = apacket->length;
2137 for (i = 1; i < apacket->niovecs; i++) {
2138 if (nbytes <= apacket->wirevec[i].iov_len) {
2139 savelen = apacket->wirevec[i].iov_len;
2140 saven = apacket->niovecs;
2141 apacket->wirevec[i].iov_len = nbytes;
2142 apacket->niovecs = i + 1; /* so condition fails because i == niovecs */
2144 nbytes -= apacket->wirevec[i].iov_len;
2147 #ifdef RX_KERNEL_TRACE
2148 if (ICL_SETACTIVE(afs_iclSetp)) {
2151 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2152 "before osi_NetSend()");
2160 /* debug packets are not reliably delivered, hence the cast below. */
2161 (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2162 apacket->length + RX_HEADER_SIZE, istack);
2164 #ifdef RX_KERNEL_TRACE
2165 if (ICL_SETACTIVE(afs_iclSetp)) {
2167 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2168 "after osi_NetSend()");
2177 if (saven) { /* means we truncated the packet above. */
2178 apacket->wirevec[i - 1].iov_len = savelen;
2179 apacket->niovecs = saven;
2184 /* Send the packet to appropriate destination for the specified
2185 * call. The header is first encoded and placed in the packet.
2188 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2189 struct rx_packet *p, int istack)
2195 struct sockaddr_in addr;
2196 struct rx_peer *peer = conn->peer;
2199 char deliveryType = 'S';
2201 /* The address we're sending the packet to */
2202 memset(&addr, 0, sizeof(addr));
2203 addr.sin_family = AF_INET;
2204 addr.sin_port = peer->port;
2205 addr.sin_addr.s_addr = peer->host;
2207 /* This stuff should be revamped, I think, so that most, if not
2208 * all, of the header stuff is always added here. We could
2209 * probably do away with the encode/decode routines. XXXXX */
2211 /* Stamp each packet with a unique serial number. The serial
2212 * number is maintained on a connection basis because some types
2213 * of security may be based on the serial number of the packet,
2214 * and security is handled on a per authenticated-connection
2216 /* Pre-increment, to guarantee no zero serial number; a zero
2217 * serial number means the packet was never sent. */
2218 MUTEX_ENTER(&conn->conn_data_lock);
2219 p->header.serial = ++conn->serial;
2220 MUTEX_EXIT(&conn->conn_data_lock);
2221 /* This is so we can adjust retransmit time-outs better in the face of
2222 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2224 if (p->firstSerial == 0) {
2225 p->firstSerial = p->header.serial;
2228 /* If an output tracer function is defined, call it with the packet and
2229 * network address. Note this function may modify its arguments. */
2230 if (rx_almostSent) {
2231 int drop = (*rx_almostSent) (p, &addr);
2232 /* drop packet if return value is non-zero? */
2234 deliveryType = 'D'; /* Drop the packet */
2238 /* Get network byte order header */
2239 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2240 * touch ALL the fields */
2242 /* Send the packet out on the same socket that related packets are being
2246 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2249 /* Possibly drop this packet, for testing purposes */
2250 if ((deliveryType == 'D')
2251 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2252 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2253 deliveryType = 'D'; /* Drop the packet */
2255 deliveryType = 'S'; /* Send the packet */
2256 #endif /* RXDEBUG */
2258 /* Loop until the packet is sent. We'd prefer just to use a
2259 * blocking socket, but unfortunately the interface doesn't
2260 * allow us to have the socket block in send mode, and not
2261 * block in receive mode */
2263 waslocked = ISAFS_GLOCK();
2264 #ifdef RX_KERNEL_TRACE
2265 if (ICL_SETACTIVE(afs_iclSetp)) {
2268 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2269 "before osi_NetSend()");
2278 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2279 p->length + RX_HEADER_SIZE, istack)) != 0) {
2280 /* send failed, so let's hurry up the resend, eh? */
2281 if (rx_stats_active)
2282 rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
2283 p->retryTime = p->timeSent; /* resend it very soon */
2284 clock_Addmsec(&(p->retryTime),
2285 10 + (((afs_uint32) p->backoff) << 8));
2286 /* Some systems are nice and tell us right away that we cannot
2287 * reach this recipient by returning an error code.
2288 * So, when this happens let's "down" the host NOW so
2289 * we don't sit around waiting for this host to timeout later.
2293 (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
2294 #elif defined(AFS_LINUX20_ENV)
2295 code == -ENETUNREACH
2296 #elif defined(AFS_DARWIN_ENV)
2297 code == EHOSTUNREACH
2302 call->lastReceiveTime = 0;
2305 #ifdef RX_KERNEL_TRACE
2306 if (ICL_SETACTIVE(afs_iclSetp)) {
2308 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2309 "after osi_NetSend()");
2320 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d",
2321 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2322 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2323 p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2325 if (rx_stats_active)
2326 rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2327 MUTEX_ENTER(&peer->peer_lock);
2328 hadd32(peer->bytesSent, p->length);
2329 MUTEX_EXIT(&peer->peer_lock);
2332 /* Send a list of packets to appropriate destination for the specified
2333 * connection. The headers are first encoded and placed in the packets.
2336 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2337 struct rx_packet **list, int len, int istack)
2339 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2342 struct sockaddr_in addr;
2343 struct rx_peer *peer = conn->peer;
2345 struct rx_packet *p = NULL;
2346 struct iovec wirevec[RX_MAXIOVECS];
2347 int i, length, code;
2350 struct rx_jumboHeader *jp;
2352 char deliveryType = 'S';
2354 /* The address we're sending the packet to */
2355 addr.sin_family = AF_INET;
2356 addr.sin_port = peer->port;
2357 addr.sin_addr.s_addr = peer->host;
2359 if (len + 1 > RX_MAXIOVECS) {
2360 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2364 * Stamp the packets in this jumbogram with consecutive serial numbers
2366 MUTEX_ENTER(&conn->conn_data_lock);
2367 serial = conn->serial;
2368 conn->serial += len;
2369 MUTEX_EXIT(&conn->conn_data_lock);
2372 /* This stuff should be revamped, I think, so that most, if not
2373 * all, of the header stuff is always added here. We could
2374 * probably do away with the encode/decode routines. XXXXX */
2377 length = RX_HEADER_SIZE;
2378 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2379 wirevec[0].iov_len = RX_HEADER_SIZE;
2380 for (i = 0; i < len; i++) {
2383 /* The whole 3.5 jumbogram scheme relies on packets fitting
2384 * in a single packet buffer. */
2385 if (p->niovecs > 2) {
2386 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2389 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2392 if (p->length != RX_JUMBOBUFFERSIZE) {
2393 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2395 p->header.flags |= RX_JUMBO_PACKET;
2396 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2397 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2399 wirevec[i + 1].iov_len = p->length;
2400 length += p->length;
2402 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2404 /* Convert jumbo packet header to network byte order */
2405 temp = (afs_uint32) (p->header.flags) << 24;
2406 temp |= (afs_uint32) (p->header.spare);
2407 *(afs_uint32 *) jp = htonl(temp);
2409 jp = (struct rx_jumboHeader *)
2410 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2412 /* Stamp each packet with a unique serial number. The serial
2413 * number is maintained on a connection basis because some types
2414 * of security may be based on the serial number of the packet,
2415 * and security is handled on a per authenticated-connection
2417 /* Pre-increment, to guarantee no zero serial number; a zero
2418 * serial number means the packet was never sent. */
2419 p->header.serial = ++serial;
2420 /* This is so we can adjust retransmit time-outs better in the face of
2421 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2423 if (p->firstSerial == 0) {
2424 p->firstSerial = p->header.serial;
2427 /* If an output tracer function is defined, call it with the packet and
2428 * network address. Note this function may modify its arguments. */
2429 if (rx_almostSent) {
2430 int drop = (*rx_almostSent) (p, &addr);
2431 /* drop packet if return value is non-zero? */
2433 deliveryType = 'D'; /* Drop the packet */
2437 /* Get network byte order header */
2438 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2439 * touch ALL the fields */
    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
        (conn->type ==
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
    } else {
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */

        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        waslocked = ISAFS_GLOCK();
        if (!istack && waslocked)
            AFS_GUNLOCK();
#endif
        if ((code =
             osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
                         istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            if (rx_stats_active)
                rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
            for (i = 0; i < len; i++) {
                p = list[i];
                p->retryTime = p->timeSent;     /* resend it very soon */
                clock_Addmsec(&(p->retryTime),
                              10 + (((afs_uint32) p->backoff) << 8));
            }
            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later.
             */
            if (call &&
#ifdef AFS_NT40_ENV
                ((code == -1 && WSAGetLastError() == WSAEHOSTUNREACH)
                 || (code == -WSAEHOSTUNREACH))
#elif defined(AFS_LINUX20_ENV)
                code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
                code == EHOSTUNREACH
#else
                0
#endif
                )
                call->lastReceiveTime = 0;
        }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        if (!istack && waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
         ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
         p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    if (rx_stats_active)
        rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
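/*
 * A sketch of one caller's pattern (in the style of rxi_SendCallAbort in
 * rx.c; the variable names are illustrative): the abort code is the whole
 * data payload.
 *
 *   afs_int32 error = htonl(call->error);
 *   packet = rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
 *                            (char *)&error, sizeof(error), istack);
 */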
struct rx_packet *
rxi_SendSpecial(struct rx_call *call,
                struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
        p->length = nbytes;
    else
        nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);

    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1; /* so condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {
        /* means we truncated the packet above; we probably don't really
         * need to undo that, but it seems safer this way, given that
         * sneaky optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
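/*
 * Wire layout of the RX_HEADER_SIZE-byte header, as seven 32-bit words in
 * network byte order (this mirrors the stores below):
 *
 *   word 0   epoch
 *   word 1   cid (connection id | channel)
 *   word 2   callNumber
 *   word 3   seq
 *   word 4   serial
 *   word 5   type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6   spare<<16 | serviceId  (the top 16 bits carry the security
 *            checksum when the security class computes one)
 */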
void
rxi_EncodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);  /* MTUXXX */

    memset(buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);  /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf++);
    p->header.cid = ntohl(*buf++);
    p->header.callNumber = ntohl(*buf++);
    p->header.seq = ntohl(*buf++);
    p->header.serial = ntohl(*buf++);

    temp = ntohl(*buf++);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf++);
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
void
rxi_PrepareSendPacket(struct rx_call *call,
                      struct rx_packet *p, int last)
{
    struct rx_connection *conn = call->conn;
    unsigned int i;
    afs_int32 len;              /* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);  /* Never yet transmitted */
    clock_Zero(&p->firstSent);  /* Never yet transmitted */
    p->header.serial = 0;       /* Another way of saying never transmitted... */

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
    } else if (i < p->niovecs) {
        /* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
        rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
#else /* !RX_ENABLE_TSFPQ */
        MUTEX_ENTER(&rx_freePktQ_lock);
        rxi_FreeDataBufsNoLock(p, i);
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */
        p->niovecs = i;
    }
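    /* At this point len is zero or negative: the loop above overshot the
     * target length by -len bytes, so the adjustment below trims the last
     * iovec until the iov_lens again sum to length + securityHeaderSize. */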
    p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
}
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
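/*
 * Worked example, assuming the usual constants (RX_HEADER_SIZE = 28,
 * RX_JUMBOBUFFERSIZE = 1412, RX_JUMBOHEADERSIZE = 4): for an Ethernet
 * MTU of 1500, adjMTU = 28 + 1412 + 4 = 1444; the 56 bytes left over
 * cannot hold another 1416-byte jumbo fragment, so 1444 is returned.
 */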
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU, frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
        return mtu;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU)
        return mtu;
    mtu -= adjMTU;
    if (mtu <= 0)
        return 0;
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
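/*
 * For example, with rxi_nSendFrags = 4 and an already-adjusted local MTU
 * of 1444, maxMTU starts at 5776, is capped at the peer's advertised
 * maximum, and is then rounded down to a whole number of jumbo fragments
 * by rxi_AdjustIfMTU (5776 would become 1444 + 3 * 1416 = 5692).
 */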
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
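/*
 * Worked example, assuming the usual constants and UDP_HDR_SIZE = 28
 * (20-byte IP header + 8-byte UDP header): frags = 4 and mtu = 1444 give
 * maxMTU = 4 * 1472 - 28 = 5860; subtracting the 2856 bytes accounted to
 * the first and last packets leaves 3004, room for two more 1416-byte
 * middle buffers, so the datagram is sent as 2 + 2 = 4 packets.
 */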
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
/*
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where the packet leakage is.
 */
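/*
 * A sketch of intended use (the file name and cookie string here are
 * illustrative):
 *
 *   rx_DumpPackets(fopen("rxpackets.log", "w"), "leak-check");
 *
 * The cookie tags every line of output, so successive dumps written to
 * the same file can be told apart.
 */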
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
    struct rx_packet *p;
#ifdef AFS_NT40_ENV
    DWORD zilch;
    char output[2048];
#define RXDPRINTF sprintf
#define RXDPRINTOUT output
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);
    RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    for (p = rx_mallocedP; p; p = p->allNextp) {
        RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, retryTime=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, backoff=%u, length=%u  header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
                  cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec, p->retryTime.sec, p->retryTime.usec,
                  p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->backoff, (afs_uint32)p->length,
                  p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
                  (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
                  (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
#ifdef AFS_NT40_ENV
        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }

    RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
#endif /* RXDEBUG_PACKET */
    return 0;
}