/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */
#include <afsconfig.h>
#ifdef KERNEL
#include "afs/param.h"
#else
#include <afs/param.h>
#endif

#ifdef KERNEL
#if defined(UKERNEL)
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#include "rx/rx_atomic.h"
#include "rx/rx_internal.h"
#include "rx/rx_stats.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#endif
#ifndef AFS_LINUX20_ENV
#endif
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
#include "afs/sysincludes.h"
#endif
#if defined(AFS_OBSD_ENV)
#endif
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#endif
#endif
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx_atomic.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#include "rx_internal.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#else /* KERNEL */
#include "sys/types.h"
#if defined(AFS_NT40_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include "rx_xmit_nt.h"
#else
#include <sys/socket.h>
#include <netinet/in.h>
#endif
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"
#endif /* KERNEL */
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */

static struct rx_packet *rx_mallocedP = 0;
#ifdef RXDEBUG_PACKET
static afs_uint32 rx_packet_id = 0;
#endif

extern char cml_version_number[];

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_uint32 ahost, short aport,
                                afs_int32 istack);

#ifdef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
#else
static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
                                   afs_uint32 first,
                                   struct rx_queue * q);
#endif
/* some rules about packets:
 * 1.  When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */

/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of
 *        the word size.
 *        offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            return
                *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                                 (offset - l)));
        }
        l += packet->wirevec[i].iov_len;
    }

    return 0;
}
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the word size.
 *        offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                             (offset - l))) = data;
            return 0;
        }
        l += packet->wirevec[i].iov_len;
    }

    return 0;
}
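
/*
 * Usage sketch (illustrative, not part of the original file): these two
 * routines are the generic fallbacks behind the word-sized packet
 * accessors.  Assuming a packet p whose data area spans several
 * continuation buffers, a word at a word-aligned byte offset can be
 * fetched or stored with:
 *
 *     afs_int32 v = rx_SlowGetInt32(p, 12 * sizeof(afs_int32));
 *     rx_SlowPutInt32(p, 12 * sizeof(afs_int32), htonl(42));
 *
 * Both walk wirevec[1..niovecs-1], accumulating iov_len into l until the
 * offset falls inside the current buffer.
 */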
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the
 *        word size.
 *        offset is an integral multiple of the word size.
 * AND
 *        all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
afs_int32
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
                  char *out)
{
    unsigned int i, j, l, r;
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            break;
        }
        l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i < packet->niovecs)) {
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        r -= j;
        out += j;
        l += packet->wirevec[i].iov_len;
        offset = l;
        i++;
    }

    return (r ? (resid - r) : resid);
}
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the
 *        word size.
 *        offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
{
    unsigned int i, j, l, o, r;
    char *b;

    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > o) {
            break;
        }
        l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i <= RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
                break;

        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        memcpy(b, in, j);
        r -= j;
        in += j;
        l += packet->wirevec[i].iov_len;
        offset = l;
        i++;
    }

    return (r ? (resid - r) : resid);
}
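
/*
 * Usage sketch (illustrative, not part of the original file): these are
 * the multi-iovec fallbacks used by the rx_packetread()/rx_packetwrite()
 * style accessors.  For example, the debug handler below pulls a request
 * structure out of a packet with the equivalent of:
 *
 *     struct rx_debugIn tin;
 *     rx_SlowReadPacket(ap, 0, sizeof(tin), (char *)&tin);
 *
 * Note the write path can grow the packet: when the target offset runs
 * past the last iovec, rxi_AllocDataBuf() appends continuation buffers
 * (++niovecs as a side effect).  A return value smaller than resid means
 * the transfer was truncated.
 */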
int
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
        RX_PACKET_IOV_FULLINIT(p);
    }

    return num_pkts;
}
#ifdef RX_ENABLE_TSFPQ
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_ts_info_t * rx_ts_info;
    int transfer;
    SPLVAR;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
    if (transfer > 0) {
        NETPRI;
        MUTEX_ENTER(&rx_freePktQ_lock);
        transfer = MAX(transfer, rx_TSFPQGlobSize);
        if (transfer > rx_nFreePackets) {
            /* alloc enough for us, plus a few globs for other threads */
            rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
        }

        RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

        MUTEX_EXIT(&rx_freePktQ_lock);
        USERPRI;
    }

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_packet *c;
    int i;
#ifdef KERNEL
    int overq = 0;
#endif
    SPLVAR;

    NETPRI;

    MUTEX_ENTER(&rx_freePktQ_lock);

#ifdef KERNEL
    for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
         num_pkts--, overq++);

    if (overq) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            switch (class) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
                break;
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
                break;
            }
        }
    }

    if (rx_nFreePackets < num_pkts)
        num_pkts = rx_nFreePackets;

    if (!num_pkts) {
        rxi_NeedMorePackets = TRUE;
        goto done;
    }
#else /* KERNEL */
    if (rx_nFreePackets < num_pkts) {
        rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), 4 * rx_initSendWindow));
    }
#endif /* KERNEL */

    for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
         i < num_pkts;
         i++, c=queue_Next(c, rx_packet)) {
        RX_FPQ_MARK_USED(c);
    }

    queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);

    rx_nFreePackets -= num_pkts;

#ifdef KERNEL
  done:
#endif
    MUTEX_EXIT(&rx_freePktQ_lock);

    USERPRI;
    return num_pkts;
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct rx_queue * q)
{
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;
    SPLVAR;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

    if (!num_pkts) {
        for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        }
    } else {
        for (queue_Scan(q, c, nc, rx_packet)) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        }
    }

    if (num_pkts) {
        RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
    }

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        NETPRI;
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
        USERPRI;
    }

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct rx_queue *q)
{
    struct rx_queue cbs;
    struct rx_packet *p, *np;
    int qlen = 0;
    SPLVAR;

    osi_Assert(num_pkts >= 0);
    queue_Init(&cbs);

    if (!num_pkts) {
        for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
            }
            RX_FPQ_MARK_FREE(p);
        }
        if (!num_pkts)
            return 0;
    } else {
        for (queue_Scan(q, p, np, rx_packet)) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
            }
            RX_FPQ_MARK_FREE(p);
        }
    }

    if (qlen) {
        queue_SpliceAppend(q, &cbs);
        qlen += num_pkts;
    } else
        qlen = num_pkts;

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;

    return num_pkts;
}
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
int
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
{
    int i;
    i = p->niovecs - 1;
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
            return 0;
        }
    } else {
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
            return 0;
        }
    }

    return 0;
}
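
/*
 * Usage sketch (illustrative, not part of the original file): rxkad is the
 * intended caller.  If a security package pads user data to its encryption
 * block size (EBS), it can reclaim the rounding slack after rx_FlushWrite()
 * has trimmed the final iovec:
 *
 *     int pad = ebs - (len % ebs);    // 'ebs' and 'len' are hypothetical
 *     if (pad != ebs)
 *         rxi_RoundUpPacket(p, pad);
 *
 * Only the last iovec grows; this relies on every buffer really having
 * RX_FIRSTBUFFERSIZE or RX_CBUFFERSIZE bytes of backing store.
 */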
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
    int i, nv;
    struct rx_queue q;
    struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
        nv++;
    if ((nv + p->niovecs) > RX_MAXWVECS)
        nv = RX_MAXWVECS - p->niovecs;
    if (nv < 1)
        return nb;

    /* allocate buffers */
    queue_Init(&q);
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
        queue_Remove(cb);
        p->wirevec[i].iov_base = (caddr_t) cb->localdata;
        p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    }

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
    p->niovecs += nv;

    return nb;
}
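
/*
 * Worked example (illustrative, not part of the original file): assuming
 * RX_CBUFFERSIZE is 1024, a request to append nb = 3000 bytes computes
 * nv = 3000/1024 = 2, then bumps it to 3 because 2*1024 < 3000.  The three
 * checked-out continuation buffers are spliced into wirevec, and the
 * caller sees nb - 3*1024 = -72: a return value <= 0 signals success (with
 * 72 bytes of slack), while a positive value is the shortfall left over
 * when RX_MAXWVECS capped the number of new iovecs.
 */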
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    int getme;
    SPLVAR;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
    osi_Assert(p);

    PIN(p, getme);		/* XXXXX */
    memset(p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
    /* TSFPQ patch also needs to keep track of total packets */

    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        p->niovecs = 2;

        RX_TS_FPQ_CHECKIN(rx_ts_info,p);

        NETPRI;
        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        rx_mallocedP = p;
        MUTEX_EXIT(&rx_freePktQ_lock);
        USERPRI;
    }
    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        NETPRI;
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_NeedMorePackets = FALSE;
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
        USERPRI;
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    int getme;
    SPLVAR;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
    osi_Assert(p);

    PIN(p, getme);		/* XXXXX */
    memset(p, 0, getme);
    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
#endif
        p->niovecs = 2;

        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        rx_mallocedP = p;
    }

    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
{
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    int getme;
    SPLVAR;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset(p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        p->niovecs = 2;
        RX_TS_FPQ_CHECKIN(rx_ts_info,p);

        NETPRI;
        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        rx_mallocedP = p;
        MUTEX_EXIT(&rx_freePktQ_lock);
        USERPRI;
    }
    rx_ts_info->_FPQ.delta += apackets;

    if (flush_global &&
        (num_keep_local < apackets)) {
        NETPRI;
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
        rxi_NeedMorePackets = FALSE;
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
        USERPRI;
    }
}
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
void
rxi_MorePacketsNoLock(int apackets)
{
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;
    int getme;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
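
    /* Worked example (illustrative, not part of the original file): each
     * jumbo-capable packet needs (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)
     * / RX_CBUFFERSIZE extra continuation buffers, call it k.  A request
     * for 100 packets therefore becomes 100 + 25*k allocations, enough for
     * a quarter of the packets to hold a maximal jumbogram. */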
    do {
        getme = apackets * sizeof(struct rx_packet);
        p = (struct rx_packet *)osi_Alloc(getme);
        if (p == NULL) {
            apackets -= apackets / 4;
            osi_Assert(apackets > 0);
        }
    } while(p == NULL);
    memset(p, 0, getme);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info,apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
#endif
        p->niovecs = 2;

        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        rx_mallocedP = p;
    }

    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();
}
void
rxi_FreeAllPackets(void)
{
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
}
#ifdef RX_ENABLE_TSFPQ
void
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
{
    struct rx_ts_info_t * rx_ts_info;
    int xfer;
    SPLVAR;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
        NETPRI;
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (num_keep_local < rx_ts_info->_FPQ.len) {
            xfer = rx_ts_info->_FPQ.len - num_keep_local;
            RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
            rxi_PacketsUnWait();
        } else {
            xfer = num_keep_local - rx_ts_info->_FPQ.len;
            if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
                xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
            if (rx_nFreePackets < xfer) {
                rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
            }
            RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
        }
        MUTEX_EXIT(&rx_freePktQ_lock);
        USERPRI;
    }
}

void
rxi_FlushLocalPacketsTSFPQ(void)
{
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
}
#endif /* RX_ENABLE_TSFPQ */
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
void
rx_CheckPackets(void)
{
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_maxSendWindow);
    }
}

/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info,p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        RX_TS_FPQ_LTOG(rx_ts_info);
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_FPQ_MARK_FREE(p);
    rx_nFreePackets++;
    queue_Append(&rx_freePacketQueue, p);
}
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
{
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info,p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        NETPRI;
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
        USERPRI;
    }
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *
 * returns:
 *   number of continuation buffers freed
 */
#ifndef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
{
    struct iovec *iov;
    struct rx_packet * cb;
    int count = 0;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
        iov = &p->wirevec[first];
        if (!iov->iov_base)
            osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
        cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
        RX_FPQ_MARK_FREE(cb);
        queue_Append(q, cb);
    }
    p->length = 0;
    p->niovecs = 0;

    return count;
}
#endif
/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p      -- packet from which to free continuation buffers
 * [IN] first  -- iovec offset of first continuation buffer to free
 *
 * returns:
 *   zero always
 */
static int
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
{
    struct iovec *iov;

    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
        if (!iov->iov_base)
            osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }
    p->length = 0;
    p->niovecs = 0;

    return 0;
}
#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p             -- packet from which continuation buffers will be freed
 * [IN] first         -- iovec offset of first continuation buffer to free
 *                       any value less than 2, the min number of iovecs,
 *                       is treated as if it is 2.
 * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 *                       global free pool before returning
 *
 * returns:
 *   zero always
 */
static int
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
{
    struct iovec *iov;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
        if (!iov->iov_base)
            osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
        RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
    }
    p->length = 0;
    p->niovecs = 0;

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        NETPRI;
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
        USERPRI;
    }
    return 0;
}
#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
void
rxi_RestoreDataBufs(struct rx_packet *p)
{
    unsigned int i;
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
            rxi_nBadIovecs++;
            p->niovecs = i;
            break;
        }
        iov->iov_len = RX_CBUFFERSIZE;
    }
}
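
/*
 * Usage sketch (illustrative, not part of the original file): the receive
 * path shrinks iov_len fields to match what actually arrived, so a packet
 * that is recycled for another read must have its vector widths reset
 * first, e.g.:
 *
 *     rxi_RestoreDataBufs(p);
 *     code = rxi_ReadPacket(socket, p, &host, &port);
 *
 * A NULL iov_base is tolerated: it is counted in rxi_nBadIovecs and
 * niovecs is clamped at that slot instead of panicking.
 */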
#ifdef RX_ENABLE_TSFPQ
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;
    SPLVAR;

    if (first != 1)
        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
        if (!iov->iov_base)
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov >= end)
        return;

    RX_TS_INFO_GET(rx_ts_info);
    for (; iov < end; iov++) {
        if (!iov->iov_base)
            osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
        RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
        p->niovecs--;
    }
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        NETPRI;
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
        USERPRI;
    }

    return;
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;
    SPLVAR;

    if (first != 1)
        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
        if (!iov->iov_base)
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov >= end)
        return;

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end; iov++) {
        if (!iov->iov_base)
            osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
        p->niovecs--;
    }
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;

    return;
}
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacket(struct rx_packet *p)
{
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_FreePacket(struct rx_packet *p)
{
    SPLVAR;

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
}
#endif /* RX_ENABLE_TSFPQ */
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
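
/*
 * Illustrative note (not part of the original file): the resulting
 * invariant is
 *
 *     wire datagram size == p->length + RX_HEADER_SIZE
 *
 * which is why the send paths below hand osi_NetSend() a length of
 * "p->length + RX_HEADER_SIZE", and why rxi_ReadPacket() recovers
 * p->length as nbytes - RX_HEADER_SIZE after a successful receive.
 */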
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            switch (class) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
                break;
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
                break;
            }
        }
        return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

#ifdef KERNEL
        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);
#endif /* KERNEL */

        RX_TS_FPQ_GTOL(rx_ts_info);
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info,p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
    return p;
}
#else /* RX_ENABLE_TSFPQ */
struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    struct rx_packet *p;

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            switch (class) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
                break;
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
                break;
            }
        }
        return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);

#ifdef KERNEL
    if (queue_IsEmpty(&rx_freePacketQueue))
        osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
    if (queue_IsEmpty(&rx_freePacketQueue))
        rxi_MorePacketsNoLock(rx_maxSendWindow);
#endif /* KERNEL */

    rx_nFreePackets--;
    p = queue_First(&rx_freePacketQueue, rx_packet);
    queue_Remove(p);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacketTSFPQ(int class, int pull_global)
{
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);

        RX_TS_FPQ_GTOL(rx_ts_info);

        MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
        return NULL;
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info,p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
    return p;
}
#else /* RX_ENABLE_TSFPQ */
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call. It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
struct rx_packet *
rxi_AllocSendPacket(struct rx_call *call, int want)
{
    struct rx_packet *p = (struct rx_packet *)0;
    int mud;
    unsigned delta;

    SPLVAR;
    mud = call->MTU - RX_HEADER_SIZE;
    delta =
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
        want += delta;
        want = MIN(want, mud);

        if ((unsigned)want > p->length)
            (void)rxi_AllocDataBuf(p, (want - p->length),
                                   RX_PACKET_CLASS_SEND_CBUF);

        if (p->length > mud)
            p->length = mud;

        if (delta >= p->length) {
            rxi_FreePacket(p);
            p = NULL;
        } else {
            p->length -= delta;
        }
        return p;
    }
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);

            want += delta;
            want = MIN(want, mud);

            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);

            if (p->length > mud)
                p->length = mud;

            if (delta >= p->length) {
                rxi_FreePacket(p);
                p = NULL;
            } else {
                p->length -= delta;
            }
            break;
        }

        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
         * just wait.  */
        NETPRI;
        call->flags |= RX_CALL_WAIT_PACKETS;
        MUTEX_ENTER(&rx_refcnt_mutex);
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&rx_refcnt_mutex);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
        osi_rxSleep(&rx_waitingForPackets);
#endif
        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        MUTEX_ENTER(&rx_refcnt_mutex);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&rx_refcnt_mutex);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
        USERPRI;
    }

    return p;
}
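
/*
 * Usage sketch (illustrative, not part of the original file): the stream
 * write path obtains transmit buffers with something like
 *
 *     p = rxi_AllocSendPacket(call, nbytes);
 *
 * "want" is padded by the security header/trailer allowance (delta) and
 * clamped to the call MTU minus the rx header; delta is subtracted again
 * before returning, so p->length on return is pure user-data capacity.
 * When the free pool is exhausted the function drops call->lock and
 * sleeps until packets are returned by acknowledgements.
 */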
#ifndef KERNEL
#ifdef AFS_NT40_ENV
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
#else
/* count the number of used FDs */
static int
CountFDs(int amax)
{
    struct stat tstat;
    int i, code;
    int count = 0;

    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
        if (code == 0)
            count++;
    }
    return count;
}
#endif /* AFS_NT40_ENV */
#else /* KERNEL */
#define CountFDs(amax) amax
#endif /* KERNEL */
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
int
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
               u_short * port)
{
    struct sockaddr_in from;
    struct msghdr msg;
    int nbytes;
    afs_int32 rlen;
    afs_uint32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
                                 * it once in order to avoid races.  */
    tlen = rlen - tlen;
    if (tlen > 0) {
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
        if (tlen > 0) {
            tlen = rlen - tlen;
        } else
            tlen = rlen;
    } else
        tlen = rlen;

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (u_short)(nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
        if (nbytes < 0 && errno == EWOULDBLOCK) {
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.noPacketOnRead);
        } else if (nbytes <= 0) {
            if (rx_stats_active) {
                rx_atomic_inc(&rx_stats.bogusPacketOnRead);
                rx_stats.bogusHost = from.sin_addr.s_addr;
            }
            dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
                 ntohs(from.sin_port), nbytes));
        }
        return 0;
    }
#ifdef RXDEBUG
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
             && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;

        dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
             p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
             p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
             p->length));
#ifdef RX_TRIMDATABUFS
        rxi_TrimDataBufs(p, 1);
#endif
        return 0;
    }
#endif
    else {
        /* Extract packet header. */
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
            if (rx_stats_active) {
                struct rx_peer *peer;
                rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
                /*
                 * Try to look up this peer structure.  If it doesn't exist,
                 * don't create a new one -
                 * we don't keep count of the bytes sent/received if a peer
                 * structure doesn't already exist.
                 *
                 * The peer/connection cleanup code assumes that there is 1 peer
                 * per connection.  If we actually created a peer structure here
                 * and this packet was an rxdebug packet, the peer structure would
                 * never be cleaned up.
                 */
                peer = rxi_FindPeer(*host, *port, 0, 0);
                /* Since this may not be associated with a connection,
                 * it may have no refCount, meaning we could race with
                 * ReapConnections
                 */
                if (peer && (peer->refCount > 0)) {
                    MUTEX_ENTER(&peer->peer_lock);
                    hadd32(peer->bytesReceived, p->length);
                    MUTEX_EXIT(&peer->peer_lock);
                }
            }
        }

#ifdef RX_TRIMDATABUFS
        /* Free any empty packet buffers at the end of this packet */
        rxi_TrimDataBufs(p, 1);
#endif
        return 1;
    }
}
#endif /* !KERNEL || UKERNEL */
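
/*
 * Usage sketch (illustrative, not part of the original file): a user-mode
 * listener typically loops over this, reusing one packet buffer:
 *
 *     if (rxi_ReadPacket(sock, p, &host, &port)) {
 *         // hand p to the dispatch layer; host/port identify the sender
 *     }
 *
 * A zero return means nothing useful arrived: EWOULDBLOCK, a datagram
 * larger than advertised, or (under RXDEBUG) an intentionally dropped
 * packet.
 */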
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
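
/*
 * Illustrative wire layout (not part of the original file) for a two-packet
 * jumbogram:
 *
 *   +-----------+---------------------+--------------+------------------+
 *   | rx header | RX_JUMBOBUFFERSIZE  | 4-byte jumbo | data of the last |
 *   | (pkt 1)   | bytes of pkt 1 data | header       | packet           |
 *   +-----------+---------------------+--------------+------------------+
 *
 * The abbreviated jumbo header carries the next packet's flags byte and
 * 16-bit "spare" checksum; the ntohl() unpacking below extracts flags from
 * the top byte and cksum from the low 16 bits.
 */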
struct rx_packet *
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
                     int first)
{
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    int niov, i;
    struct iovec *iov;
    int length;
    afs_uint32 temp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length. All but the first packet are preceded by
     * an abbreviated four byte header. The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
        return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
        return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    p->niovecs = 2;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

    return np;
}
#ifndef KERNEL
/* Send a udp datagram */
int
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
{
    struct msghdr msg;
    int ret;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);

    return ret;
}
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
static int
cpytoc(mblk_t * mp, int off, int len, char *cp)
{
    int n;

    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
            return -1;
        }
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);
        cp += n;
        len -= n;
    }
    return (len);
}

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
static int
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
           int niovs)
{
    int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
            return -1;
        }
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        len -= n;
        while (n) {
            if (!t) {
                o = 0;
                i++;
                if (i >= niovs)
                    break;
                t = iovs[i].iov_len;
            }
            m = MIN(n, t);
            memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
            mp->b_rptr += m;
            o += m;
            t -= m;
            n -= m;
        }
    }
    return (len);
}

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#else
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
static int
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
{
    caddr_t p1, p2;
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

    while (off && m)
        if (m->m_len <= off) {
            off -= m->m_len;
            m = m->m_next;
            continue;
        } else
            break;

    if (m == NULL)
        return len;

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    i = 0;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    while (len) {
        t = MIN(l1, MIN(l2, (unsigned int)len));
        memcpy(p2, p1, t);
        p1 += t;
        p2 += t;
        l1 -= t;
        l2 -= t;
        len -= t;
        if (!l1) {
            m = m->m_next;
            if (!m)
                break;
            p1 = mtod(m, caddr_t);
            l1 = m->m_len;
        }
        if (!l2) {
            if (++i >= niovs)
                break;
            p2 = iovs[i].iov_base;
            l2 = iovs[i].iov_len;
        }
    }

    return len;
}
#endif
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
int
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     mblk_t *amb;
#else
     struct mbuf *amb;
#endif
     void (*free) ();
     struct rx_packet *phandle;
     int hdr_len, data_len;
{
    int code;

    code =
        m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
                     phandle->niovecs);
    (*free) (amb);

    return code;
}
#endif
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */

struct rx_packet *
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
                       afs_uint32 ahost, short aport, int istack)
{
    struct rx_debugIn tin;
    afs_int32 tl;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
    } else {
        return ap;
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;

            /* get basic stats */
            memset(&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
#endif
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.nPackets = htonl(rx_nPackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
            tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
                        tstat.idleThreads);
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);
            tl = sizeof(struct rx_debugStats) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            if (tl <= 0) {
                rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
                               (char *)&tstat);
                ap->length = sizeof(struct rx_debugStats);
                rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
                rx_computelen(ap, ap->length);
            }
            break;
        }
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            unsigned int i, j;
            struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);

            tl = sizeof(struct rx_debugConn) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            memset(&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
                 */
#ifdef AFS_PTHREAD_ENV
                pthread_yield();
#else
                (void)IOMGR_Poll();
#endif
#endif
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                 */
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                            } else
                                tconn.callState[j] = RX_STATE_NOTINIT;
                        }

                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
                                         &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(flags);
                            DOHTONL(expires);
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                            DOHTONL(bytesSent);
                            for (i = 0;
                                 i <
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(short); i++)
                                DOHTONS(spares[i]);
                            for (i = 0;
                                 i <
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)
                                DOHTONL(sparel[i]);
                        }

                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                                       (char *)&tconn);
                        tl = ap->length;
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                                            istack);
                        ap->length = tl;
                        return ap;
                    }
                }
                MUTEX_EXIT(&rx_connHashTable_lock);
            }
            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                           (char *)&tconn);
            tl = ap->length;
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            ap->length = tl;
            break;
        }
        /*
         * Pass back all the peer structures we have available
         */
    case RX_DEBUGI_GETPEER:{
            unsigned int i;
            struct rx_peer *tp;
            struct rx_debugPeer tpeer;

            tl = sizeof(struct rx_debugPeer) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            memset(&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 *
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
                 */
#ifdef AFS_PTHREAD_ENV
                pthread_yield();
#else
                (void)IOMGR_Poll();
#endif
#endif
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        tp->refCount++;
                        MUTEX_EXIT(&rx_peerHashTable_lock);

                        MUTEX_ENTER(&tp->peer_lock);
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = tp->burstSize;
                        tpeer.burst = tp->burst;
                        tpeer.burstWait.sec = htonl(tp->burstWait.sec);
                        tpeer.burstWait.usec = htonl(tp->burstWait.usec);
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.timeout.sec = htonl(tp->timeout.sec);
                        tpeer.timeout.usec = htonl(tp->timeout.usec);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.inPacketSkew = htonl(tp->inPacketSkew);
                        tpeer.outPacketSkew = htonl(tp->outPacketSkew);
                        tpeer.rateFlag = htonl(tp->rateFlag);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high = htonl(tp->bytesSent.high);
                        tpeer.bytesSent.low = htonl(tp->bytesSent.low);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived.high);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived.low);
                        MUTEX_EXIT(&tp->peer_lock);

                        MUTEX_ENTER(&rx_peerHashTable_lock);
                        tp->refCount--;
                        MUTEX_EXIT(&rx_peerHashTable_lock);

                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                                       (char *)&tpeer);
                        tl = ap->length;
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                                            istack);
                        ap->length = tl;
                        return ap;
                    }
                }
                MUTEX_EXIT(&rx_peerHashTable_lock);
            }
            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                           (char *)&tpeer);
            tl = ap->length;
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            ap->length = tl;
            break;
        }
    case RX_DEBUGI_RXSTATS:{
            unsigned int i;
            afs_int32 *s;

            tl = sizeof(rx_stats) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            /* Since it's all int32s convert to network order with a loop. */
            if (rx_stats_active)
                MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) & rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

            tl = ap->length;
            ap->length = sizeof(rx_stats);
            if (rx_stats_active)
                MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            ap->length = tl;
            break;
        }

    default:
        /* error response packet */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        tl = ap->length;
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
        ap->length = tl;
        break;
    }
    return ap;
}
struct rx_packet *
rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
                         afs_uint32 ahost, short aport, int istack)
{
    afs_int32 tl;

    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        char buf[66];

        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
        memset(buf, 0, sizeof(buf));
        strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
        rx_packetwrite(ap, 0, 65, buf);
        tl = ap->length;
        ap->length = 65;
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
        ap->length = tl;
    }

    return ap;
}
/* send a debug packet back to the sender */
static void
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                    afs_uint32 ahost, short aport, afs_int32 istack)
{
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;
    int saven = 0;
#ifdef KERNEL
    int waslocked = ISAFS_GLOCK();
#endif

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
#endif

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
        if (nbytes <= apacket->wirevec[i].iov_len) {
            savelen = apacket->wirevec[i].iov_len;
            saven = apacket->niovecs;
            apacket->wirevec[i].iov_len = nbytes;
            apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
        } else
            nbytes -= apacket->wirevec[i].iov_len;
    }
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        if (!waslocked)
            AFS_GLOCK();
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
        AFS_GUNLOCK();
    }
#else
    if (waslocked)
        AFS_GUNLOCK();
#endif
#endif
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
                      apacket->length + RX_HEADER_SIZE, istack);
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        AFS_GLOCK();
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
        if (!waslocked)
            AFS_GUNLOCK();
    }
#else
    if (waslocked)
        AFS_GLOCK();
#endif
#endif
    if (saven) {		/* means we truncated the packet above. */
        apacket->wirevec[i - 1].iov_len = savelen;
        apacket->niovecs = saven;
    }
}
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
void
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
               struct rx_packet *p, int istack)
{
#if defined(KERNEL)
    int waslocked;
#endif
    int code;
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    osi_socket socket;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    if (p->length > conn->peer->maxPacketSize) {
        if ((p->header.type == RX_PACKET_TYPE_ACK) &&
            (p->header.flags & RX_REQUEST_ACK)) {
            conn->lastPingSize = p->length;
            conn->lastPingSizeSer = p->header.serial;
        } else if (p->header.seq != 0) {
            conn->lastPacketSize = p->length;
            conn->lastPacketSizeSeq = p->header.seq;
        }
    }
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
        p->firstSerial = p->header.serial;
    }
#ifdef RXDEBUG
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
        int drop = (*rx_almostSent) (p, &addr);
        /* drop packet if return value is non-zero? */
        if (drop)
            deliveryType = 'D';	/* Drop the packet */
    }
#endif

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
                                 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
        (conn->type ==
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';	/* Drop the packet */
    } else {
        deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */
        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#ifdef KERNEL
        waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
        if (ICL_SETACTIVE(afs_iclSetp)) {
            if (!waslocked)
                AFS_GLOCK();
            afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                       "before osi_NetSend()");
            AFS_GUNLOCK();
        }
#else
        if (waslocked)
            AFS_GUNLOCK();
#endif
#endif
        if ((code =
             osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
                         p->length + RX_HEADER_SIZE, istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.netSendFailures);
            p->retryTime = p->timeSent;	/* resend it very soon */
            clock_Addmsec(&(p->retryTime),
                          10 + (((afs_uint32) p->backoff) << 8));
            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later.
             */
            if (call &&
#ifdef AFS_NT40_ENV
                (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
#elif defined(AFS_LINUX20_ENV)
                code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
                code == EHOSTUNREACH
#else
                0
#endif
                )
                call->lastReceiveTime = 0;
        }
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
        if (ICL_SETACTIVE(afs_iclSetp)) {
            AFS_GLOCK();
            afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                       "after osi_NetSend()");
            if (!waslocked)
                AFS_GUNLOCK();
        }
#else
        if (waslocked)
            AFS_GLOCK();
#endif
#endif
#ifdef RXDEBUG
    }

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d\n",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
         ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
         p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    if (rx_stats_active) {
        rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
        MUTEX_ENTER(&peer->peer_lock);
        hadd32(peer->bytesSent, p->length);
        MUTEX_EXIT(&peer->peer_lock);
    }
}
/* Send a list of packets to appropriate destination for the specified
 * connection.  The headers are first encoded and placed in the packets.
 */
void
rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
                   struct rx_packet **list, int len, int istack)
{
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    int waslocked;
#endif
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    osi_socket socket;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length, code;
    afs_uint32 serial;
    afs_uint32 temp;
    struct rx_jumboHeader *jp;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    if (len + 1 > RX_MAXIOVECS) {
        osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
    }

    /*
     * Stamp the packets in this jumbogram with consecutive serial numbers
     */
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    for (i = 0; i < len; i++) {
        p = list[i];
        /* a ping *or* a sequenced packet can count */
        if (p->length > conn->peer->maxPacketSize) {
            if (((p->header.type == RX_PACKET_TYPE_ACK) &&
                 (p->header.flags & RX_REQUEST_ACK)) &&
                ((i == 0) || (p->length >= conn->lastPingSize))) {
                conn->lastPingSize = p->length;
                conn->lastPingSizeSer = serial + i;
            } else if ((p->header.seq != 0) &&
                       ((i == 0) || (p->length >= conn->lastPacketSize))) {
                conn->lastPacketSize = p->length;
                conn->lastPacketSizeSeq = p->header.seq;
            }
        }
    }
    MUTEX_EXIT(&conn->conn_data_lock);
2415 /* This stuff should be revamped, I think, so that most, if not
2416 * all, of the header stuff is always added here. We could
2417 * probably do away with the encode/decode routines. XXXXX */
2420 length = RX_HEADER_SIZE;
2421 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2422 wirevec[0].iov_len = RX_HEADER_SIZE;
2423 for (i = 0; i < len; i++) {
2426 /* The whole 3.5 jumbogram scheme relies on packets fitting
2427 * in a single packet buffer. */
2428 if (p->niovecs > 2) {
2429 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2432 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2435 if (p->length != RX_JUMBOBUFFERSIZE) {
2436 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2438 p->header.flags |= RX_JUMBO_PACKET;
2439 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2440 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2442 wirevec[i + 1].iov_len = p->length;
2443 length += p->length;
2445 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2447 /* Convert jumbo packet header to network byte order */
2448 temp = (afs_uint32) (p->header.flags) << 24;
2449 temp |= (afs_uint32) (p->header.spare);
2450 *(afs_uint32 *) jp = htonl(temp);
2452 jp = (struct rx_jumboHeader *)
2453 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
        /* Stamp each packet with a unique serial number.  The serial
         * number is maintained on a connection basis because some types
         * of security may be based on the serial number of the packet,
         * and security is handled on a per authenticated-connection
         * basis. */
        /* Pre-increment, to guarantee no zero serial number; a zero
         * serial number means the packet was never sent. */
        p->header.serial = ++serial;
        /* This is so we can adjust retransmit time-outs better in the face
         * of rapidly changing round-trip times.  RTO estimation is not a la
         * Karn. */
        if (p->firstSerial == 0) {
            p->firstSerial = p->header.serial;
        }
#ifdef RXDEBUG
        /* If an output tracer function is defined, call it with the packet
         * and network address.  Note this function may modify its arguments. */
        if (rx_almostSent) {
            int drop = (*rx_almostSent) (p, &addr);
            /* drop the packet if the return value is non-zero */
            if (drop)
                deliveryType = 'D';     /* Drop the packet */
        }
#endif
        /* Get the network byte order header */
        rxi_EncodePacketHeader(p);      /* XXX in the event of rexmit, etc, we
                                         * don't need to touch ALL the fields */
    }

    /* Send the packet out on the same socket that related packets are
     * being sent on */
    socket =
        (conn->type ==
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
    } else {
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */
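
        /* Note: the '{' opened by the else above is closed inside a matching
         * #ifdef RXDEBUG block below, so non-RXDEBUG builds compile the send
         * path as straight-line code. */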
        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        waslocked = ISAFS_GLOCK();
        if (!istack && waslocked)
            AFS_GUNLOCK();
#endif
        if ((code =
             osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
                         istack)) != 0) {
            /* the send failed, so let's hurry up the resend, eh? */
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.netSendFailures);
            for (i = 0; i < len; i++) {
                p = list[i];
                p->retryTime = p->timeSent;     /* resend it very soon */
                clock_Addmsec(&(p->retryTime),
                              10 + (((afs_uint32) p->backoff) << 8));
            }
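            /* i.e. each retry is scheduled 10 ms plus 256 ms per backoff
             * step after the original transmission time. */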
            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to time out later.
             */
            if (call &&
#ifdef AFS_NT40_ENV
                ((code == -1 && WSAGetLastError() == WSAEHOSTUNREACH)
                 || (code == -WSAEHOSTUNREACH))
#elif defined(AFS_LINUX20_ENV)
                code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
                code == EHOSTUNREACH
#else
                0
#endif
                )
                call->lastReceiveTime = 0;
        }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        if (!istack && waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    osi_Assert(p != NULL);

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d\n",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
         ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
         p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));

#endif
    if (rx_stats_active) {
        rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
        MUTEX_ENTER(&peer->peer_lock);
        hadd32(peer->bytesSent, p->length);
        MUTEX_EXIT(&peer->peer_lock);
    }
}
2564 /* Send a "special" packet to the peer connection. If call is
2565 * specified, then the packet is directed to a specific call channel
2566 * associated with the connection, otherwise it is directed to the
2567 * connection only. Uses optionalPacket if it is supplied, rather than
2568 * allocating a new packet buffer. Nbytes is the length of the data
2569 * portion of the packet. If data is non-null, nbytes of data are
2570 * copied into the packet. Type is the type of the packet, as defined
2571 * in rx.h. Bug: there's a lot of duplication between this and other
2572 * routines. This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(struct rx_call *call,
                struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
        p->length = nbytes;
    else
        nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);
    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1; /* so the loop condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {                /* means we truncated the packet above.  We
                                 * probably don't really need to do this, but
                                 * it seems safer this way, given that sneaky
                                 * optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}
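
/* Illustrative (hypothetical) caller, modeled on how rx.c sends call aborts;
 * here error would be an afs_int32 already converted with htonl():
 *
 *      packet = rxi_SendSpecial(call, call->conn, packet,
 *                               RX_PACKET_TYPE_ABORT, (char *)&error,
 *                               sizeof(error), istack);
 */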
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */

    memset(buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: the top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
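
/* For reference, the resulting wire header is seven big-endian 32-bit words:
 *
 *      word 0: epoch
 *      word 1: cid (connection id | channel)
 *      word 2: callNumber
 *      word 3: seq
 *      word 4: serial
 *      word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *      word 6: spare<<16 | serviceId  (on the wire the top 16 bits of this
 *              word carry the security checksum; see the decode note below)
 */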
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
    afs_uint32 temp;
    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;

    temp = ntohl(*buf);
    buf++;

    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf);
    buf++;

    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: the top 16 bits of this last word are the security checksum */
}
/*
 * LOCKS HELD: called with call->lock held.
 *
 * PrepareSendPacket is the only place in the code that
 * can increment call->tnext.  This could become an atomic
 * in the future.  Beyond that there is nothing in this
 * function that requires the call to be locked.  This
 * function can only be called by the application thread.
 */
void
rxi_PrepareSendPacket(struct rx_call *call,
                      struct rx_packet *p, int last)
{
    struct rx_connection *conn = call->conn;
    afs_uint32 seq = call->tnext++;
    unsigned int i;
    afs_int32 len;              /* len must be a signed type; it can go negative */

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    MUTEX_EXIT(&call->lock);
    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    p->header.callNumber = *call->callNumber;
    p->header.seq = seq;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);  /* Never yet transmitted */
    clock_Zero(&p->firstSent);  /* Never yet transmitted */
    p->header.serial = 0;       /* Another way of saying never transmitted... */

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
    } else if (i < p->niovecs) {
        /* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
        rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
#else /* !RX_ENABLE_TSFPQ */
        MUTEX_ENTER(&rx_freePktQ_lock);
        rxi_FreeDataBufsNoLock(p, i);
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */
        p->niovecs = i;
    }
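    /* At this point len is zero or negative, so adding it trims the final
     * iovec until the iov_len sum again matches p->length plus the
     * security header. */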
    if (len)
        p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
    MUTEX_ENTER(&call->lock);
}
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
        return mtu;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
        return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
        return 0;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
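
/* Worked example, assuming the usual constants (RX_HEADER_SIZE 28,
 * RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4): for an Ethernet MTU of
 * 1500, adjMTU is 1444; the remaining 56 bytes cannot hold another
 * 1416-byte jumbo fragment, so the adjusted MTU comes back as 1444. */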
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
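
/* Worked example, assuming the usual constants (RX_HEADER_SIZE 28,
 * RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4, UDP_HDR_SIZE 28, and
 * RX_MAX_PACKET_SIZE large enough not to clamp): frags=3, mtu=1444 gives
 * maxMTU = 3*1472 - 28 = 4388; subtracting the first/last packet overhead
 * (2856) leaves 1532, enough for one more 1416-byte middle buffer, so the
 * datagram can carry 2 + 1 = 3 packets. */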
/*
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where the packet leakage is.
 */
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
    struct rx_packet *p;
#ifdef AFS_NT40_ENV
    int zilch;
    char output[2048];
#define RXDPRINTF sprintf
#define RXDPRINTOUT output
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif
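
    /* On Windows, RXDPRINTF formats into the local buffer and each record
     * is pushed out with WriteFile(); everywhere else it is a straight
     * fprintf to outputFile. */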
    MUTEX_ENTER(&rx_freePktQ_lock);

    RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    for (p = rx_mallocedP; p; p = p->allNextp) {
        RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, retryTime=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, backoff=%u, length=%u  header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
                  cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec, p->retryTime.sec, p->retryTime.usec,
                  p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->backoff, (afs_uint32)p->length,
                  p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
                  (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
                  (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
#ifdef AFS_NT40_ENV
        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }
2868 RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
2870 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2873 MUTEX_EXIT(&rx_freePktQ_lock);
2875 #endif /* RXDEBUG_PACKET */