 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include <afs/param.h>
# include "afs/sysincludes.h"
# include "afsincludes.h"
# include "rx_kcommon.h"
# else /* defined(UKERNEL) */
# ifdef RX_KERNEL_TRACE
# include "rx_kcommon.h"
# ifndef AFS_LINUX20_ENV
# if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
# include "afs/sysincludes.h"
# if defined(AFS_OBSD_ENV)
# include "h/socket.h"
# if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
# if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
# include "sys/mount.h"         /* it gets pulled in by something later anyway */
# include "netinet/in.h"
# include "afs/afs_osi.h"
# include "rx_kmutex.h"
# endif /* defined(UKERNEL) */
# if defined(AFS_NT40_ENV)
# define EWOULDBLOCK WSAEWOULDBLOCK
# include "rx_xmit_nt.h"
# include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
static struct rx_packet *rx_mallocedP = 0;
static afs_uint32 rx_packet_id = 0;
extern char cml_version_number[];
static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);
static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_uint32 ahost, short aport,
#ifdef RX_ENABLE_TSFPQ
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 * all packet buffers (iov_base) are integral multiples of
 * offset is an integral multiple of the word size.
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
        l += packet->wirevec[i].iov_len;
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                             (offset - l))) = data;
        l += packet->wirevec[i].iov_len;
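/*
 * Illustrative sketch (not part of the original source): the slow paths
 * above are the fallbacks used when a word does not live entirely in the
 * first iovec.  Per the rules stated above, the offset must be an integral
 * multiple of the word size.
 */
#if 0 /* example only */
static afs_int32
example_swap_int32(struct rx_packet *pkt, size_t off, afs_int32 newval)
{
    afs_int32 old = rx_SlowGetInt32(pkt, off);  /* walk the iovecs to read */
    rx_SlowPutInt32(pkt, off, newval);          /* walk the iovecs to write */
    return old;
}
#endif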
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
    while ((r > 0) && (i < packet->niovecs)) {
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        l += packet->wirevec[i].iov_len;
    return (r ? (resid - r) : resid);
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    unsigned int i, j, l, o, r;
    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > o) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
    while ((r > 0) && (i <= RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)     /* ++niovecs as a side-effect */
        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        l += packet->wirevec[i].iov_len;
    return (r ? (resid - r) : resid);
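/*
 * Illustrative sketch (not part of the original source): both slow-path
 * copies return the number of bytes actually transferred, which can be
 * less than resid if the packet runs out of iovecs (or, for writes, if
 * rxi_AllocDataBuf cannot extend it).
 */
#if 0 /* example only */
static int
example_copy_out(struct rx_packet *pkt, char *buf, int nbytes)
{
    int copied = rx_SlowReadPacket(pkt, 0, nbytes, buf);
    return (copied == nbytes);  /* nonzero iff the full range was present */
}
#endif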
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
    struct rx_packet *p, *np;
    num_pkts = AllocPacketBufs(class, num_pkts, q);
    for (queue_Scan(q, p, np, rx_packet)) {
        RX_PACKET_IOV_FULLINIT(p);
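/*
 * Illustrative sketch (not part of the original source): batch allocation
 * pairs with rxi_FreePackets below.  rxi_AllocPackets may return fewer
 * packets than requested, so the caller must use its return value.
 */
#if 0 /* example only */
static void
example_batch(void)
{
    struct rx_queue q;
    int got;
    queue_Init(&q);
    got = rxi_AllocPackets(RX_PACKET_CLASS_SEND, 4, &q);
    /* ... use the 'got' packets chained on q ... */
    rxi_FreePackets(got, &q);
}
#endif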
#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    transfer = num_pkts - rx_ts_info->_FPQ.len;
        MUTEX_ENTER(&rx_freePktQ_lock);
        transfer = MAX(transfer, rx_TSFPQGlobSize);
        if (transfer > rx_nFreePackets) {
            /* alloc enough for us, plus a few globs for other threads */
            rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
        RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
        MUTEX_EXIT(&rx_freePktQ_lock);
    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
         num_pkts--, overq++);
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
    if (rx_nFreePackets < num_pkts)
        num_pkts = rx_nFreePackets;
        rxi_NeedMorePackets = TRUE;
    if (rx_nFreePackets < num_pkts) {
        rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));
    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
         i++, c = queue_Next(c, rx_packet)) {
    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);
    rx_nFreePackets -= num_pkts;
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
 * Free a packet currently used as a continuation buffer
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;
    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);
        for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        for (queue_Scan(q, c, nc, rx_packet)) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    struct rx_packet *p, *np;
    osi_Assert(num_pkts >= 0);
        for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
        for (queue_Scan(q, p, np, rx_packet)) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
        queue_SpliceAppend(q, &cbs);
    MUTEX_ENTER(&rx_freePktQ_lock);
    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;
    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Returns nbytes <= 0 on success; otherwise
 * returns the number of bytes (> 0) it failed to come up with.
 * There is no need to worry about locking the packet, since only one
 * thread can manipulate it at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    struct rx_packet *cb, *ncb;
    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
    if ((nv + p->niovecs) > RX_MAXWVECS)
        nv = RX_MAXWVECS - p->niovecs;
    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);
    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
        p->wirevec[i].iov_base = (caddr_t) cb->localdata;
        p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
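/*
 * Worked example (illustrative, not in the original source; assumes a
 * hypothetical RX_CBUFFERSIZE of 1024): nb = 2500 gives nv = 2 from the
 * division, bumped to 3 by the round-up test, so three cbufs are hooked
 * in, p->length grows by 3072, and nb goes negative, reporting success.
 * If RX_MAXWVECS clamps nv, the shortfall stays positive and is returned.
 */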
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
    PIN(p, getme);              /* XXXXX */
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        RX_TS_FPQ_CHECKIN(rx_ts_info, p);
        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        MUTEX_EXIT(&rx_freePktQ_lock);
    rx_ts_info->_FPQ.delta += apackets;
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_NeedMorePackets = FALSE;
        MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
    PIN(p, getme);              /* XXXXX */
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
    PIN(p, getme);              /* XXXXX */
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        RX_TS_FPQ_CHECKIN(rx_ts_info, p);
        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        MUTEX_EXIT(&rx_freePktQ_lock);
    rx_ts_info->_FPQ.delta += apackets;
        (num_keep_local < apackets)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
        rxi_NeedMorePackets = FALSE;
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;
    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
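    /*
     * Worked example (illustrative, not in the original source): if the
     * ratio (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE
     * were 5, a request for 100 packets would grow to 100 + 25 * 5 = 225
     * packet structures, the extras serving as continuation buffers.
     */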
    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
    apackets -= apackets / 4;
    osi_Assert(apackets > 0);
#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;
rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    if (num_keep_local != rx_ts_info->_FPQ.len) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (num_keep_local < rx_ts_info->_FPQ.len) {
            xfer = rx_ts_info->_FPQ.len - num_keep_local;
            RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
            xfer = num_keep_local - rx_ts_info->_FPQ.len;
            if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
                xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
            if (rx_nFreePackets < xfer) {
                rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
            RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
        MUTEX_EXIT(&rx_freePktQ_lock);
rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_maxSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for afs_int32: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %"AFS_PTR_FMT"\n", p));
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        RX_TS_FPQ_LTOG(rx_ts_info);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %"AFS_PTR_FMT"\n", p));
    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %"AFS_PTR_FMT"\n", p));
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
 * free continuation buffers off a packet into a queue
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *   number of continuation buffers freed
#ifndef RX_ENABLE_TSFPQ
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
    struct rx_packet * cb;
    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
        cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
        RX_FPQ_MARK_FREE(cb);
 * free packet continuation buffers into the global free packet pool
 * [IN] p      -- packet from which to free continuation buffers
 * [IN] first  -- iovec offset of first continuation buffer to free
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
#ifdef RX_ENABLE_TSFPQ
 * free packet continuation buffers into the thread-local free pool
 * [IN] p             -- packet from which continuation buffers will be freed
 * [IN] first         -- iovec offset of first continuation buffer to free
 *                       any value less than 2, the min number of iovecs,
 *                       is treated as if it is 2.
 * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 *                       global free pool before returning
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
        RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;
/* rxi_RestoreDataBufs
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];
    RX_PACKET_IOV_INIT(p);
    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
        iov->iov_len = RX_CBUFFERSIZE;
#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;
        osi_Panic("TrimDataBufs 1: first must be 1");
    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    /* iov now points to the first empty data buffer. */
        RX_TS_INFO_GET(rx_ts_info);
        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
        if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
            MUTEX_ENTER(&rx_freePktQ_lock);
            RX_TS_FPQ_LTOG(rx_ts_info);
            rxi_PacketsUnWait();
            MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
        osi_Panic("TrimDataBufs 1: first must be 1");
    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    /* iov now points to the first empty data buffer. */
        MUTEX_ENTER(&rx_freePktQ_lock);
        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
        rxi_PacketsUnWait();
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
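/*
 * Illustrative sketch (not part of the original source): after a short
 * datagram is received into a fully provisioned packet, trimming releases
 * the continuation buffers that ended up holding no data.
 */
#if 0 /* example only */
static void
example_trim(struct rx_packet *p)
{
    rxi_TrimDataBufs(p, 1);     /* first must be 1, per the panics above */
}
#endif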
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
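/*
 * Illustrative sketch (not part of the original source): a packet obtained
 * from rxi_AllocPacket must eventually go back through rxi_FreePacket,
 * which releases the continuation buffers and then the packet itself.
 */
#if 0 /* example only */
static void
example_alloc_free(void)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
    if (p != NULL) {
        /* ... fill in and use the packet ... */
        rxi_FreePacket(p);      /* p must not be on any queue here */
    }
}
#endif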
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary; besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
        return (struct rx_packet *)0;
    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);
        RX_TS_FPQ_GTOL(rx_ts_info);
    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);
    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
        return (struct rx_packet *)0;
    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (queue_IsEmpty(&rx_freePacketQueue))
        osi_Panic("rxi_AllocPacket error");
    if (queue_IsEmpty(&rx_freePacketQueue))
        rxi_MorePacketsNoLock(rx_maxSendWindow);
    p = queue_First(&rx_freePacketQueue, rx_packet);
    RX_FPQ_MARK_USED(p);
    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketTSFPQ(int class, int pull_global)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);
        RX_TS_FPQ_GTOL(rx_ts_info);
        MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);
    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    struct rx_packet *p;
    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    struct rx_packet *p;
    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
rxi_AllocSendPacket(struct rx_call *call, int want)
    struct rx_packet *p = (struct rx_packet *)0;
    mud = call->MTU - RX_HEADER_SIZE;
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
        want = MIN(want, mud);
        if ((unsigned)want > p->length)
            (void)rxi_AllocDataBuf(p, (want - p->length),
                                   RX_PACKET_CLASS_SEND_CBUF);
        if (p->length > mud)
        if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */
    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);
            want = MIN(want, mud);
            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);
            if (p->length > mud)
            if (delta >= p->length) {
        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
        call->flags |= RX_CALL_WAIT_PACKETS;
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;
#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
        osi_rxSleep(&rx_waitingForPackets);
        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
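/*
 * Illustrative sketch (not part of the original source): a sender asks for
 * as much buffer space as it wants and then writes no more than the length
 * the allocator actually granted.
 */
#if 0 /* example only */
static void
example_send_alloc(struct rx_call *call, char *data, int nbytes)
{
    struct rx_packet *p = rxi_AllocSendPacket(call, nbytes);
    if (p != NULL) {
        int n = MIN(nbytes, (int)p->length);    /* may be less than asked */
        rx_packetwrite(p, 0, n, data);
        /* ... hand p to the transmit path ... */
    }
}
#endif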
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
/* count the number of used FDs */
    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
#endif /* AFS_NT40_ENV */
#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)
/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    afs_uint32 tlen, savelen;
    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);    /* this is the size of the user data area */
    tlen += RX_HEADER_SIZE;     /* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
                                 * it once in order to avoid races.  */
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    /* Extend the last iovec for padding; this just makes sure that the
     * read doesn't return more data than we expect, and works around
     * the problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);
    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;
    p->length = (u_short)(nbytes - RX_HEADER_SIZE);
    if (nbytes < 0 || (nbytes > tlen) || (p->length & 0x8000)) {        /* Bogus packet */
        if (nbytes < 0 && errno == EWOULDBLOCK) {
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.noPacketOnRead);
        } else if (nbytes <= 0) {
            if (rx_stats_active) {
                rx_atomic_inc(&rx_stats.bogusPacketOnRead);
                rx_stats.bogusHost = from.sin_addr.s_addr;
            dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
                 ntohs(from.sin_port), nbytes));
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
             && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
        rxi_DecodePacketHeader(p);
        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
             p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
             p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
#ifdef RX_TRIMDATABUFS
        rxi_TrimDataBufs(p, 1);
        /* Extract packet header. */
        rxi_DecodePacketHeader(p);
        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
            if (rx_stats_active) {
                struct rx_peer *peer;
                rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
                /*
                 * Try to look up this peer structure.  If it doesn't exist,
                 * don't create a new one -
                 * we don't keep count of the bytes sent/received if a peer
                 * structure doesn't already exist.
                 * The peer/connection cleanup code assumes that there is 1 peer
                 * per connection.  If we actually created a peer structure here
                 * and this packet was an rxdebug packet, the peer structure would
                 * never be cleaned up.
                peer = rxi_FindPeer(*host, *port, 0, 0);
                /* Since this may not be associated with a connection,
                 * it may have no refCount, meaning we could race with
                if (peer && (peer->refCount > 0)) {
                    MUTEX_ENTER(&peer->peer_lock);
                    hadd32(peer->bytesReceived, p->length);
                    MUTEX_EXIT(&peer->peer_lock);
#ifdef RX_TRIMDATABUFS
        /* Free any empty packet buffers at the end of this packet */
        rxi_TrimDataBufs(p, 1);
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
    niov = p->niovecs - 2;
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);
    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);
    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
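/*
 * Illustrative layout (not part of the original source) of an n-packet
 * jumbogram on the wire; each inner packet except the last contributes
 * RX_JUMBOBUFFERSIZE data bytes plus an RX_JUMBOHEADERSIZE trailer that
 * doubles as the next packet's abbreviated header:
 *
 *   | rx_header | data 1 | jumbo hdr | data 2 | jumbo hdr | ... | data n |
 *
 * The split above peels off "data 1", rebuilds a full header for the
 * remainder, and bumps serial and seq by one.
 */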
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    ret = rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
 * message receipt is done in rxk_input or rx_put.
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
cpytoc(mblk_t * mp, int off, int len, char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);
/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        t = iovs[i].iov_len;
        memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e)  cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;
    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");      /* MTUXXX probably don't need this check */
    if (m->m_len <= off) {
    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;
        t = MIN(l1, MIN(l2, (unsigned int)len));
            p1 = mtod(m, caddr_t);
            p2 = iovs[i].iov_base;
            l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
#if defined(AFS_NBSD_ENV)
rx_mb_to_packet(struct mbuf *amb, void (*free) (struct mbuf *), int hdr_len, int data_len, struct rx_packet *phandle)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;
#endif /* AFS_NBSD_ENV */
    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
                       afs_uint32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;
    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);
    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;
            /* get basic stats */
            memset(&tstat, 0, sizeof(tstat));   /* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.nPackets = htonl(rx_nPackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
            tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);
            tl = sizeof(struct rx_debugStats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
            ap->length = sizeof(struct rx_debugStats);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            rx_computelen(ap, ap->length);
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);
            tl = sizeof(struct rx_debugConn) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            memset(&tconn, 0, sizeof(tconn));   /* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                                tconn.callState[j] = RX_STATE_NOTINIT;
                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)
                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                MUTEX_EXIT(&rx_connHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);      /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
             * Pass back all the peer structures we have available
    case RX_DEBUGI_GETPEER:{
            struct rx_debugPeer tpeer;
            tl = sizeof(struct rx_debugPeer) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            memset(&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        MUTEX_EXIT(&rx_peerHashTable_lock);
                        MUTEX_ENTER(&tp->peer_lock);
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = tp->burstSize;
                        tpeer.burst = tp->burst;
                        tpeer.burstWait.sec = htonl(tp->burstWait.sec);
                        tpeer.burstWait.usec = htonl(tp->burstWait.usec);
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.inPacketSkew = htonl(tp->inPacketSkew);
                        tpeer.outPacketSkew = htonl(tp->outPacketSkew);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high = htonl(tp->bytesSent.high);
                        tpeer.bytesSent.low = htonl(tp->bytesSent.low);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived.high);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived.low);
                        MUTEX_EXIT(&tp->peer_lock);
                        MUTEX_ENTER(&rx_peerHashTable_lock);
                MUTEX_EXIT(&rx_peerHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
            MUTEX_EXIT(&rx_peerHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff);     /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    case RX_DEBUGI_RXSTATS:{
            tl = sizeof(rx_stats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            /* Since it's all int32s, convert to network order with a loop. */
            if (rx_stats_active)
                MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) & rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
            ap->length = sizeof(rx_stats);
            if (rx_stats_active)
                MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
        /* error response packet */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
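/*
 * Illustrative sketch (not part of the original source): an rxdebug-style
 * client fills in struct rx_debugIn and interprets the reply according to
 * the request type handled above.
 */
#if 0 /* example only */
static void
example_debug_request(struct rx_debugIn *tin)
{
    tin->type = htonl(RX_DEBUGI_GETSTATS);      /* basic stats query */
    tin->index = htonl(0);
    /* send in an RX_PACKET_TYPE_DEBUG packet with RX_CLIENT_INITIATED set;
     * the reply carries a struct rx_debugStats in network byte order. */
}
#endif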
rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
                         afs_uint32 ahost, short aport, int istack)
    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
        memset(buf, 0, sizeof(buf));
        strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
        rx_packetwrite(ap, 0, 65, buf);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                    afs_uint32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;
    int waslocked = ISAFS_GLOCK();
    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
        if (nbytes <= apacket->wirevec[i].iov_len) {
            savelen = apacket->wirevec[i].iov_len;
            saven = apacket->niovecs;
            apacket->wirevec[i].iov_len = nbytes;
            apacket->niovecs = i + 1;   /* so condition fails because i == niovecs */
        nbytes -= apacket->wirevec[i].iov_len;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
                      apacket->length + RX_HEADER_SIZE, istack);
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
    if (saven) {                /* means we truncated the packet above. */
        apacket->wirevec[i - 1].iov_len = savelen;
        apacket->niovecs = saven;
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
               struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    char deliveryType = 'S';
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;
    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */
    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    if (p->length > conn->peer->maxPacketSize) {
        if ((p->header.type == RX_PACKET_TYPE_ACK) &&
            (p->header.flags & RX_REQUEST_ACK)) {
            conn->lastPingSize = p->length;
            conn->lastPingSizeSer = p->header.serial;
        } else if (p->header.seq != 0) {
            conn->lastPacketSize = p->length;
            conn->lastPacketSizeSeq = p->header.seq;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
    if (p->firstSerial == 0) {
        p->firstSerial = p->header.serial;
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
        int drop = (*rx_almostSent) (p, &addr);
        /* drop packet if return value is non-zero? */
            deliveryType = 'D'; /* Drop the packet */
    /* Get network byte order header */
    rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
                                 * touch ALL the fields */
    /* Send the packet out on the same socket that related packets are being
            RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */
    /* Loop until the packet is sent.  We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
    waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
         osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
                     p->length + RX_HEADER_SIZE, istack)) != 0) {
        /* send failed, so let's hurry up the resend, eh? */
        if (rx_stats_active)
            rx_atomic_inc(&rx_stats.netSendFailures);
        p->flags &= ~RX_PKTFLAG_SENT;   /* resend it very soon */
        /* Some systems are nice and tell us right away that we cannot
         * reach this recipient by returning an error code.
         * So, when this happens let's "down" the host NOW so
         * we don't sit around waiting for this host to timeout later.
            (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
#elif defined(AFS_LINUX20_ENV)
            code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
            code == EHOSTUNREACH
            call->lastReceiveTime = 0;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
         ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
         p->header.seq, p->header.flags, p, p->length));
    if (rx_stats_active) {
        rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
        MUTEX_ENTER(&peer->peer_lock);
        hadd32(peer->bytesSent, p->length);
        MUTEX_EXIT(&peer->peer_lock);
2329 /* Send a list of packets to appropriate destination for the specified
2330 * connection. The headers are first encoded and placed in the packets.
2333 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2334 struct rx_packet **list, int len, int istack)
2336 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2339 struct sockaddr_in addr;
2340 struct rx_peer *peer = conn->peer;
2342 struct rx_packet *p = NULL;
2343 struct iovec wirevec[RX_MAXIOVECS];
2344 int i, length, code;
2347 struct rx_jumboHeader *jp;
2349 char deliveryType = 'S';
2351 /* The address we're sending the packet to */
2352 addr.sin_family = AF_INET;
2353 addr.sin_port = peer->port;
2354 addr.sin_addr.s_addr = peer->host;
2356 if (len + 1 > RX_MAXIOVECS) {
2357 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2361 * Stamp the packets in this jumbogram with consecutive serial numbers
2363 MUTEX_ENTER(&conn->conn_data_lock);
2364 serial = conn->serial;
2365 conn->serial += len;
2366 for (i = 0; i < len; i++) {
2368 if (p->length > conn->peer->maxPacketSize) {
2369 /* a ping *or* a sequenced packet can count */
2370 if ((p->length > conn->peer->maxPacketSize)) {
2371 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2372 (p->header.flags & RX_REQUEST_ACK)) &&
2373 ((i == 0) || (p->length >= conn->lastPingSize))) {
2374 conn->lastPingSize = p->length;
2375 conn->lastPingSizeSer = serial + i;
2376 } else if ((p->header.seq != 0) &&
2377 ((i == 0) || (p->length >= conn->lastPacketSize))) {
2378 conn->lastPacketSize = p->length;
2379 conn->lastPacketSizeSeq = p->header.seq;
2384 MUTEX_EXIT(&conn->conn_data_lock);
2387 /* This stuff should be revamped, I think, so that most, if not
2388 * all, of the header stuff is always added here. We could
2389 * probably do away with the encode/decode routines. XXXXX */
2392 length = RX_HEADER_SIZE;
2393 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2394 wirevec[0].iov_len = RX_HEADER_SIZE;
2395 for (i = 0; i < len; i++) {
2398 /* The whole 3.5 jumbogram scheme relies on packets fitting
2399 * in a single packet buffer. */
2400 if (p->niovecs > 2) {
2401 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2404 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2407 if (p->length != RX_JUMBOBUFFERSIZE) {
2408 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2410 p->header.flags |= RX_JUMBO_PACKET;
2411 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2412 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2414 wirevec[i + 1].iov_len = p->length;
2415 length += p->length;
2417 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2419 /* Convert jumbo packet header to network byte order */
2420 temp = (afs_uint32) (p->header.flags) << 24;
2421 temp |= (afs_uint32) (p->header.spare);
2422 *(afs_uint32 *) jp = htonl(temp);
2424 jp = (struct rx_jumboHeader *)
2425 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
	/* Stamp each packet with a unique serial number.  The serial
	 * number is maintained on a connection basis because some types
	 * of security may be based on the serial number of the packet,
	 * and security is handled on a per authenticated-connection
	 * basis. */
	/* Pre-increment, to guarantee no zero serial number; a zero
	 * serial number means the packet was never sent. */
	p->header.serial = ++serial;
	/* This is so we can adjust retransmit time-outs better in the face of
	 * rapidly changing round-trip times.  RTO estimation is not a la Karn.
	 */
	if (p->firstSerial == 0) {
	    p->firstSerial = p->header.serial;
	}
#ifdef RXDEBUG
	/* If an output tracer function is defined, call it with the packet and
	 * network address.  Note this function may modify its arguments. */
	if (rx_almostSent) {
	    int drop = (*rx_almostSent) (p, &addr);
	    /* drop packet if return value is non-zero? */
	    if (drop)
		deliveryType = 'D';	/* Drop the packet */
	}
#endif

	/* Get network byte order header */
	rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
					 * touch ALL the fields */
    }
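    /*
     * Illustrative sketch (comment only, not compiled): for a three-packet
     * jumbogram, the loop above assembles a single UDP datagram laid out as
     *
     *   wirevec[0]: Rx wire header of list[0]      (RX_HEADER_SIZE bytes)
     *   wirevec[1]: data of list[0] + jumbo header (RX_JUMBOBUFFERSIZE
     *                                               + RX_JUMBOHEADERSIZE)
     *   wirevec[2]: data of list[1] + jumbo header (RX_JUMBOBUFFERSIZE
     *                                               + RX_JUMBOHEADERSIZE)
     *   wirevec[3]: data of list[2]                (list[2]->length bytes)
     *
     * where each jumbo header carries the flags and spare field of the
     * *next* packet, which is why jp lags the loop by one iteration.
     */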
    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
	(conn->type ==
	 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */
	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	waslocked = ISAFS_GLOCK();
	if (!istack && waslocked)
	    AFS_GUNLOCK();
#endif
	if ((code =
	     osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
			 istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    if (rx_stats_active)
		rx_atomic_inc(&rx_stats.netSendFailures);
	    for (i = 0; i < len; i++) {
		p = list[i];
		p->flags &= ~RX_PKTFLAG_SENT;	/* resend it very soon */
	    }
	    /* Some systems are nice and tell us right away that we cannot
	     * reach this recipient by returning an error code.
	     * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call &&
#ifdef AFS_NT40_ENV
		((code == -1 && WSAGetLastError() == WSAEHOSTUNREACH)
		 || (code == -WSAEHOSTUNREACH))
#elif defined(AFS_LINUX20_ENV)
		code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
		code == EHOSTUNREACH
#else
		0
#endif
		)
		call->lastReceiveTime = 0;
	}
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	if (!istack && waslocked)
	    AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }
#endif

    osi_Assert(p != NULL);
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
	 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
	 p->header.seq, p->header.flags, p, p->length));

    if (rx_stats_active) {
	rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
	MUTEX_ENTER(&peer->peer_lock);
	hadd32(peer->bytesSent, p->length);
	MUTEX_EXIT(&peer->peer_lock);
    }
}
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(struct rx_call *call,
		struct rx_connection *conn,
		struct rx_packet *optionalPacket, int type, char *data,
		int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
	channel = call->channel;
	callNumber = *call->callNumber;
	/* BUSY packets refer to the next call on this connection */
	if (type == RX_PACKET_TYPE_BUSY)
	    callNumber++;
    } else {
	channel = 0;
	callNumber = 0;
    }
    p = optionalPacket;		/* use the caller-supplied packet, if any */
    if (!p) {
	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	if (!p)
	    osi_Panic("rxi_SendSpecial failure");
    }
    if (nbytes != -1)
	p->length = nbytes;
    else
	nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
	rx_packetwrite(p, 0, nbytes, data);
    for (i = 1; i < p->niovecs; i++) {
	if (nbytes <= p->wirevec[i].iov_len) {
	    savelen = p->wirevec[i].iov_len;
	    saven = p->niovecs;
	    p->wirevec[i].iov_len = nbytes;
	    p->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
	rxi_Send(call, p, istack);
    else
	rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {		/* means we truncated the packet above.  We probably don't */
	/* really need to do this, but it seems safer this way, given that */
	/* sneaky optionalPacket... */
	p->wirevec[i - 1].iov_len = savelen;
	p->niovecs = saven;
    }
    if (!optionalPacket)
	rxi_FreePacket(p);
    return optionalPacket;
}
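/*
 * Illustrative usage sketch (comment only, not part of the build): a
 * caller sending a connection-level abort would typically let
 * rxi_SendSpecial allocate and free the packet itself; the error value
 * shown here is just a hypothetical example.
 *
 *	afs_int32 error = htonl(RX_CALL_DEAD);
 *	rxi_SendSpecial((struct rx_call *)0, conn, (struct rx_packet *)0,
 *			RX_PACKET_TYPE_ABORT, (char *)&error,
 *			sizeof(error), istack);
 */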
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */

    memset(buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
		   | (((afs_uint32) p->header.flags) << 16)
		   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
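/*
 * For reference, the seven 32-bit words written above form the standard
 * 28-byte Rx wire header (every field big-endian on the wire):
 *
 *   word 0: epoch
 *   word 1: cid           (connection id | call channel)
 *   word 2: callNumber
 *   word 3: seq
 *   word 4: serial
 *   word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6: spare<<16 | serviceId (the top 16 bits double as the
 *           security checksum on checksummed connections)
 */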
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf++);
    p->header.cid = ntohl(*buf++);
    p->header.callNumber = ntohl(*buf++);
    p->header.seq = ntohl(*buf++);
    p->header.serial = ntohl(*buf++);

    temp = ntohl(*buf++);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf++);
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
/*
 * LOCKS HELD: called with call->lock held.
 *
 * PrepareSendPacket is the only place in the code that
 * can increment call->tnext.  This could become an atomic
 * in the future.  Beyond that there is nothing in this
 * function that requires the call being locked.  This
 * function can only be called by the application thread.
 */
void
rxi_PrepareSendPacket(struct rx_call *call,
		      struct rx_packet *p, int last)
{
    struct rx_connection *conn = call->conn;
    afs_uint32 seq = call->tnext++;
    unsigned int i;
    afs_int32 len;		/* len must be a signed type; it can go negative */
    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
	*call->callNumber = 1;

    MUTEX_EXIT(&call->lock);
    p->flags &= ~(RX_PKTFLAG_ACKED | RX_PKTFLAG_SENT);

    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    p->header.callNumber = *call->callNumber;
    p->header.seq = seq;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
	p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->firstSent);	/* Never yet transmitted */
    p->header.serial = 0;	/* Another way of saying never transmitted... */

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
	len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
	osi_Panic("PrepareSendPacket 1\n");	/* MTUXXX */
    } else if (i < p->niovecs) {
	/* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
	rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
#else /* !RX_ENABLE_TSFPQ */
	MUTEX_ENTER(&rx_freePktQ_lock);
	rxi_FreeDataBufsNoLock(p, i);
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */

	p->niovecs = i;
    }
    if (len)
	p->wirevec[i - 1].iov_len += len;
    MUTEX_ENTER(&call->lock);
    RXS_PreparePacket(conn->securityObject, call, p);
}
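/*
 * Worked example for the length fix-up above, with hypothetical numbers:
 * suppose p->length is 500, securityHeaderSize is 4 (so len starts at
 * 504), and wirevec[1] holds a 1400-byte buffer.  The loop exits with
 * i == 2 and len == 504 - 1400 == -896; any buffers from index 2 onward
 * are returned to the free pool, and the final adjustment
 * "p->wirevec[i - 1].iov_len += len" trims wirevec[1] to exactly
 * 1400 - 896 == 504 bytes, so the iovec lengths again sum to p->length
 * plus the security header.
 */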
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
	return mtu;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
	return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
	return 0;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
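/*
 * Worked example, assuming the usual constants (RX_HEADER_SIZE 28,
 * RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4): adjMTU is
 * 28 + 1412 + 4 = 1444.  For an Ethernet interface MTU of 1500, the 56
 * leftover bytes cannot hold another jumbo fragment (56 / 1416 == 0),
 * so the adjusted MTU is 1444; a 9000-byte jumbo frame MTU adjusts to
 * 1444 + 5 * 1416 = 8524.
 */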
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
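/*
 * Worked example, assuming rxi_nSendFrags is the default of 4 and the
 * constants quoted above: rxi_AdjustMaxMTU(1444, 65535) computes
 * maxMTU = 4 * 1444 = 5776, which rxi_AdjustIfMTU then rounds down to
 * 1444 + 3 * 1416 = 5692 so that no partial jumbo fragment is wasted.
 */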
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
	return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
	return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
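/*
 * Worked example, assuming UDP_HDR_SIZE is 28 (20-byte IP header plus
 * 8-byte UDP header) along with the constants quoted above: for
 * frags = 4 and mtu = 1444, maxMTU = 4 * 1472 - 28 = 5860; subtracting
 * the first/last packet overhead (28 + 2 * 1412 + 4 = 2856) leaves 3004,
 * and 3004 / 1416 == 2, so the jumbogram can carry 2 + 2 = 4 packets.
 */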
/*
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where the packet leakage is.
 */
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
    struct rx_packet *p;
#ifdef AFS_NT40_ENV
    int zilch;
    char output[2048];
#define RXDPRINTF sprintf
#define RXDPRINTOUT output
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);
    RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    for (p = rx_mallocedP; p; p = p->allNextp) {
	RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, length=%u  header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
		  cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec,
		  p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->length,
		  p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
		  (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
		  (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
#ifdef AFS_NT40_ENV
	WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }

    RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
#endif /* RXDEBUG_PACKET */