/*
 * Copyright 2000, International Business Machines Corporation and others.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

#include <afsconfig.h>
#ifdef KERNEL
# include "afs/param.h"
#else
# include <afs/param.h>
#endif

#ifdef KERNEL
# if defined(UKERNEL)
#  include "afs/sysincludes.h"
#  include "afsincludes.h"
#  include "rx/rx_kcommon.h"
#  include "rx/rx_clock.h"
#  include "rx/rx_queue.h"
#  include "rx/rx_packet.h"
# else /* defined(UKERNEL) */
#  ifdef RX_KERNEL_TRACE
#   include "../rx/rx_kcommon.h"
#  endif
#  ifndef AFS_LINUX20_ENV
#  if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#   include "afs/sysincludes.h"
#  if defined(AFS_OBSD_ENV)
#  include "h/socket.h"
#  if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#   if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#    include "sys/mount.h"	/* it gets pulled in by something later anyway */
#  include "netinet/in.h"
#  include "afs/afs_osi.h"
#  include "rx_kmutex.h"
#  include "rx/rx_clock.h"
#  include "rx/rx_queue.h"
#  include <sys/sysmacros.h>
#  include "rx/rx_packet.h"
# endif /* defined(UKERNEL) */
# include "rx/rx_internal.h"
# include "rx/rx_globals.h"
#else /* KERNEL */
# include "sys/types.h"
# include <sys/stat.h>
# if defined(AFS_NT40_ENV)
#  include <winsock2.h>
#  define EWOULDBLOCK WSAEWOULDBLOCK
#  include "rx_xmit_nt.h"
# else
#  include <sys/socket.h>
#  include <netinet/in.h>
# endif
# include "rx_clock.h"
# include "rx_internal.h"
# include "rx_queue.h"
# include <sys/sysmacros.h>
# include "rx_packet.h"
# include "rx_globals.h"
#endif /* KERNEL */
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
static struct rx_packet *rx_mallocedP = 0;
#ifdef RXDEBUG_PACKET
static afs_uint32 rx_packet_id = 0;
#endif

extern char cml_version_number[];

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport,
				afs_int32 istack);

static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
				   afs_uint32 first,
				   struct rx_queue * q);
#ifdef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
#endif
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 *    a security trailer, but iov_len masks that fact.  If the security
 *    package wants to add the trailer, it may do so, and then extend
 *    iov_len appropriately.  For this reason, packet's niovecs and
 *    iov_len fields should be accurate before calling PreparePacket.
 */

/* Preconditions:
 *	all packet buffers (iov_base) are integral multiples of
 *	the word size.
 *	offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    return
		*((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
				 (offset - l)));
	}
	l += packet->wirevec[i].iov_len;
    }

/* Preconditions:
 *	all packet buffers (iov_base) are integral multiples of the word size.
 *	offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	    return data;
	}
	l += packet->wirevec[i].iov_len;
    }
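/*
 * Illustrative sketch (not part of the original source): given the
 * preconditions above, a 32-bit datum never straddles two iovecs, so the
 * single pointer dereference used by these routines is safe.  A
 * hypothetical caller might look like:
 *
 *     afs_int32 v = rx_SlowGetInt32(p, 8);    // word-aligned offset
 *     rx_SlowPutInt32(p, 8, v + 1);           // rewrite it in place
 *
 * Offsets that are not a multiple of the word size violate the stated
 * precondition and may read or write across buffer boundaries.
 */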
/* Preconditions:
 *	all packet buffers (iov_base) are integral multiples of the
 *	word size.
 *	offset is an integral multiple of the word size.
 * Upon return the iov and niovecs fields may be changed:
 *	all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
afs_int32
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
		  char *out)
{
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((resid > 0) && (i < packet->niovecs)) {
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	resid -= j;
	out += j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (resid ? (r - resid) : r);
}

/* Preconditions:
 *	all packet buffers (iov_base) are integral multiples of the
 *	word size.
 *	offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
{
    unsigned int i, j, l, r;
    char *b;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((resid > 0) && (i <= RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
		break;

	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	memcpy(b, in, j);
	resid -= j;
	in += j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (resid ? (r - resid) : r);
}
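/*
 * Usage sketch (illustrative): rx_SlowWritePacket grows the packet on
 * demand through rxi_AllocDataBuf, so a caller may write past the current
 * length and must check for a short write; rx_SlowReadPacket never
 * allocates and stops at packet->niovecs:
 *
 *     int nw = rx_SlowWritePacket(p, 0, len, buf);
 *     if (nw != len) {
 *         // out of iovecs or continuation buffers; only nw bytes landed
 *     }
 */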
int
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
{
    register struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
	RX_PACKET_IOV_FULLINIT(p);
    }

    return num_pkts;
}

#ifdef RX_ENABLE_TSFPQ
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    register struct rx_ts_info_t * rx_ts_info;
    int transfer;
    SPLVAR;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
    if (transfer > 0) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);
	transfer = MAX(transfer, rx_TSFPQGlobSize);
	if (transfer > rx_nFreePackets) {
	    /* alloc enough for us, plus a few globs for other threads */
	    rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
	}

	RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_packet *c;
    int i, overq = 0;
    SPLVAR;

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
	 num_pkts--, overq++);

    if (overq) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_AtomicIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_AtomicIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_AtomicIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_AtomicIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_AtomicIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
		break;
	    }
	}
    }

#ifdef KERNEL
    if (rx_nFreePackets < num_pkts)
	num_pkts = rx_nFreePackets;
#endif /* KERNEL */

    if (!num_pkts) {
	rxi_NeedMorePackets = TRUE;
    } else {
#ifndef KERNEL
	if (rx_nFreePackets < num_pkts) {
	    rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));
	}
#endif /* KERNEL */

	for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
	     i < num_pkts;
	     i++, c = queue_Next(c, rx_packet)) {
	    RX_FPQ_MARK_USED(c);
	}

	queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);

	rx_nFreePackets -= num_pkts;
    }

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;

    return num_pkts;
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct rx_queue * q)
{
    register struct rx_ts_info_t * rx_ts_info;
    register struct rx_packet *c, *nc;
    SPLVAR;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

    if (!num_pkts) {
	for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);
	}
    } else {
	for (queue_Scan(q, c, nc, rx_packet)) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);
	}
    }

    if (num_pkts) {
	RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
    }

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct rx_queue *q)
{
    struct rx_queue cbs;
    register struct rx_packet *p, *np;
    int qlen = 0;
    SPLVAR;

    osi_Assert(num_pkts >= 0);
    queue_Init(&cbs);

    if (!num_pkts) {
	for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	    }
	}
    } else {
	for (queue_Scan(q, p, np, rx_packet)) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	    }
	}
    }

    if (qlen) {
	queue_SpliceAppend(q, &cbs);
    }

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;

    return num_pkts;
}
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
int
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
{
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	}
    } else {
	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	}
    }
}

/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
    int i, nv;
    struct rx_queue q;
    register struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
	nv++;
    if ((nv + p->niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - p->niovecs;
    if (nv < 1)
	return nb;

    /* allocate buffers */
    queue_Init(&q);
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
	queue_Remove(cb);
	p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    }

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
    p->niovecs += nv;

    return nb;
}
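/* Worked example (symbolic): with nb = 2 * RX_CBUFFERSIZE + 1 the round-up
 * above yields nv = 3 continuation buffers.  If only two iovec slots remain
 * (RX_MAXWVECS - p->niovecs == 2), nv is clamped to 2, and the function
 * returns nb - 2 * RX_CBUFFERSIZE = 1, the number of bytes it failed to
 * come up with, matching the contract described above. */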
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;
    int getme;
    SPLVAR;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
    osi_Assert(p);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	rx_mallocedP = p;
	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_NeedMorePackets = FALSE;
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    int getme;
    SPLVAR;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
    osi_Assert(p);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	p->flags |= RX_PKTFLAG_FREE;

	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	rx_mallocedP = p;
    }

    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
{
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;
    int getme;
    SPLVAR;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	rx_mallocedP = p;
	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
    rx_ts_info->_FPQ.delta += apackets;

    if (flush_global &&
	(num_keep_local < apackets)) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
	rxi_NeedMorePackets = FALSE;
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
}
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
void
rxi_MorePacketsNoLock(int apackets)
{
#ifdef RX_ENABLE_TSFPQ
    register struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;
    int getme;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    do {
	getme = apackets * sizeof(struct rx_packet);
	p = (struct rx_packet *)osi_Alloc(getme);
	if (p == NULL) {
	    /* allocation failed: back the request off by 25% and retry */
	    apackets -= apackets / 4;
	    osi_Assert(apackets > 0);
	}
    } while (p == NULL);

    memset((char *)p, 0, getme);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	p->flags |= RX_PKTFLAG_FREE;

	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	rx_mallocedP = p;
    }

    rx_nFreePackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);
#endif /* RX_ENABLE_TSFPQ */
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();
}
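/* Sizing sketch: the request is inflated by
 * (apackets / 4) * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE)
 * so that roughly one packet in four can later be extended with enough
 * continuation buffers to hold a maximal jumbogram.  For example, if a full
 * jumbogram needs 6 continuation buffers beyond the first, a request for
 * 8 packets becomes 8 + 2 * 6 = 20 packet buffers. */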
void
rxi_FreeAllPackets(void)
{
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
}

#ifdef RX_ENABLE_TSFPQ
void
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
{
    register struct rx_ts_info_t * rx_ts_info;
    register int xfer;
    SPLVAR;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);
	if (num_keep_local < rx_ts_info->_FPQ.len) {
	    xfer = rx_ts_info->_FPQ.len - num_keep_local;
	    RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
	} else {
	    xfer = num_keep_local - rx_ts_info->_FPQ.len;
	    if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
		xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
	    if (rx_nFreePackets < xfer) {
		rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
	    }
	    RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
	}
	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
}

void
rxi_FlushLocalPacketsTSFPQ(void)
{
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
}
#endif /* RX_ENABLE_TSFPQ */
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
void
rx_CheckPackets(void)
{
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_initSendWindow);
    }
}

/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for afs_int32: the packets in use get pretty much randomly
   scattered across all the pages.  In order to permit unused {packets,bufs}
   to page out, they must be stored so that packets which are adjacent in
   memory are adjacent in the free list.  An array springs rapidly to mind.
 */
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    register struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	RX_TS_FPQ_LTOG(rx_ts_info);
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    dpf(("Free %lx\n", (unsigned long)p));

    RX_FPQ_MARK_FREE(p);
    rx_nFreePackets++;
    queue_Append(&rx_freePacketQueue, p);
}
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
{
    register struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p     -- packet from which continuation buffers will be freed
 * [IN] first -- iovec offset of first continuation buffer to free
 * [IN] q     -- queue into which continuation buffers will be chained
 *
 * returns:
 *   number of continuation buffers freed
 */
#ifndef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
{
    struct iovec *iov;
    struct rx_packet * cb;
    int count = 0;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
	cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
	RX_FPQ_MARK_FREE(cb);
	queue_Append(q, cb);
    }

    return count;
}
#endif

/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p     -- packet from which to free continuation buffers
 * [IN] first -- iovec offset of first continuation buffer to free
 */
static int
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
{
    struct iovec *iov;

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    return 0;
}

#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p            -- packet from which continuation buffers will be freed
 * [IN] first        -- iovec offset of first continuation buffer to free
 *                      any value less than 2, the min number of iovecs,
 *                      is treated as if it is 2.
 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
 *                      global free pool before returning
 */
static int
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
{
    struct iovec *iov;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
    return 0;
}
#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
void
rxi_RestoreDataBufs(struct rx_packet *p)
{
    int i;
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	    rxi_nBadIovecs++;
	    p->niovecs = i;
	    break;
	}
	iov->iov_len = RX_CBUFFERSIZE;
    }
}

#ifdef RX_ENABLE_TSFPQ
int
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;
    register struct rx_ts_info_t * rx_ts_info;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov < end) {
	RX_TS_INFO_GET(rx_ts_info);
	for (; iov < end; iov++) {
	    if (!iov->iov_base)
		osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	    RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
	    p->niovecs--;
	}
	if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	    NETPRI;
	    MUTEX_ENTER(&rx_freePktQ_lock);

	    RX_TS_FPQ_LTOG(rx_ts_info);
	    rxi_PacketsUnWait();

	    MUTEX_EXIT(&rx_freePktQ_lock);
	    USERPRI;
	}
    }

    return 0;
}
#else /* RX_ENABLE_TSFPQ */
int
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;
    SPLVAR;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov < end) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	for (; iov < end; iov++) {
	    if (!iov->iov_base)
		osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	    rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
	    p->niovecs--;
	}
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }

    return 0;
}
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacket(struct rx_packet *p)
{
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_FreePacket(struct rx_packet *p)
{
    SPLVAR;

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
}
#endif /* RX_ENABLE_TSFPQ */

/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_AtomicIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_AtomicIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_AtomicIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_AtomicIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_AtomicIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
		break;
	    }
	}
	return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    if (rx_stats_active)
	rx_AtomicIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

#ifdef KERNEL
	if (queue_IsEmpty(&rx_freePacketQueue))
	    osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(4 * rx_initSendWindow);
#endif /* KERNEL */

	RX_TS_FPQ_GTOL(rx_ts_info);
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
    return p;
}
#else /* RX_ENABLE_TSFPQ */
struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    register struct rx_packet *p;

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_AtomicIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_AtomicIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_AtomicIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_AtomicIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_AtomicIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
		break;
	    }
	}
	return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    if (rx_stats_active)
	rx_AtomicIncrement(rx_stats.packetRequests, rx_stats_mutex);

#ifdef KERNEL
    if (queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
    if (queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(4 * rx_initSendWindow);
#endif /* KERNEL */

    rx_nFreePackets--;
    p = queue_First(&rx_freePacketQueue, rx_packet);
    queue_Remove(p);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacketTSFPQ(int class, int pull_global)
{
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
	rx_AtomicIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(4 * rx_initSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

	MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
	return NULL;
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacket(int class)
{
    register struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
    return p;
}
#else /* RX_ENABLE_TSFPQ */
struct rx_packet *
rxi_AllocPacket(int class)
{
    register struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */
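/*
 * Lifecycle sketch (illustrative, hypothetical caller): a packet obtained
 * from rxi_AllocPacket must eventually be released with rxi_FreePacket,
 * which frees the continuation buffers first and then the packet itself:
 *
 *     struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
 *     if (p) {
 *         // ... fill p via rx_packetwrite() or the iovecs ...
 *         rxi_FreePacket(p);    // p must not be on any queue here
 *     }
 */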
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
struct rx_packet *
rxi_AllocSendPacket(register struct rx_call *call, int want)
{
    register struct rx_packet *p = (struct rx_packet *)0;
    register int mud;
    register unsigned delta;

    SPLVAR;
    mud = call->MTU - RX_HEADER_SIZE;
    delta =
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
	want += delta;
	want = MIN(want, mud);

	if ((unsigned)want > p->length)
	    (void)rxi_AllocDataBuf(p, (want - p->length),
				   RX_PACKET_CLASS_SEND_CBUF);

	if ((unsigned)p->length > mud)
	    p->length = mud;

	if (delta >= p->length) {
	    rxi_FreePacket(p);
	    p = NULL;
	} else {
	    p->length -= delta;
	}
	return p;
    }
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);

	    want += delta;
	    want = MIN(want, mud);

	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);

	    if ((unsigned)p->length > mud)
		p->length = mud;

	    if (delta >= p->length) {
		rxi_FreePacket(p);
		p = NULL;
	    } else {
		p->length -= delta;
	    }
	    break;
	}

	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we
	 * just wait.  */
	NETPRI;
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
	osi_rxSleep(&rx_waitingForPackets);
#endif
	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
	USERPRI;
    }

    return p;
}
#ifndef KERNEL
#ifdef AFS_NT40_ENV
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
#else
/* count the number of used FDs */
static int
CountFDs(register int amax)
{
    struct stat tstat;
    register int i, code;
    register int count;

    count = 0;
    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
	if (code == 0)
	    count++;
    }
    return count;
}
#endif /* AFS_NT40_ENV */
#else /* KERNEL */
#define CountFDs(amax) amax
#endif /* KERNEL */
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
int
rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host,
	       u_short * port)
{
    struct sockaddr_in from;
    int nbytes;
    afs_int32 rlen;
    register afs_int32 tlen, savelen;
    struct msghdr msg;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
    tlen = rlen - tlen;
    if (tlen > 0) {
	tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
	if (tlen > 0) {
	    tlen = rlen - tlen;
	} else
	    tlen = rlen;
    } else
	tlen = rlen;

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes < 0 && errno == EWOULDBLOCK) {
	    if (rx_stats_active)
		rx_AtomicIncrement(rx_stats.noPacketOnRead, rx_stats_mutex);
	} else if (nbytes <= 0) {
	    if (rx_stats_active) {
		MUTEX_ENTER(&rx_stats_mutex);
		rx_AtomicIncrement_NL(rx_stats.bogusPacketOnRead);
		rx_AtomicSwap(&rx_stats.bogusHost, from.sin_addr.s_addr, rx_stats_mutex);
		MUTEX_EXIT(&rx_stats_mutex);
	    }
	    dpf(("B: bogus packet from [%x,%d] nb=%d",
		 ntohl(from.sin_addr.s_addr), ntohs(from.sin_port), nbytes));
	}
	return 0;
    } else if ((rx_intentionallyDroppedOnReadPer100 > 0)
	       && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;

	dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
	     p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
	     p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	     p->length));
	rxi_TrimDataBufs(p, 1);
	return 0;
    } else {
	/* Extract packet header. */
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
	    struct rx_peer *peer;
	    if (rx_stats_active)
		rx_AtomicIncrement(rx_stats.packetsRead[p->header.type - 1], rx_stats_mutex);
	    /*
	     * Try to look up this peer structure.  If it doesn't exist,
	     * don't create a new one -
	     * we don't keep count of the bytes sent/received if a peer
	     * structure doesn't already exist.
	     *
	     * The peer/connection cleanup code assumes that there is 1 peer
	     * per connection.  If we actually created a peer structure here
	     * and this packet was an rxdebug packet, the peer structure would
	     * never be cleaned up.
	     */
	    peer = rxi_FindPeer(*host, *port, 0, 0);
	    /* Since this may not be associated with a connection,
	     * it may have no refCount, meaning we could race with
	     * ReapConnections
	     */
	    if (peer && (rx_AtomicPeek_NL(peer->refCount) > 0)) {
		MUTEX_ENTER(&peer->peer_lock);
		hadd32(peer->bytesReceived, p->length);
		MUTEX_EXIT(&peer->peer_lock);
	    }
	}

	/* Free any empty packet buffers at the end of this packet */
	rxi_TrimDataBufs(p, 1);

	return 1;
    }
}

#endif /* !KERNEL || UKERNEL */
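/*
 * Receive-loop sketch (illustrative, hypothetical userspace caller): a
 * listener can recycle one packet buffer across reads, restoring the iovec
 * sizes before each attempt and only consuming the packet when
 * rxi_ReadPacket reports success:
 *
 *     afs_uint32 host;
 *     u_short port;
 *     struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_RECEIVE);
 *     for (;;) {
 *         rxi_RestoreDataBufs(p);    // reset iovec lengths for reuse
 *         if (rxi_ReadPacket(sock, p, &host, &port)) {
 *             // valid packet: header decoded, length set, sender recorded
 *         }                          // else bogus/would-block: just retry
 *     }
 */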
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */

struct rx_packet *
rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
		     int first)
{
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    int niov, i;
    struct iovec *iov;
    int length;
    afs_uint32 temp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
	return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
	return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    p->niovecs = 2;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

    return np;
}
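/* Wire layout of the four-byte jumbo header, which the decode above and the
 * encode in rxi_SendPacketList must agree on: the flags byte rides in the
 * most significant byte and the 16-bit checksum in the low two bytes (the
 * remaining byte is padding):
 *
 *     temp = ((afs_uint32) flags << 24) | (afs_uint32) cksum;
 *     *(afs_uint32 *) jp = htonl(temp);            // encode (send side)
 *
 *     temp = ntohl(*(afs_uint32 *) jp);            // decode (this function)
 *     flags = (u_char) (temp >> 24);
 *     cksum = (u_short) temp;
 */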
#ifndef KERNEL
/* Send a udp datagram */
int
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
{
    struct msghdr msg;
    int ret;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);

    return ret;
}
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
static int
cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
{
    register int n;

    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return len;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);
	cp += n;
	len -= n;
    }
    return len;
}

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
static void
cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
	   int niovs)
{
    register int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	    t = iovs[i].iov_len;
	    memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
    }
}

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#endif

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
static int
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
{
    caddr_t p1, p2;
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

	if (m->m_len <= off) {
	    off -= m->m_len;
	    m = m->m_next;
	}

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    i = 0;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    while (len) {
	t = MIN(l1, MIN(l2, (unsigned int)len));
	memcpy(p2, p1, t);
	    p1 = mtod(m, caddr_t);
	    p2 = iovs[i].iov_base;
	    l2 = iovs[i].iov_len;
    }

    return len;
}
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
int
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     mblk_t *amb;
#else
     struct mbuf *amb;
#endif
     void (*free) ();
     struct rx_packet *phandle;
     int hdr_len, data_len;
{
    int code;

    code =
	m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
		     phandle->niovecs);
    (*free) (amb);

    return code;
}
#endif
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */

struct rx_packet *
rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
		       afs_int32 ahost, short aport, int istack)
{
    struct rx_debugIn tin;
    afs_int32 tl;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
    } else {
	return ap;
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;

	    /* get basic stats */
	    memset((char *)&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
#endif
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.nPackets = htonl(rx_nPackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_nWaiting);
	    tstat.nWaited = htonl(rx_nWaited);
	    queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
			tstat.idleThreads);
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    if (tl <= 0) {
		rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
			       (char *)&tstat);
		ap->length = sizeof(struct rx_debugStats);
		rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
		rx_computelen(ap, ap->length);
	    }
	    break;
	}
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    int i, j;
	    register struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);

	    tl = sizeof(struct rx_debugConn) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    memset((char *)&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of connections.
		 */
#ifdef AFS_PTHREAD_ENV
#endif
#endif
		MUTEX_ENTER(&rx_connHashTable_lock);
		/* We might be slightly out of step since we are not
		 * locking each call, but this is only debugging output.
		 */
		for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		    if ((all || rxi_IsConnInteresting(tc))
			&& tin.index-- <= 0) {
			tconn.host = tc->peer->host;
			tconn.port = tc->peer->port;
			tconn.cid = htonl(tc->cid);
			tconn.epoch = htonl(tc->epoch);
			tconn.serial = htonl(tc->serial);
			for (j = 0; j < RX_MAXCALLS; j++) {
			    tconn.callNumber[j] = htonl(tc->callNumber[j]);
			    if ((tcall = tc->call[j])) {
				tconn.callState[j] = tcall->state;
				tconn.callMode[j] = tcall->mode;
				tconn.callFlags[j] = tcall->flags;
				if (queue_IsNotEmpty(&tcall->rq))
				    tconn.callOther[j] |= RX_OTHER_IN;
				if (queue_IsNotEmpty(&tcall->tq))
				    tconn.callOther[j] |= RX_OTHER_OUT;
			    } else
				tconn.callState[j] = RX_STATE_NOTINIT;
			}

			tconn.natMTU = htonl(tc->peer->natMTU);
			tconn.error = htonl(tc->error);
			tconn.flags = tc->flags;
			tconn.type = tc->type;
			tconn.securityIndex = tc->securityIndex;
			if (tc->securityObject) {
			    RXS_GetStats(tc->securityObject, tc,
					 &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			    DOHTONL(flags);
			    DOHTONL(expires);
			    DOHTONL(packetsReceived);
			    DOHTONL(packetsSent);
			    DOHTONL(bytesReceived);
			    DOHTONL(bytesSent);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.spares) /
				 sizeof(short); i++)
				DOHTONS(spares[i]);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)
				DOHTONL(sparel[i]);
			}

			MUTEX_EXIT(&rx_connHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
				       (char *)&tconn);
			tl = ap->length;
			ap->length = sizeof(struct rx_debugConn);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			ap->length = tl;
			return ap;
		    }
		}
		MUTEX_EXIT(&rx_connHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
			   (char *)&tconn);
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}
	/*
	 * Pass back all the peer structures we have available
	 */
    case RX_DEBUGI_GETPEER:{
	    int i;
	    register struct rx_peer *tp;
	    struct rx_debugPeer tpeer;

	    tl = sizeof(struct rx_debugPeer) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    memset((char *)&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of peers.
		 *
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock
		 * also increases the risk that we will miss a new
		 * entry - but we are willing to live with this
		 * limitation since this is meant for debugging only
		 */
#ifdef AFS_PTHREAD_ENV
#endif
#endif
		MUTEX_ENTER(&rx_peerHashTable_lock);
		for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		    if (tin.index-- <= 0) {
			tpeer.host = tp->host;
			tpeer.port = tp->port;
			tpeer.ifMTU = htons(tp->ifMTU);
			tpeer.idleWhen = htonl(tp->idleWhen);
			tpeer.refCount = htons(rx_AtomicPeek_NL(tp->refCount));
			tpeer.burstSize = tp->burstSize;
			tpeer.burst = tp->burst;
			tpeer.burstWait.sec = htonl(tp->burstWait.sec);
			tpeer.burstWait.usec = htonl(tp->burstWait.usec);
			tpeer.rtt = htonl(tp->rtt);
			tpeer.rtt_dev = htonl(tp->rtt_dev);
			tpeer.timeout.sec = htonl(tp->timeout.sec);
			tpeer.timeout.usec = htonl(tp->timeout.usec);
			tpeer.nSent = htonl(tp->nSent);
			tpeer.reSends = htonl(tp->reSends);
			tpeer.inPacketSkew = htonl(tp->inPacketSkew);
			tpeer.outPacketSkew = htonl(tp->outPacketSkew);
			tpeer.rateFlag = htonl(tp->rateFlag);
			tpeer.natMTU = htons(tp->natMTU);
			tpeer.maxMTU = htons(tp->maxMTU);
			tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
			tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
			tpeer.MTU = htons(tp->MTU);
			tpeer.cwind = htons(tp->cwind);
			tpeer.nDgramPackets = htons(tp->nDgramPackets);
			tpeer.congestSeq = htons(tp->congestSeq);
			tpeer.bytesSent.high = htonl(tp->bytesSent.high);
			tpeer.bytesSent.low = htonl(tp->bytesSent.low);
			tpeer.bytesReceived.high =
			    htonl(tp->bytesReceived.high);
			tpeer.bytesReceived.low =
			    htonl(tp->bytesReceived.low);

			MUTEX_EXIT(&rx_peerHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
				       (char *)&tpeer);
			tl = ap->length;
			ap->length = sizeof(struct rx_debugPeer);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			ap->length = tl;
			return ap;
		    }
		}
		MUTEX_EXIT(&rx_peerHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
			   (char *)&tpeer);
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}
    case RX_DEBUGI_RXSTATS:{
	    int i;
	    afs_int32 *s;

	    tl = sizeof(rx_stats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    /* Since it's all int32s convert to network order with a loop. */
	    if (rx_stats_active)
		MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) & rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

	    tl = ap->length;
	    ap->length = sizeof(rx_stats);
	    if (rx_stats_active)
		MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    ap->length = tl;
	    break;
	}

    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	break;
    }
    return ap;
}
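/*
 * Client-side sketch (illustrative): a debug query is a struct rx_debugIn
 * whose fields travel in network byte order, since this function ntohl()s
 * them on receipt.  A hypothetical rxdebug-style client would fill in:
 *
 *     struct rx_debugIn tin;
 *     tin.type = htonl(RX_DEBUGI_GETSTATS);
 *     tin.index = htonl(0);
 *     // place tin in a packet with RX_CLIENT_INITIATED set, then send
 *
 * and, for GETCONN/GETPEER, walk index upward until the sentinel reply
 * (cid or host == 0xffffffff) comes back.
 */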
struct rx_packet *
rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
			 afs_int32 ahost, short aport, int istack)
{
    afs_int32 tl;

    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	char buf[66];

	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);
	tl = ap->length;
	ap->length = 65;
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
    }

    return ap;
}
/* send a debug packet back to the sender */
static void
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_int32 ahost, short aport, afs_int32 istack)
{
    struct sockaddr_in taddr;
    int i;
    int nbytes;
    int saven = 0;
    size_t savelen = 0;
#ifdef KERNEL
    int waslocked = ISAFS_GLOCK();
#endif

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
#endif

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= apacket->wirevec[i].iov_len;
    }

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");
    }
#endif
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");
    }
#endif

    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
    }
}
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
void
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
{
#if defined(KERNEL)
    int waslocked;
#endif
    int code;
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    osi_socket socket;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;
    }
#ifdef RXDEBUG
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	if (drop)
	    deliveryType = 'D';	/* Drop the packet */
    }
#endif

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
	(conn->type ==
	 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#ifdef KERNEL
	waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "before osi_NetSend()");
	}
#endif
#endif
	if ((code =
	     osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
			 p->length + RX_HEADER_SIZE, istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    if (rx_stats_active)
		rx_AtomicIncrement(rx_stats.netSendFailures, rx_stats_mutex);
	    p->retryTime = p->timeSent;	/* resend it very soon */
	    clock_Addmsec(&(p->retryTime),
			  10 + (((afs_uint32) p->backoff) << 8));
	    /* Some systems are nice and tell us right away that we cannot
	     * reach this recipient by returning an error code.
	     * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call &&
#ifdef AFS_NT40_ENV
		code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
#elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
		code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
		code == EHOSTUNREACH
#else
		0
#endif
		)
		call->lastReceiveTime = 0;
	}
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "after osi_NetSend()");
	}
#endif
#endif
#ifdef RXDEBUG
    }

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1],
	 ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch,
	 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	 (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    if (rx_stats_active)
	rx_AtomicIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
2323 /* Send a list of packets to appropriate destination for the specified
2324 * connection. The headers are first encoded and placed in the packets.
2327 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2328 struct rx_packet **list, int len, int istack)
2330 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2333 struct sockaddr_in addr;
2334 register struct rx_peer *peer = conn->peer;
2336 struct rx_packet *p = NULL;
2337 struct iovec wirevec[RX_MAXIOVECS];
2338 int i, length, code;
2341 struct rx_jumboHeader *jp;
2343 char deliveryType = 'S';
2345 /* The address we're sending the packet to */
2346 addr.sin_family = AF_INET;
2347 addr.sin_port = peer->port;
2348 addr.sin_addr.s_addr = peer->host;
2350 if (len + 1 > RX_MAXIOVECS) {
2351 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2355 * Stamp the packets in this jumbogram with consecutive serial numbers
2357 MUTEX_ENTER(&conn->conn_data_lock);
2358 serial = conn->serial;
2359 conn->serial += len;
2360 MUTEX_EXIT(&conn->conn_data_lock);
2363 /* This stuff should be revamped, I think, so that most, if not
2364 * all, of the header stuff is always added here. We could
2365 * probably do away with the encode/decode routines. XXXXX */
2368 length = RX_HEADER_SIZE;
2369 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2370 wirevec[0].iov_len = RX_HEADER_SIZE;
2371 for (i = 0; i < len; i++) {
2374 /* The whole 3.5 jumbogram scheme relies on packets fitting
2375 * in a single packet buffer. */
2376 if (p->niovecs > 2) {
2377 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2380 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2383 if (p->length != RX_JUMBOBUFFERSIZE) {
2384 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2386 p->header.flags |= RX_JUMBO_PACKET;
2387 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2388 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2390 wirevec[i + 1].iov_len = p->length;
2391 length += p->length;
2393 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2395 /* Convert jumbo packet header to network byte order */
2396 temp = (afs_uint32) (p->header.flags) << 24;
2397 temp |= (afs_uint32) (p->header.spare);
2398 *(afs_uint32 *) jp = htonl(temp);
2400 jp = (struct rx_jumboHeader *)
2401 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2403 /* Stamp each packet with a unique serial number. The serial
2404 * number is maintained on a connection basis because some types
2405 * of security may be based on the serial number of the packet,
2406 * and security is handled on a per authenticated-connection
2408 /* Pre-increment, to guarantee no zero serial number; a zero
2409 * serial number means the packet was never sent. */
2410 p->header.serial = ++serial;
2411 /* This is so we can adjust retransmit time-outs better in the face of
2412 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2414 if (p->firstSerial == 0) {
2415 p->firstSerial = p->header.serial;
2418 /* If an output tracer function is defined, call it with the packet and
2419 * network address. Note this function may modify its arguments. */
2420 if (rx_almostSent) {
2421 int drop = (*rx_almostSent) (p, &addr);
2422 /* drop packet if return value is non-zero? */
2424 deliveryType = 'D'; /* Drop the packet */
2428 /* Get network byte order header */
2429 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2430 * touch ALL the fields */
2433 /* Send the packet out on the same socket that related packets are being
2437 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2440 /* Possibly drop this packet, for testing purposes */
2441 if ((deliveryType == 'D')
2442 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2443 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2444 deliveryType = 'D'; /* Drop the packet */
2446 deliveryType = 'S'; /* Send the packet */
2447 #endif /* RXDEBUG */
2449 /* Loop until the packet is sent. We'd prefer just to use a
2450 * blocking socket, but unfortunately the interface doesn't
2451 * allow us to have the socket block in send mode, and not
2452 * block in receive mode */
2453 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2454 waslocked = ISAFS_GLOCK();
2455 if (!istack && waslocked)
2459 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2461 /* send failed, so let's hurry up the resend, eh? */
2462 if (rx_stats_active)
2463 rx_AtomicIncrement(rx_stats.netSendFailures, rx_stats_mutex);
2464 for (i = 0; i < len; i++) {
2466 p->retryTime = p->timeSent; /* resend it very soon */
2467 clock_Addmsec(&(p->retryTime),
2468 10 + (((afs_uint32) p->backoff) << 8));
2470 /* Some systems are nice and tell us right away that we cannot
2471 * reach this recipient by returning an error code.
2472 * So, when this happens let's "down" the host NOW so
2473 * we don't sit around waiting for this host to time out later.
2477 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2478 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2479 code == -ENETUNREACH
2480 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2481 code == EHOSTUNREACH
2486 call->lastReceiveTime = 0;
2488 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2489 if (!istack && waslocked)
2497 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2500 if (rx_stats_active)
2501 rx_AtomicIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2502 MUTEX_ENTER(&peer->peer_lock);
2503 hadd32(peer->bytesSent, p->length);
2504 MUTEX_EXIT(&peer->peer_lock);
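/* A minimal sketch of the datagram assembled above (sizes assumed from
 * rx.h: RX_HEADER_SIZE = 28, RX_JUMBOBUFFERSIZE = 1412,
 * RX_JUMBOHEADERSIZE = 4). A three-packet jumbogram goes out as a single
 * UDP datagram gathered from four iovecs:
 *
 *   wirevec[0]: 28-byte Rx header, taken from list[0]
 *   wirevec[1]: 1412 bytes of data + 4-byte jumbo header (packet 0)
 *   wirevec[2]: 1412 bytes of data + 4-byte jumbo header (packet 1)
 *   wirevec[3]: p->length bytes of data, no jumbo header (last packet)
 *
 * Only the first packet contributes a full Rx header; the receiver can
 * reconstruct headers for the trailing packets from that header, the jumbo
 * headers, and the consecutive serial numbers stamped above. */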
2508 /* Send a "special" packet to the peer connection. If call is
2509 * specified, then the packet is directed to a specific call channel
2510 * associated with the connection, otherwise it is directed to the
2511 * connection only. Uses optionalPacket if it is supplied, rather than
2512 * allocating a new packet buffer. Nbytes is the length of the data
2513 * portion of the packet. If data is non-null, nbytes of data are
2514 * copied into the packet. Type is the type of the packet, as defined
2515 * in rx.h. Bug: there's a lot of duplication between this and other
2516 * routines. This needs to be cleaned up. */
2518 rxi_SendSpecial(register struct rx_call *call,
2519 register struct rx_connection *conn,
2520 struct rx_packet *optionalPacket, int type, char *data,
2521 int nbytes, int istack)
2523 /* Some of the following stuff should be common code for all
2524 * packet sends (it's repeated elsewhere) */
2525 register struct rx_packet *p;
2527 int savelen = 0, saven = 0;
2528 int channel, callNumber;
2530 channel = call->channel;
2531 callNumber = *call->callNumber;
2532 /* BUSY packets refer to the next call on this connection */
2533 if (type == RX_PACKET_TYPE_BUSY) {
2542 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2544 osi_Panic("rxi_SendSpecial failure");
2551 p->header.serviceId = conn->serviceId;
2552 p->header.securityIndex = conn->securityIndex;
2553 p->header.cid = (conn->cid | channel);
2554 p->header.callNumber = callNumber;
2556 p->header.epoch = conn->epoch;
2557 p->header.type = type;
2558 p->header.flags = 0;
2559 if (conn->type == RX_CLIENT_CONNECTION)
2560 p->header.flags |= RX_CLIENT_INITIATED;
2562 rx_packetwrite(p, 0, nbytes, data);
2564 for (i = 1; i < p->niovecs; i++) {
2565 if (nbytes <= p->wirevec[i].iov_len) {
2566 savelen = p->wirevec[i].iov_len;
2568 p->wirevec[i].iov_len = nbytes;
2569 p->niovecs = i + 1; /* so condition fails because i == niovecs */
2571 nbytes -= p->wirevec[i].iov_len;
2575 rxi_Send(call, p, istack);
2577 rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2578 if (saven) { /* means we truncated the packet above. We probably don't */
2579 /* really need to do this, but it seems safer this way, given that */
2580 /* sneaky optionalPacket... */
2581 p->wirevec[i - 1].iov_len = savelen;
2584 if (!optionalPacket)
2586 return optionalPacket;
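/* Usage sketch (illustrative only; the guard macro, helper name, and abort
 * value are assumptions, not taken from a real caller): send a
 * connection-level abort carrying a 4-byte error code in network order.
 * With no call and no optionalPacket, rxi_SendSpecial allocates a fresh
 * packet, copies the data in, and frees the packet after sending. */
#ifdef RX_SENDSPECIAL_EXAMPLE	/* hypothetical guard, never defined by rx */
static void
rxi_ExampleSendAbort(struct rx_connection *conn, afs_int32 error, int istack)
{
    afs_int32 nError = htonl(error);

    rxi_SendSpecial((struct rx_call *)0, conn, (struct rx_packet *)0,
		    RX_PACKET_TYPE_ABORT, (char *)&nError, sizeof(nError),
		    istack);
}
#endif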
2590 /* Encode the packet's header (from the struct header in the packet to
2591 * the net byte order representation in the wire representation of the
2592 * packet, which is what is actually sent out on the wire) */
2594 rxi_EncodePacketHeader(register struct rx_packet *p)
2596 register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2598 memset((char *)buf, 0, RX_HEADER_SIZE);
2599 *buf++ = htonl(p->header.epoch);
2600 *buf++ = htonl(p->header.cid);
2601 *buf++ = htonl(p->header.callNumber);
2602 *buf++ = htonl(p->header.seq);
2603 *buf++ = htonl(p->header.serial);
2604 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2605 | (((afs_uint32) p->header.flags) << 16)
2606 | (p->header.userStatus << 8) | p->header.securityIndex);
2607 /* Note: top 16 bits of this next word were reserved */
2608 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
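/* Resulting 28-byte wire header, by byte offset (all fields big-endian):
 *   0: epoch   4: cid   8: callNumber   12: seq   16: serial
 *   20: type, flags, userStatus, securityIndex (one byte each)
 *   24: spare (16 bits, also used for the security checksum)
 *       followed by serviceId (16 bits) */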
2611 /* Decode the packet's header (from net byte order to a struct header) */
2613 rxi_DecodePacketHeader(register struct rx_packet *p)
2615 register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2618 p->header.epoch = ntohl(*buf);
2620 p->header.cid = ntohl(*buf);
2622 p->header.callNumber = ntohl(*buf);
2624 p->header.seq = ntohl(*buf);
2626 p->header.serial = ntohl(*buf);
2632 /* C will truncate byte fields to bytes for me */
2633 p->header.type = temp >> 24;
2634 p->header.flags = temp >> 16;
2635 p->header.userStatus = temp >> 8;
2636 p->header.securityIndex = temp >> 0;
2641 p->header.serviceId = (temp & 0xffff);
2642 p->header.spare = temp >> 16;
2643 /* Note: top 16 bits of this last word are the security checksum */
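/* Minimal round-trip sketch (illustrative only; the guard macro and helper
 * are not part of rx): encoding a header into wirevec[0] and decoding it
 * back should preserve every header field, since each field is no wider
 * than the wire slot it is packed into. */
#ifdef RX_HEADER_SELFTEST	/* hypothetical guard, never defined by rx */
static int
rxi_HeaderRoundTripOK(struct rx_packet *p)
{
    struct rx_header saved = p->header;

    rxi_EncodePacketHeader(p);
    rxi_DecodePacketHeader(p);
    return saved.epoch == p->header.epoch
	&& saved.cid == p->header.cid
	&& saved.callNumber == p->header.callNumber
	&& saved.seq == p->header.seq
	&& saved.serial == p->header.serial
	&& saved.type == p->header.type
	&& saved.flags == p->header.flags
	&& saved.serviceId == p->header.serviceId;
}
#endif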
2647 rxi_PrepareSendPacket(register struct rx_call *call,
2648 register struct rx_packet *p, register int last)
2650 register struct rx_connection *conn = call->conn;
2652 ssize_t len; /* len must be a signed type; it can go negative */
2654 p->flags &= ~RX_PKTFLAG_ACKED;
2655 p->header.cid = (conn->cid | call->channel);
2656 p->header.serviceId = conn->serviceId;
2657 p->header.securityIndex = conn->securityIndex;
2659 /* No data packets on call 0. Where do these come from? */
2660 if (*call->callNumber == 0)
2661 *call->callNumber = 1;
2663 p->header.callNumber = *call->callNumber;
2664 p->header.seq = call->tnext++;
2665 p->header.epoch = conn->epoch;
2666 p->header.type = RX_PACKET_TYPE_DATA;
2667 p->header.flags = 0;
2668 p->header.spare = 0;
2669 if (conn->type == RX_CLIENT_CONNECTION)
2670 p->header.flags |= RX_CLIENT_INITIATED;
2673 p->header.flags |= RX_LAST_PACKET;
2675 clock_Zero(&p->retryTime); /* Never yet transmitted */
2676 clock_Zero(&p->firstSent); /* Never yet transmitted */
2677 p->header.serial = 0; /* Another way of saying never transmitted... */
2680 /* Now that we're sure this is the last data on the call, make sure
2681 * that the "length" and the sum of the iov_lens matches. */
2682 len = p->length + call->conn->securityHeaderSize;
2684 for (i = 1; i < p->niovecs && len > 0; i++) {
2685 len -= p->wirevec[i].iov_len;
2688 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2689 } else if (i < p->niovecs) {
2690 /* Free any extra elements in the wirevec */
2691 #if defined(RX_ENABLE_TSFPQ)
2692 rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2693 #else /* !RX_ENABLE_TSFPQ */
2694 MUTEX_ENTER(&rx_freePktQ_lock);
2695 rxi_FreeDataBufsNoLock(p, i);
2696 MUTEX_EXIT(&rx_freePktQ_lock);
2697 #endif /* !RX_ENABLE_TSFPQ */
2702 p->wirevec[i - 1].iov_len += len;
2703 RXS_PreparePacket(conn->securityObject, call, p);
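/* Worked example (all values assumed): suppose p->length is 3000, the
 * security header adds 8 bytes, and data iovecs 1..3 each hold 1412 bytes.
 * len starts at 3008; after subtracting iovecs 1 and 2 it is 184, and after
 * iovec 3 it is -1228, so the loop exits with i == 4 == p->niovecs. Nothing
 * is freed, and wirevec[3].iov_len += -1228 trims the final buffer to 184
 * bytes, making the data iovecs sum to length + securityHeaderSize again. */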
2706 /* Given an interface MTU size, calculate an adjusted MTU size that
2707 * will make efficient use of the RX buffers when the peer is sending
2708 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
2710 rxi_AdjustIfMTU(int mtu)
2715 if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
2717 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2718 if (mtu <= adjMTU) {
2725 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2726 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
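/* Worked example (constants assumed from rx.h: RX_HEADER_SIZE = 28,
 * RX_JUMBOBUFFERSIZE = 1412, RX_JUMBOHEADERSIZE = 4, so adjMTU = 1444):
 * for an Ethernet-sized mtu of 1500, the 56-byte remainder above adjMTU
 * holds no extra 1416-byte fragment, so the result is 1444. For mtu = 9000,
 * the 7556-byte remainder holds five more fragments, giving
 * 1444 + 5 * 1416 = 8524. */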
2729 /* Given an interface MTU size, and the peer's advertised max receive
2730 * size, calculate an adjusted maxMTU size that makes efficient use
2731 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2733 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2735 int maxMTU = mtu * rxi_nSendFrags;
2736 maxMTU = MIN(maxMTU, peerMaxMTU);
2737 return rxi_AdjustIfMTU(maxMTU);
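/* Example (values assumed): with mtu = 1444, rxi_nSendFrags = 4, and a peer
 * advertising peerMaxMTU = 5000, maxMTU is MIN(5776, 5000) = 5000, which
 * rxi_AdjustIfMTU then reduces to 1444 + 2 * 1416 = 4276, a whole number of
 * jumbo fragments. */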
2740 /* Given a packet size, figure out how many packets will fit in a datagram.
2741 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2742 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2743 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2745 rxi_AdjustDgramPackets(int frags, int mtu)
2748 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2751 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2752 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2753 /* subtract the size of the first and last packets */
2754 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2758 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
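/* Worked example (UDP_HDR_SIZE assumed to be 28, i.e. a 20-byte IP header
 * plus an 8-byte UDP header, and RX_MAX_PACKET_SIZE assumed large enough
 * not to clamp): for frags = 3 and mtu = 1444, maxMTU is
 * 3 * 1472 - 28 = 4388; subtracting the first and last packets leaves
 * 4388 - 2856 = 1532, which holds one more 1416-byte middle buffer, so the
 * function returns 2 + 1 = 3 packets per datagram. */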
2763 * This function can be used by the Windows Cache Manager
2764 * to dump the list of all rx packets so that we can determine
2765 * where the packet leakage is.
2767 int rx_DumpPackets(FILE *outputFile, char *cookie)
2769 #ifdef RXDEBUG_PACKET
2771 struct rx_packet *p;
2775 MUTEX_ENTER(&rx_freePktQ_lock);
2776 sprintf(output, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
2777 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2779 for (p = rx_mallocedP; p; p = p->allNextp) {
2780 sprintf(output, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, retryTime=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, backoff=%u, length=%u header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
2781 cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec, p->retryTime.sec, p->retryTime.usec,
2782 p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->backoff, (afs_uint32)p->length,
2783 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
2784 (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
2785 (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
2786 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2789 sprintf(output, "%s - End dumping all Rx Packets\r\n", cookie);
2790 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2792 MUTEX_EXIT(&rx_freePktQ_lock);
2794 #endif /* RXDEBUG_PACKET */
2797 #endif /* AFS_NT40_ENV */
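/* Usage sketch (illustrative; the handle and cookie values are
 * assumptions): the Windows cache manager can call this periodically with a
 * file opened for writing and a distinctive tag, e.g.
 *
 *     rx_DumpPackets(dumpFile, "afsd-leakcheck-1");
 *
 * Each call brackets the per-packet lines with Start/End markers carrying
 * the same cookie, so successive dumps can be diffed to spot leaked
 * packets. */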