2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "afs/param.h"
14 #include <afs/param.h>
22 # include "afs/sysincludes.h"
23 # include "afsincludes.h"
24 # include "rx/rx_kcommon.h"
25 # include "rx/rx_clock.h"
26 # include "rx/rx_queue.h"
27 # include "rx/rx_packet.h"
28 # else /* defined(UKERNEL) */
29 # ifdef RX_KERNEL_TRACE
30 # include "../rx/rx_kcommon.h"
33 # ifndef AFS_LINUX20_ENV
36 # if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
37 # include "afs/sysincludes.h"
39 # if defined(AFS_OBSD_ENV)
42 # include "h/socket.h"
43 # if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
44 # if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
45 # include "sys/mount.h" /* it gets pulled in by something later anyway */
49 # include "netinet/in.h"
50 # include "afs/afs_osi.h"
51 # include "rx_kmutex.h"
52 # include "rx/rx_clock.h"
53 # include "rx/rx_queue.h"
55 # include <sys/sysmacros.h>
57 # include "rx/rx_packet.h"
58 # endif /* defined(UKERNEL) */
59 # include "rx/rx_internal.h"
60 # include "rx/rx_globals.h"
62 # include "sys/types.h"
63 # include <sys/stat.h>
65 # if defined(AFS_NT40_ENV)
66 # include <winsock2.h>
68 # define EWOULDBLOCK WSAEWOULDBLOCK
71 # include "rx_xmit_nt.h"
74 # include <sys/socket.h>
75 # include <netinet/in.h>
77 # include "rx_clock.h"
78 # include "rx_internal.h"
80 # include "rx_queue.h"
82 # include <sys/sysmacros.h>
84 # include "rx_packet.h"
85 # include "rx_globals.h"
95 /* rxdb_fileID is used to identify the lock location, along with line#. */
96 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
97 #endif /* RX_LOCKS_DB */
/* Head of the chain of all malloc'd packets (see allNextp links below). */
98 static struct rx_packet *rx_mallocedP = 0;
/* Monotonic id stamped into each new packet under RXDEBUG_PACKET. */
100 static afs_uint32 rx_packet_id = 0;
103 extern char cml_version_number[];
/* Forward declarations for the file-local allocation/free helpers. */
105 static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);
107 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
108 afs_int32 ahost, short aport,
111 static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
113 struct rx_queue * q);
114 #ifdef RX_ENABLE_TSFPQ
116 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
119 /* some rules about packets:
120 * 1. When a packet is allocated, the final iov_buf contains room for
121 * a security trailer, but iov_len masks that fact. If the security
122 * package wants to add the trailer, it may do so, and then extend
123 * iov_len appropriately. For this reason, packet's niovecs and
124 * iov_len fields should be accurate before calling PreparePacket.
128 * all packet buffers (iov_base) are integral multiples of
130 * offset is an integral multiple of the word size.
/* Read one afs_int32 at byte `offset` into the packet's data area by
 * walking the wirevec until the iovec containing the offset is found.
 * NOTE(review): this dump elides lines; the actual read/return is not
 * fully visible here. */
133 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
/* l accumulates the total length of all iovecs before the current one. */
137 for (l = 0, i = 1; i < packet->niovecs; i++) {
138 if (l + packet->wirevec[i].iov_len > offset) {
140 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
143 l += packet->wirevec[i].iov_len;
150 * all packet buffers (iov_base) are integral multiples of the word size.
151 * offset is an integral multiple of the word size.
/* Store `data` as an afs_int32 at byte `offset` into the packet's data
 * area, walking the wirevec to locate the containing iovec (mirror of
 * rx_SlowGetInt32 above). */
154 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
/* l = cumulative length of iovecs prior to index i. */
158 for (l = 0, i = 1; i < packet->niovecs; i++) {
159 if (l + packet->wirevec[i].iov_len > offset) {
160 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
161 (offset - l))) = data;
164 l += packet->wirevec[i].iov_len;
171 * all packet buffers (iov_base) are integral multiples of the
173 * offset is an integral multiple of the word size.
175 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
/* Copy up to `resid` bytes starting at byte `offset` of the packet's
 * data area into `out`.  Returns the number of bytes actually copied
 * (r - resid when the packet ran short, r otherwise). */
178 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
181 unsigned int i, j, l, r;
/* First loop: locate the iovec containing `offset`. */
182 for (l = 0, i = 1; i < packet->niovecs; i++) {
183 if (l + packet->wirevec[i].iov_len > offset) {
186 l += packet->wirevec[i].iov_len;
189 /* i is the iovec which contains the first little bit of data in which we
190 * are interested. l is the total length of everything prior to this iovec.
191 * j is the number of bytes we can safely copy out of this iovec.
192 * offset only applies to the first iovec.
/* Second loop: copy iovec-by-iovec until resid is exhausted or we run
 * out of iovecs. */
195 while ((resid > 0) && (i < packet->niovecs)) {
196 j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
197 memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
200 l += packet->wirevec[i].iov_len;
205 return (resid ? (r - resid) : r);
210 * all packet buffers (iov_base) are integral multiples of the
212 * offset is an integral multiple of the word size.
/* Copy `resid` bytes from `in` into the packet's data area starting at
 * byte `offset`, allocating additional continuation buffers on demand
 * (via rxi_AllocDataBuf) when the write runs past the current iovecs.
 * Returns the number of bytes actually written. */
215 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
/* Locate the iovec containing `offset`; l = length preceding iovec i. */
220 for (l = 0, i = 1; i < packet->niovecs; i++) {
221 if (l + packet->wirevec[i].iov_len > offset) {
224 l += packet->wirevec[i].iov_len;
227 /* i is the iovec which contains the first little bit of data in which we
228 * are interested. l is the total length of everything prior to this iovec.
229 * j is the number of bytes we can safely copy out of this iovec.
230 * offset only applies to the first iovec.
/* Bounded by RX_MAXWVECS (not niovecs) so we can grow the vec below. */
233 while ((resid > 0) && (i < RX_MAXWVECS)) {
234 if (i >= packet->niovecs)
235 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
238 b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
239 j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
243 l += packet->wirevec[i].iov_len;
248 return (resid ? (r - resid) : r);
/* Allocate `num_pkts` packets of the given class onto queue `q`:
 * grab raw buffers via AllocPacketBufs, then reinitialize each
 * packet's iovec bookkeeping with RX_PACKET_IOV_FULLINIT. */
252 rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
254 register struct rx_packet *p, *np;
/* AllocPacketBufs may return fewer than requested. */
256 num_pkts = AllocPacketBufs(class, num_pkts, q);
258 for (queue_Scan(q, p, np, rx_packet)) {
259 RX_PACKET_IOV_FULLINIT(p);
265 #ifdef RX_ENABLE_TSFPQ
/* Thread-specific free-packet-queue (TSFPQ) variant: satisfy the
 * request from the calling thread's local free queue, topping it up
 * from the global queue (under rx_freePktQ_lock) when it runs short. */
267 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
269 register struct rx_ts_info_t * rx_ts_info;
273 RX_TS_INFO_GET(rx_ts_info);
/* How many packets the local queue is short by. */
275 transfer = num_pkts - rx_ts_info->_FPQ.len;
278 MUTEX_ENTER(&rx_freePktQ_lock);
/* Pull at least one glob-size batch to amortize lock traffic. */
279 transfer = MAX(transfer, rx_TSFPQGlobSize);
280 if (transfer > rx_nFreePackets) {
281 /* alloc enough for us, plus a few globs for other threads */
282 rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
285 RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
287 MUTEX_EXIT(&rx_freePktQ_lock);
/* Hand num_pkts packets from the local queue to the caller's queue. */
291 RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
295 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ variant: allocate directly from the single global free
 * queue under rx_freePktQ_lock, enforcing per-class quotas and
 * bumping the matching per-class alloc-failure statistic when over
 * quota.  NOTE(review): switch/break lines are elided in this dump. */
297 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
308 MUTEX_ENTER(&rx_freePktQ_lock);
/* Shrink the request until it fits within this class's quota. */
311 for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
312 num_pkts--, overq++);
315 rxi_NeedMorePackets = TRUE;
317 case RX_PACKET_CLASS_RECEIVE:
318 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
320 case RX_PACKET_CLASS_SEND:
321 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
323 case RX_PACKET_CLASS_SPECIAL:
324 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
326 case RX_PACKET_CLASS_RECV_CBUF:
327 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
329 case RX_PACKET_CLASS_SEND_CBUF:
330 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
/* Cap the request at what is actually free. */
335 if (rx_nFreePackets < num_pkts)
336 num_pkts = rx_nFreePackets;
339 rxi_NeedMorePackets = TRUE;
343 if (rx_nFreePackets < num_pkts) {
344 rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), 4 * rx_initSendWindow));
/* Walk num_pkts entries and split them off the free queue onto q. */
348 for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
350 i++, c=queue_Next(c, rx_packet)) {
354 queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);
356 rx_nFreePackets -= num_pkts;
361 MUTEX_EXIT(&rx_freePktQ_lock);
366 #endif /* RX_ENABLE_TSFPQ */
369 * Free a packet currently used as a continuation buffer
371 #ifdef RX_ENABLE_TSFPQ
372 /* num_pkts=0 means queue length is unknown */
/* TSFPQ variant: free all packets on q (continuation buffers first,
 * via rxi_FreeDataBufsTSFPQ) into the thread-local queue, spilling
 * to the global queue when the local queue exceeds rx_TSFPQLocalMax.
 * num_pkts==0 means the queue length is unknown and is counted here. */
374 rxi_FreePackets(int num_pkts, struct rx_queue * q)
376 register struct rx_ts_info_t * rx_ts_info;
377 register struct rx_packet *c, *nc;
380 osi_Assert(num_pkts >= 0);
381 RX_TS_INFO_GET(rx_ts_info);
/* Counting branch: tallies num_pkts while freeing data bufs. */
384 for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
385 rxi_FreeDataBufsTSFPQ(c, 2, 0);
/* Known-length branch: just free data bufs. */
388 for (queue_Scan(q, c, nc, rx_packet)) {
389 rxi_FreeDataBufsTSFPQ(c, 2, 0);
394 RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
/* Over local quota: flush excess to the global free queue. */
397 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
399 MUTEX_ENTER(&rx_freePktQ_lock);
401 RX_TS_FPQ_LTOG(rx_ts_info);
403 /* Wakeup anyone waiting for packets */
406 MUTEX_EXIT(&rx_freePktQ_lock);
412 #else /* RX_ENABLE_TSFPQ */
413 /* num_pkts=0 means queue length is unknown */
/* Non-TSFPQ variant: collect each packet's continuation buffers onto
 * a local queue `cbs` (qlen counts them), splice everything onto the
 * global free queue under rx_freePktQ_lock, and wake waiters.
 * num_pkts==0 means the queue length is unknown and is counted here. */
415 rxi_FreePackets(int num_pkts, struct rx_queue *q)
418 register struct rx_packet *p, *np;
422 osi_Assert(num_pkts >= 0);
/* Counting branch. */
426 for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
427 if (p->niovecs > 2) {
428 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
/* Known-length branch. */
435 for (queue_Scan(q, p, np, rx_packet)) {
436 if (p->niovecs > 2) {
437 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
/* Append the freed continuation buffers behind the packets on q. */
444 queue_SpliceAppend(q, &cbs);
450 MUTEX_ENTER(&rx_freePktQ_lock);
452 queue_SpliceAppend(&rx_freePacketQueue, q);
453 rx_nFreePackets += qlen;
455 /* Wakeup anyone waiting for packets */
458 MUTEX_EXIT(&rx_freePktQ_lock);
463 #endif /* RX_ENABLE_TSFPQ */
465 /* this one is kind of awful.
466 * In rxkad, the packet has been all shortened, and everything, ready for
467 * sending. All of a sudden, we discover we need some of that space back.
468 * This isn't terribly general, because it knows that the packets are only
469 * rounded up to the EBS (userdata + security header).
/* Grow an iovec by `nb` bytes (reclaiming space rxkad shortened off),
 * bounded by RX_FIRSTBUFFERSIZE for the local-data buffer and
 * RX_CBUFFERSIZE for continuation buffers. */
472 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
/* The first data buffer lives inline in the packet (localdata). */
476 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
477 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
478 p->wirevec[i].iov_len += nb;
/* Otherwise it is a continuation buffer. */
482 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
483 p->wirevec[i].iov_len += nb;
491 /* get sufficient space to store nb bytes of data (or more), and hook
492 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
493 * returns the number of bytes >0 which it failed to come up with.
494 * Don't need to worry about locking on packet, since only
495 * one thread can manipulate one at a time. Locking on continution
496 * packets is handled by AllocPacketBufs */
497 /* MTUXXX don't need to go throught the for loop if we can trust niovecs */
/* Attach enough continuation buffers to packet p to hold `nb` more
 * bytes.  Returns <=0 on full success, otherwise the number of bytes
 * it could NOT provide (e.g. when RX_MAXWVECS caps the iovec count). */
499 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
503 register struct rx_packet *cb, *ncb;
505 /* compute the number of cbuf's we need */
506 nv = nb / RX_CBUFFERSIZE;
/* Round up for a partial trailing buffer. */
507 if ((nv * RX_CBUFFERSIZE) < nb)
/* Clamp so the wirevec never exceeds RX_MAXWVECS entries. */
509 if ((nv + p->niovecs) > RX_MAXWVECS)
510 nv = RX_MAXWVECS - p->niovecs;
514 /* allocate buffers */
516 nv = AllocPacketBufs(class, nv, &q);
518 /* setup packet iovs */
519 for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
/* Each allocated packet's localdata area becomes one cbuf iovec. */
521 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
522 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
525 nb -= (nv * RX_CBUFFERSIZE);
526 p->length += (nv * RX_CBUFFERSIZE);
532 /* Add more packet buffers */
533 #ifdef RX_ENABLE_TSFPQ
/* TSFPQ variant: malloc `apackets` new rx_packet structures in one
 * slab, register them with the thread-local free queue, update the
 * global packet count/limits, and flush excess to the global queue. */
535 rxi_MorePackets(int apackets)
537 struct rx_packet *p, *e;
538 register struct rx_ts_info_t * rx_ts_info;
542 getme = apackets * sizeof(struct rx_packet);
543 p = (struct rx_packet *)osi_Alloc(getme);
546 PIN(p, getme); /* XXXXX */
547 memset((char *)p, 0, getme);
548 RX_TS_INFO_GET(rx_ts_info);
550 RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
551 /* TSFPQ patch also needs to keep track of total packets */
553 MUTEX_ENTER(&rx_packets_mutex);
554 rx_nPackets += apackets;
555 RX_TS_FPQ_COMPUTE_LIMITS;
556 MUTEX_EXIT(&rx_packets_mutex);
/* Initialize each packet in the slab and check it into the local FPQ. */
558 for (e = p + apackets; p < e; p++) {
559 RX_PACKET_IOV_INIT(p);
562 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
/* Debug-id stamping and the all-packets chain are guarded by the
 * global free-queue lock. */
565 MUTEX_ENTER(&rx_freePktQ_lock);
566 #ifdef RXDEBUG_PACKET
567 p->packetId = rx_packet_id++;
568 p->allNextp = rx_mallocedP;
569 #endif /* RXDEBUG_PACKET */
571 MUTEX_EXIT(&rx_freePktQ_lock);
574 rx_ts_info->_FPQ.delta += apackets;
/* Spill past-quota packets to the global free queue. */
576 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
578 MUTEX_ENTER(&rx_freePktQ_lock);
580 RX_TS_FPQ_LTOG(rx_ts_info);
581 rxi_NeedMorePackets = FALSE;
584 MUTEX_EXIT(&rx_freePktQ_lock);
588 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ variant: malloc a slab of `apackets` packets, mark each
 * free, and append them all to the global free queue under the
 * free-queue lock. */
590 rxi_MorePackets(int apackets)
592 struct rx_packet *p, *e;
596 getme = apackets * sizeof(struct rx_packet);
597 p = (struct rx_packet *)osi_Alloc(getme);
600 PIN(p, getme); /* XXXXX */
601 memset((char *)p, 0, getme);
603 MUTEX_ENTER(&rx_freePktQ_lock);
605 for (e = p + apackets; p < e; p++) {
606 RX_PACKET_IOV_INIT(p);
607 p->flags |= RX_PKTFLAG_FREE;
610 queue_Append(&rx_freePacketQueue, p);
611 #ifdef RXDEBUG_PACKET
612 p->packetId = rx_packet_id++;
613 p->allNextp = rx_mallocedP;
614 #endif /* RXDEBUG_PACKET */
618 rx_nFreePackets += apackets;
619 rxi_NeedMorePackets = FALSE;
622 MUTEX_EXIT(&rx_freePktQ_lock);
625 #endif /* RX_ENABLE_TSFPQ */
627 #ifdef RX_ENABLE_TSFPQ
/* Like rxi_MorePackets (TSFPQ) but with explicit control over how many
 * of the new packets stay in the thread-local queue (num_keep_local)
 * and whether the excess is flushed to the global queue (flush_global). */
629 rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
631 struct rx_packet *p, *e;
632 register struct rx_ts_info_t * rx_ts_info;
636 getme = apackets * sizeof(struct rx_packet);
637 p = (struct rx_packet *)osi_Alloc(getme);
639 PIN(p, getme); /* XXXXX */
640 memset((char *)p, 0, getme);
641 RX_TS_INFO_GET(rx_ts_info);
643 RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
644 /* TSFPQ patch also needs to keep track of total packets */
645 MUTEX_ENTER(&rx_packets_mutex);
646 rx_nPackets += apackets;
647 RX_TS_FPQ_COMPUTE_LIMITS;
648 MUTEX_EXIT(&rx_packets_mutex);
/* Initialize and check each new packet into the local free queue. */
650 for (e = p + apackets; p < e; p++) {
651 RX_PACKET_IOV_INIT(p);
653 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
656 MUTEX_ENTER(&rx_freePktQ_lock);
657 #ifdef RXDEBUG_PACKET
658 p->packetId = rx_packet_id++;
659 p->allNextp = rx_mallocedP;
660 #endif /* RXDEBUG_PACKET */
662 MUTEX_EXIT(&rx_freePktQ_lock);
665 rx_ts_info->_FPQ.delta += apackets;
/* Push (apackets - num_keep_local) packets to the global queue. */
668 (num_keep_local < apackets)) {
670 MUTEX_ENTER(&rx_freePktQ_lock);
672 RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
673 rxi_NeedMorePackets = FALSE;
676 MUTEX_EXIT(&rx_freePktQ_lock);
680 #endif /* RX_ENABLE_TSFPQ */
683 /* Add more packet buffers */
/* Add packet buffers to the global free queue; caller must already
 * hold rx_freePktQ_lock.  Over-allocates so that roughly 1/4 of the
 * new packets can serve as jumbo-sized continuation-buffer donors. */
685 rxi_MorePacketsNoLock(int apackets)
687 #ifdef RX_ENABLE_TSFPQ
688 register struct rx_ts_info_t * rx_ts_info;
689 #endif /* RX_ENABLE_TSFPQ */
690 struct rx_packet *p, *e;
693 /* allocate enough packets that 1/4 of the packets will be able
694 * to hold maximal amounts of data */
695 apackets += (apackets / 4)
696 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
698 getme = apackets * sizeof(struct rx_packet);
699 p = (struct rx_packet *)osi_Alloc(getme);
/* NOTE(review): the elided lines here appear to retry with a smaller
 * request on allocation failure — confirm against the full source. */
701 apackets -= apackets / 4;
702 osi_Assert(apackets > 0);
705 memset((char *)p, 0, getme);
707 #ifdef RX_ENABLE_TSFPQ
708 RX_TS_INFO_GET(rx_ts_info);
709 RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info,apackets);
710 #endif /* RX_ENABLE_TSFPQ */
712 for (e = p + apackets; p < e; p++) {
713 RX_PACKET_IOV_INIT(p);
714 p->flags |= RX_PKTFLAG_FREE;
717 queue_Append(&rx_freePacketQueue, p);
718 #ifdef RXDEBUG_PACKET
719 p->packetId = rx_packet_id++;
720 p->allNextp = rx_mallocedP;
721 #endif /* RXDEBUG_PACKET */
725 rx_nFreePackets += apackets;
726 #ifdef RX_ENABLE_TSFPQ
727 /* TSFPQ patch also needs to keep track of total packets */
728 MUTEX_ENTER(&rx_packets_mutex);
729 rx_nPackets += apackets;
730 RX_TS_FPQ_COMPUTE_LIMITS;
731 MUTEX_EXIT(&rx_packets_mutex);
732 #endif /* RX_ENABLE_TSFPQ */
733 rxi_NeedMorePackets = FALSE;
/* Release the packet pool at shutdown.  As the MTUXXX comment notes,
 * this only frees/unpins one region sized from rx_maxReceiveWindow,
 * not every slab ever allocated. */
739 rxi_FreeAllPackets(void)
741 /* must be called at proper interrupt level, etcetera */
742 /* MTUXXX need to free all Packets */
743 osi_Free(rx_mallocedP,
744 (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
745 UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
748 #ifdef RX_ENABLE_TSFPQ
/* Resize this thread's local free queue to num_keep_local packets:
 * push the surplus to the global queue, or pull the shortfall from it
 * (growing the global pool if needed).  allow_overcommit permits the
 * local queue to exceed rx_TSFPQLocalMax. */
750 rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
752 register struct rx_ts_info_t * rx_ts_info;
756 RX_TS_INFO_GET(rx_ts_info);
758 if (num_keep_local != rx_ts_info->_FPQ.len) {
760 MUTEX_ENTER(&rx_freePktQ_lock);
/* Shrink: move the excess local packets to the global queue. */
761 if (num_keep_local < rx_ts_info->_FPQ.len) {
762 xfer = rx_ts_info->_FPQ.len - num_keep_local;
763 RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
/* Grow: pull from the global queue, respecting the local max
 * unless overcommit is allowed. */
766 xfer = num_keep_local - rx_ts_info->_FPQ.len;
767 if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
768 xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
769 if (rx_nFreePackets < xfer) {
770 rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
772 RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
774 MUTEX_EXIT(&rx_freePktQ_lock);
/* Return every packet in this thread's local free queue to the
 * global pool (adjust target of 0, no overcommit). */
780 rxi_FlushLocalPacketsTSFPQ(void)
782 rxi_AdjustLocalPacketsTSFPQ(0, 0);
784 #endif /* RX_ENABLE_TSFPQ */
786 /* Allocate more packets iff we need more continuation buffers */
787 /* In kernel, can't page in memory with interrupts disabled, so we
788 * don't use the event mechanism. */
/* Poll-style top-up: if any allocation path flagged a shortage,
 * grow the pool by rx_initSendWindow packets. */
790 rx_CheckPackets(void)
792 if (rxi_NeedMorePackets) {
793 rxi_MorePackets(rx_initSendWindow);
797 /* In the packet freeing routine below, the assumption is that
798 we want all of the packets to be used equally frequently, so that we
799 don't get packet buffers paging out. It would be just as valid to
800 assume that we DO want them to page out if not many are being used.
801 In any event, we assume the former, and append the packets to the end
803 /* This explanation is bogus. The free list doesn't remain in any kind of
804 useful order for afs_int32: the packets in use get pretty much randomly scattered
805 across all the pages. In order to permit unused {packets,bufs} to page out, they
806 must be stored so that packets which are adjacent in memory are adjacent in the
807 free list. An array springs rapidly to mind.
810 /* Actually free the packet p. */
811 #ifdef RX_ENABLE_TSFPQ
/* TSFPQ variant: check packet p back into the thread-local free
 * queue, spilling to the global queue once over the local max.
 * "NoLock" — caller holds the relevant lock. */
813 rxi_FreePacketNoLock(struct rx_packet *p)
815 register struct rx_ts_info_t * rx_ts_info;
816 dpf(("Free %lx\n", (unsigned long)p));
818 RX_TS_INFO_GET(rx_ts_info);
819 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
820 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
821 RX_TS_FPQ_LTOG(rx_ts_info);
824 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ variant: append p to the global free queue; caller must
 * hold rx_freePktQ_lock. */
826 rxi_FreePacketNoLock(struct rx_packet *p)
828 dpf(("Free %lx\n", (unsigned long)p));
832 queue_Append(&rx_freePacketQueue, p);
834 #endif /* RX_ENABLE_TSFPQ */
836 #ifdef RX_ENABLE_TSFPQ
/* Free packet p into the thread-local queue; when flush_global is set
 * and the local queue exceeds its max, flush to the global queue and
 * wake any waiters. */
838 rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
840 register struct rx_ts_info_t * rx_ts_info;
841 dpf(("Free %lx\n", (unsigned long)p));
843 RX_TS_INFO_GET(rx_ts_info);
844 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
846 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
848 MUTEX_ENTER(&rx_freePktQ_lock);
850 RX_TS_FPQ_LTOG(rx_ts_info);
852 /* Wakeup anyone waiting for packets */
855 MUTEX_EXIT(&rx_freePktQ_lock);
859 #endif /* RX_ENABLE_TSFPQ */
862 * free continuation buffers off a packet into a queue
864 * [IN] p -- packet from which continuation buffers will be freed
865 * [IN] first -- iovec offset of first continuation buffer to free
866 * [IN] q -- queue into which continuation buffers will be chained
869 * number of continuation buffers freed
871 #ifndef RX_ENABLE_TSFPQ
/* Detach continuation buffers [max(2,first) .. niovecs) from p and
 * chain them (as their backing rx_packet structures) onto q.
 * Returns the number of buffers freed (`count`). */
873 rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
876 struct rx_packet * cb;
/* Iovecs 0 (header) and 1 (first data buffer) are never freed here. */
879 for (first = MAX(2, first); first < p->niovecs; first++, count++) {
880 iov = &p->wirevec[first];
882 osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
/* Recover the rx_packet that donated this cbuf's storage. */
883 cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
884 RX_FPQ_MARK_FREE(cb);
895 * free packet continuation buffers into the global free packet pool
897 * [IN] p -- packet from which to free continuation buffers
898 * [IN] first -- iovec offset of first continuation buffer to free
/* Free continuation buffers [max(2,first) .. niovecs) of p straight
 * into the global free pool; caller must hold rx_freePktQ_lock. */
904 rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
908 for (first = MAX(2, first); first < p->niovecs; first++) {
909 iov = &p->wirevec[first];
911 osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
912 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
920 #ifdef RX_ENABLE_TSFPQ
922 * free packet continuation buffers into the thread-local free pool
924 * [IN] p -- packet from which continuation buffers will be freed
925 * [IN] first -- iovec offset of first continuation buffer to free
926 * any value less than 2, the min number of iovecs,
927 * is treated as if it is 2.
928 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
929 * global free pool before returning
/* Free continuation buffers [max(2,first) .. niovecs) of p into the
 * thread-local free pool; with flush_global set, spill past-quota
 * packets to the global pool and wake waiters. */
935 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
938 register struct rx_ts_info_t * rx_ts_info;
940 RX_TS_INFO_GET(rx_ts_info);
942 for (first = MAX(2, first); first < p->niovecs; first++) {
943 iov = &p->wirevec[first];
945 osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
946 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
951 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
953 MUTEX_ENTER(&rx_freePktQ_lock);
955 RX_TS_FPQ_LTOG(rx_ts_info);
957 /* Wakeup anyone waiting for packets */
960 MUTEX_EXIT(&rx_freePktQ_lock);
965 #endif /* RX_ENABLE_TSFPQ */
967 int rxi_nBadIovecs = 0;
969 /* rxi_RestoreDataBufs
971 * Restore the correct sizes to the iovecs. Called when reusing a packet
972 * for reading off the wire.
/* Reset every continuation iovec of p to its full RX_CBUFFERSIZE
 * length so the packet can be reused for a wire read (sends may have
 * shortened the iov_lens). */
975 rxi_RestoreDataBufs(struct rx_packet *p)
978 struct iovec *iov = &p->wirevec[2];
980 RX_PACKET_IOV_INIT(p);
982 for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
/* A NULL base here is a corrupt vec; elided lines handle it. */
983 if (!iov->iov_base) {
988 iov->iov_len = RX_CBUFFERSIZE;
992 #ifdef RX_ENABLE_TSFPQ
/* TSFPQ variant: free the continuation buffers of p that lie beyond
 * the packet's current data length (p->length), returning them to the
 * thread-local pool and spilling to the global pool when over quota. */
994 rxi_TrimDataBufs(struct rx_packet *p, int first)
997 struct iovec *iov, *end;
998 register struct rx_ts_info_t * rx_ts_info;
1002 osi_Panic("TrimDataBufs 1: first must be 1");
1004 /* Skip over continuation buffers containing message data */
1005 iov = &p->wirevec[2];
1006 end = iov + (p->niovecs - 2);
/* Data remaining after the first (inline) data buffer. */
1007 length = p->length - p->wirevec[1].iov_len;
1008 for (; iov < end && length > 0; iov++) {
1010 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
1011 length -= iov->iov_len;
1014 /* iov now points to the first empty data buffer. */
1018 RX_TS_INFO_GET(rx_ts_info);
/* Check each unused cbuf back into the local free queue. */
1019 for (; iov < end; iov++) {
1021 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1022 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
1025 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
1027 MUTEX_ENTER(&rx_freePktQ_lock);
1029 RX_TS_FPQ_LTOG(rx_ts_info);
1030 rxi_PacketsUnWait();
1032 MUTEX_EXIT(&rx_freePktQ_lock);
1038 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ variant: free the continuation buffers of p beyond the
 * packet's current data length directly into the global pool, then
 * wake any packet waiters. */
1040 rxi_TrimDataBufs(struct rx_packet *p, int first)
1043 struct iovec *iov, *end;
1047 osi_Panic("TrimDataBufs 1: first must be 1");
1049 /* Skip over continuation buffers containing message data */
1050 iov = &p->wirevec[2];
1051 end = iov + (p->niovecs - 2);
/* Data remaining after the first (inline) data buffer. */
1052 length = p->length - p->wirevec[1].iov_len;
1053 for (; iov < end && length > 0; iov++) {
1055 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
1056 length -= iov->iov_len;
1059 /* iov now points to the first empty data buffer. */
1064 MUTEX_ENTER(&rx_freePktQ_lock);
1066 for (; iov < end; iov++) {
1068 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1069 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
1072 rxi_PacketsUnWait();
1074 MUTEX_EXIT(&rx_freePktQ_lock);
1079 #endif /* RX_ENABLE_TSFPQ */
1081 /* Free the packet p. P is assumed not to be on any queue, i.e.
1082 * remove it yourself first if you call this routine. */
1083 #ifdef RX_ENABLE_TSFPQ
/* TSFPQ variant: free p's continuation buffers (from iovec 2) locally,
 * then free p itself, flushing to the global pool if over quota. */
1085 rxi_FreePacket(struct rx_packet *p)
1087 rxi_FreeDataBufsTSFPQ(p, 2, 0);
1088 rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
1090 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ variant: under the global free-queue lock, free p's
 * continuation buffers and then p itself, waking any waiters.
 * p must not be on any queue when called. */
1092 rxi_FreePacket(struct rx_packet *p)
1097 MUTEX_ENTER(&rx_freePktQ_lock);
1099 rxi_FreeDataBufsNoLock(p, 2);
1100 rxi_FreePacketNoLock(p);
1101 /* Wakeup anyone waiting for packets */
1102 rxi_PacketsUnWait();
1104 MUTEX_EXIT(&rx_freePktQ_lock);
1107 #endif /* RX_ENABLE_TSFPQ */
1109 /* rxi_AllocPacket sets up p->length so it reflects the number of
1110 * bytes in the packet at this point, **not including** the header.
1111 * The header is absolutely necessary, besides, this is the way the
1112 * length field is usually used */
1113 #ifdef RX_ENABLE_TSFPQ
/* TSFPQ variant: allocate one packet of `class`, preferring the
 * thread-local free queue and falling back to the global queue
 * (growing it if empty).  Returns NULL when the class is over quota,
 * after recording the matching alloc-failure statistic.
 * NOTE(review): switch/break and return lines are elided in this dump. */
1115 rxi_AllocPacketNoLock(int class)
1117 register struct rx_packet *p;
1118 register struct rx_ts_info_t * rx_ts_info;
1120 RX_TS_INFO_GET(rx_ts_info);
1123 if (rxi_OverQuota(class)) {
1124 rxi_NeedMorePackets = TRUE;
1126 case RX_PACKET_CLASS_RECEIVE:
1127 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
1129 case RX_PACKET_CLASS_SEND:
1130 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
1132 case RX_PACKET_CLASS_SPECIAL:
1133 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
1135 case RX_PACKET_CLASS_RECV_CBUF:
1136 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
1138 case RX_PACKET_CLASS_SEND_CBUF:
1139 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
1142 return (struct rx_packet *)0;
1146 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
/* Local queue empty: refill from the global queue. */
1147 if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1150 if (queue_IsEmpty(&rx_freePacketQueue))
1151 osi_Panic("rxi_AllocPacket error");
1153 if (queue_IsEmpty(&rx_freePacketQueue))
1154 rxi_MorePacketsNoLock(4 * rx_initSendWindow);
1158 RX_TS_FPQ_GTOL(rx_ts_info);
1161 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1163 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1166 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1167 * order to truncate outbound packets. In the near future, may need
1168 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1170 RX_PACKET_IOV_FULLINIT(p);
1173 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ variant: allocate one packet of `class` from the global
 * free queue (caller holds rx_freePktQ_lock), growing the queue when
 * empty.  Returns NULL on over-quota after recording the failure stat.
 * NOTE(review): switch/break and return lines are elided in this dump. */
1175 rxi_AllocPacketNoLock(int class)
1177 register struct rx_packet *p;
1180 if (rxi_OverQuota(class)) {
1181 rxi_NeedMorePackets = TRUE;
1183 case RX_PACKET_CLASS_RECEIVE:
1184 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
1186 case RX_PACKET_CLASS_SEND:
1187 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
1189 case RX_PACKET_CLASS_SPECIAL:
1190 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
1192 case RX_PACKET_CLASS_RECV_CBUF:
1193 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
1195 case RX_PACKET_CLASS_SEND_CBUF:
1196 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
1199 return (struct rx_packet *)0;
1203 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1206 if (queue_IsEmpty(&rx_freePacketQueue))
1207 osi_Panic("rxi_AllocPacket error");
1209 if (queue_IsEmpty(&rx_freePacketQueue))
1210 rxi_MorePacketsNoLock(4 * rx_initSendWindow);
/* Take the first free packet and mark it in use. */
1214 p = queue_First(&rx_freePacketQueue, rx_packet);
1216 RX_FPQ_MARK_USED(p);
1218 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1221 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1222 * order to truncate outbound packets. In the near future, may need
1223 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1225 RX_PACKET_IOV_FULLINIT(p);
1228 #endif /* RX_ENABLE_TSFPQ */
1230 #ifdef RX_ENABLE_TSFPQ
/* Allocate one packet from the thread-local free queue.  With
 * pull_global set, an empty local queue is refilled from the global
 * queue (growing it if needed); without it, an empty local queue
 * fails (elided branch at the `else if`). */
1232 rxi_AllocPacketTSFPQ(int class, int pull_global)
1234 register struct rx_packet *p;
1235 register struct rx_ts_info_t * rx_ts_info;
1237 RX_TS_INFO_GET(rx_ts_info);
1239 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1240 if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
1241 MUTEX_ENTER(&rx_freePktQ_lock);
1243 if (queue_IsEmpty(&rx_freePacketQueue))
1244 rxi_MorePacketsNoLock(4 * rx_initSendWindow);
1246 RX_TS_FPQ_GTOL(rx_ts_info);
1248 MUTEX_EXIT(&rx_freePktQ_lock);
1249 } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1253 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1255 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1257 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1258 * order to truncate outbound packets. In the near future, may need
1259 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1261 RX_PACKET_IOV_FULLINIT(p);
1264 #endif /* RX_ENABLE_TSFPQ */
1266 #ifdef RX_ENABLE_TSFPQ
/* TSFPQ variant: public entry point; delegates to the thread-local
 * allocator with global-pull enabled. */
1268 rxi_AllocPacket(int class)
1270 register struct rx_packet *p;
1272 p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
1275 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ variant: public entry point; takes the global free-queue
 * lock around the lock-free allocator. */
1277 rxi_AllocPacket(int class)
1279 register struct rx_packet *p;
1281 MUTEX_ENTER(&rx_freePktQ_lock);
1282 p = rxi_AllocPacketNoLock(class);
1283 MUTEX_EXIT(&rx_freePktQ_lock);
1286 #endif /* RX_ENABLE_TSFPQ */
1288 /* This guy comes up with as many buffers as it {takes,can get} given
1289 * the MTU for this call. It also sets the packet length before
1290 * returning. caution: this is often called at NETPRI
1291 * Called with call locked.
/* Allocate a send packet for `call` sized for min(want, MTU-derived
 * mud), adding continuation buffers as needed and trimming length
 * back to mud.  If no packet is available, blocks (releasing the call
 * lock) until packets are freed.  Often called at NETPRI; call lock
 * held on entry and exit. */
1294 rxi_AllocSendPacket(register struct rx_call *call, int want)
1296 register struct rx_packet *p = (struct rx_packet *)0;
1298 register unsigned delta;
/* mud = max user data: call MTU minus wire header and security
 * header/trailer overhead for this connection. */
1301 mud = call->MTU - RX_HEADER_SIZE;
1303 rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
1304 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
1306 #ifdef RX_ENABLE_TSFPQ
/* Fast path: local-queue-only allocation (no global pull). */
1307 if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
1309 want = MIN(want, mud);
1311 if ((unsigned)want > p->length)
1312 (void)rxi_AllocDataBuf(p, (want - p->length),
1313 RX_PACKET_CLASS_SEND_CBUF);
1315 if ((unsigned)p->length > mud)
1318 if (delta >= p->length) {
1326 #endif /* RX_ENABLE_TSFPQ */
/* Slow path: loop until we get a packet or the call errors out. */
1328 while (!(call->error)) {
1329 MUTEX_ENTER(&rx_freePktQ_lock);
1330 /* if an error occurred, or we get the packet we want, we're done */
1331 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
1332 MUTEX_EXIT(&rx_freePktQ_lock);
1335 want = MIN(want, mud);
1337 if ((unsigned)want > p->length)
1338 (void)rxi_AllocDataBuf(p, (want - p->length),
1339 RX_PACKET_CLASS_SEND_CBUF);
1341 if ((unsigned)p->length > mud)
1344 if (delta >= p->length) {
1353 /* no error occurred, and we didn't get a packet, so we sleep.
1354 * At this point, we assume that packets will be returned
1355 * sooner or later, as packets are acknowledged, and so we
1358 call->flags |= RX_CALL_WAIT_PACKETS;
1359 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
/* Drop the call lock while sleeping on the free-packet CV. */
1360 MUTEX_EXIT(&call->lock);
1361 rx_waitingForPackets = 1;
1363 #ifdef RX_ENABLE_LOCKS
1364 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
1366 osi_rxSleep(&rx_waitingForPackets);
1368 MUTEX_EXIT(&rx_freePktQ_lock);
1369 MUTEX_ENTER(&call->lock);
1370 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
1371 call->flags &= ~RX_CALL_WAIT_PACKETS;
1380 /* Windows does not use file descriptors. */
1381 #define CountFDs(amax) 0
1383 /* count the number of used FDs */
/* Count open file descriptors in [0, amax) by fstat'ing each one
 * (non-Windows builds; Windows #defines CountFDs(amax) to 0 above). */
1385 CountFDs(register int amax)
1388 register int i, code;
1392 for (i = 0; i < amax; i++) {
/* fstat succeeds (code == 0) only for open descriptors. */
1393 code = fstat(i, &tstat);
1399 #endif /* AFS_NT40_ENV */
1402 #define CountFDs(amax) amax
1406 #if !defined(KERNEL) || defined(UKERNEL)
/*
 * rxi_ReadPacket -- read one UDP datagram from `socket` into packet *p.
 * Returns 0 for a bogus/dropped datagram; on success the sender address
 * is stored through *host/*port (network byte order) and the rx header
 * is decoded into p->header.
 * NOTE(review): this chunk is elided; the success return and some error
 * paths are not visible here -- verify against full source.
 */
1408 /* This function reads a single packet from the interface into the
1409 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
1410 * (host,port) of the sender are stored in the supplied variables, and
1411 * the data length of the packet is stored in the packet structure.
1412 * The header is decoded. */
1414 rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host,
1417 struct sockaddr_in from;
1420 register afs_int32 tlen, savelen;
/* current data size of p, then grow to the full advertised wire size */
1422 rx_computelen(p, tlen);
1423 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
1425 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
1426 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
1427 * it once in order to avoid races. */
/* make sure p has enough continuation buffers for a max-size datagram */
1430 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
1438 /* Extend the last iovec for padding, it's just to make sure that the
1439 * read doesn't return more data than we expect, and is done to get around
1440 * our problems caused by the lack of a length field in the rx header.
1441 * Use the extra buffer that follows the localdata in each packet
1443 savelen = p->wirevec[p->niovecs - 1].iov_len;
1444 p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
/* build the scatter/gather recvmsg descriptor over the packet's iovecs */
1446 memset((char *)&msg, 0, sizeof(msg));
1447 msg.msg_name = (char *)&from;
1448 msg.msg_namelen = sizeof(struct sockaddr_in);
1449 msg.msg_iov = p->wirevec;
1450 msg.msg_iovlen = p->niovecs;
1451 nbytes = rxi_Recvmsg(socket, &msg, 0);
1453 /* restore the vec to its correct state */
1454 p->wirevec[p->niovecs - 1].iov_len = savelen;
/* user-data length excludes the wire rx header */
1456 p->length = (nbytes - RX_HEADER_SIZE);
1457 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
/* EWOULDBLOCK means no datagram was waiting, not a real error */
1458 if (nbytes < 0 && errno == EWOULDBLOCK) {
1459 rx_MutexIncrement(rx_stats.noPacketOnRead, rx_stats_mutex);
1460 } else if (nbytes <= 0) {
1461 MUTEX_ENTER(&rx_stats_mutex);
1462 rx_stats.bogusPacketOnRead++;
1463 rx_stats.bogusHost = from.sin_addr.s_addr;
1464 MUTEX_EXIT(&rx_stats_mutex);
1465 dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
1466 ntohs(from.sin_port), nbytes));
/* test hook: intentionally drop a percentage of received packets */
1471 else if ((rx_intentionallyDroppedOnReadPer100 > 0)
1472 && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
1473 rxi_DecodePacketHeader(p);
1475 *host = from.sin_addr.s_addr;
1476 *port = from.sin_port;
1478 dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
1479 p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
1480 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1482 rxi_TrimDataBufs(p, 1);
1487 /* Extract packet header. */
1488 rxi_DecodePacketHeader(p);
1490 *host = from.sin_addr.s_addr;
1491 *port = from.sin_port;
1492 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
1493 struct rx_peer *peer;
1494 rx_MutexIncrement(rx_stats.packetsRead[p->header.type - 1], rx_stats_mutex);
1496 * Try to look up this peer structure. If it doesn't exist,
1497 * don't create a new one -
1498 * we don't keep count of the bytes sent/received if a peer
1499 * structure doesn't already exist.
1501 * The peer/connection cleanup code assumes that there is 1 peer
1502 * per connection. If we actually created a peer structure here
1503 * and this packet was an rxdebug packet, the peer structure would
1504 * never be cleaned up.
1506 peer = rxi_FindPeer(*host, *port, 0, 0);
1507 /* Since this may not be associated with a connection,
1508 * it may have no refCount, meaning we could race with
1511 if (peer && (peer->refCount > 0)) {
1512 MUTEX_ENTER(&peer->peer_lock);
1513 hadd32(peer->bytesReceived, p->length);
1514 MUTEX_EXIT(&peer->peer_lock);
1518 /* Free any empty packet buffers at the end of this packet */
1519 rxi_TrimDataBufs(p, 1);
1525 #endif /* !KERNEL || UKERNEL */
1527 /* This function splits off the first packet in a jumbo packet.
1528 * As of AFS 3.5, jumbograms contain more than one fixed size
1529 * packet, and the RX_JUMBO_PACKET flag is set in all but the
1530 * last packet header. All packets (except the last) are padded to
1531 * fall on RX_CBUFFERSIZE boundaries.
1532 * HACK: We store the length of the first n-1 packets in the
1533 * last two pad bytes. */
/*
 * rxi_SplitJumboPacket -- detach the first fixed-size sub-packet from
 * the jumbogram in *p, building a second rx_packet (np) over the
 * remaining continuation buffers. p keeps the first sub-packet; np
 * holds the rest. Return statements are elided in this chunk --
 * presumably np on success and NULL on the bogus-length/niovecs paths.
 */
1536 rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
1539 struct rx_packet *np;
1540 struct rx_jumboHeader *jp;
1546 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
1547 * bytes in length. All but the first packet are preceded by
1548 * an abbreviated four byte header. The length of the last packet
1549 * is calculated from the size of the jumbogram. */
1550 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
/* sanity: the jumbogram must hold at least one full sub-packet */
1552 if ((int)p->length < length) {
1553 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
1556 niov = p->niovecs - 2;
1558 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
/* the buffer at wirevec[2] becomes the head of the new packet */
1561 iov = &p->wirevec[2];
1562 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
1564 /* Get a pointer to the abbreviated packet header */
1565 jp = (struct rx_jumboHeader *)
1566 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
1568 /* Set up the iovecs for the next packet */
1569 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
1570 np->wirevec[0].iov_len = sizeof(struct rx_header);
1571 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
1572 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
1573 np->niovecs = niov + 1;
/* remaining continuation buffers shift down into np's wirevec */
1574 for (i = 2, iov++; i <= niov; i++, iov++) {
1575 np->wirevec[i] = *iov;
1577 np->length = p->length - length;
1578 p->length = RX_JUMBOBUFFERSIZE;
1581 /* Convert the jumbo packet header to host byte order */
1582 temp = ntohl(*(afs_uint32 *) jp);
1583 jp->flags = (u_char) (temp >> 24);
1584 jp->cksum = (u_short) (temp);
1586 /* Fill in the packet header */
/* np inherits p's header with serial and seq advanced by one */
1587 np->header = p->header;
1588 np->header.serial = p->header.serial + 1;
1589 np->header.seq = p->header.seq + 1;
1590 np->header.flags = jp->flags;
1591 np->header.spare = jp->cksum;
1597 /* Send a udp datagram */
/*
 * osi_NetSend (userspace) -- wrap the destination address and the iovec
 * list in a msghdr and hand it to rxi_Sendmsg(); returns its result.
 * NOTE(review): the msg.msg_iov = dvec assignment and the return are
 * elided in this chunk.
 */
1599 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
1600 int length, int istack)
1605 memset(&msg, 0, sizeof(msg));
1607 msg.msg_iovlen = nvecs;
1608 msg.msg_name = addr;
1609 msg.msg_namelen = sizeof(struct sockaddr_in);
1611 ret = rxi_Sendmsg(socket, &msg, 0);
1615 #elif !defined(UKERNEL)
1617 * message receipt is done in rxk_input or rx_put.
1620 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * cpytoc -- copy a STREAMS mblock chain into the flat buffer cp.
 * Walks b_cont links, copying only M_DATA blocks; per the original
 * comment it returns the number of bytes NOT transferred and does not
 * modify the message. Despite the <off> parameter, the offset is
 * admittedly not honored.
 */
1622 * Copy an mblock to the contiguous area pointed to by cp.
1623 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1624 * but it doesn't really.
1625 * Returns the number of bytes not transferred.
1626 * The message is NOT changed.
1629 cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
1633 for (; mp && len > 0; mp = mp->b_cont) {
1634 if (mp->b_datap->db_type != M_DATA) {
/* copy as much of this block as the remaining length allows */
1637 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1638 memcpy(cp, (char *)mp->b_rptr, n);
/*
 * cpytoiovec -- STREAMS flavor of the scatter copy: spill the M_DATA
 * blocks of mblock chain mp across the iovec array iovs[0..niovs-1].
 * Like cpytoc, the <off> parameter is admittedly not honored.
 * NOTE(review): several loop-control lines are elided in this chunk.
 */
1646 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1647 * but it doesn't really.
1648 * This sucks, anyway, do it like m_cpy.... below
1651 cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
1654 register int m, n, o, t, i;
/* walk the mblock chain, skipping non-M_DATA blocks */
1656 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1657 if (mp->b_datap->db_type != M_DATA) {
1660 n = MIN(len, (mp->b_wptr - mp->b_rptr));
/* t is the current iovec's length, o its fill offset */
1666 t = iovs[i].iov_len;
1669 memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
/* map BSD-style mbuf names onto the STREAMS implementations above */
1679 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1680 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1682 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
/*
 * m_cpytoiovec -- BSD mbuf flavor: copy len bytes starting at offset
 * `off` within mbuf chain m into the iovec array iovs[0..niovs-1].
 * Panics on obviously bad arguments instead of returning an error.
 * NOTE(review): the copy loop's advance/termination lines are elided.
 */
1684 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1687 unsigned int l1, l2, i, t;
1689 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1690 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
/* skip whole mbufs until `off` lands inside the current one */
1693 if (m->m_len <= off) {
1703 p1 = mtod(m, caddr_t) + off;
1704 l1 = m->m_len - off;
1706 p2 = iovs[0].iov_base;
1707 l2 = iovs[0].iov_len;
/* each pass copies min(source-left, dest-left, len-left) bytes */
1710 t = MIN(l1, MIN(l2, (unsigned int)len));
1721 p1 = mtod(m, caddr_t);
1727 p2 = iovs[i].iov_base;
1728 l2 = iovs[i].iov_len;
1736 #endif /* AFS_SUN5_ENV */
1738 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
/*
 * rx_mb_to_packet -- copy an incoming mbuf/mblk chain into an rx
 * packet's iovecs, skipping hdr_len bytes and copying data_len bytes
 * via m_cpytoiovec(). K&R-style definition; the type of `amb` and the
 * `free` routine differ per platform (STREAMS vs BSD mbufs).
 * NOTE(review): the chain-free call and return are elided here.
 */
1740 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1741 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1747 struct rx_packet *phandle;
1748 int hdr_len, data_len;
1753 m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1760 #endif /*KERNEL && !UKERNEL */
1763 /* send a response to a debug packet */
/*
 * rxi_ReceiveDebugPacket -- handle an incoming rxdebug request held in
 * *ap and answer it in place: the same packet buffer is rewritten and
 * transmitted back to (ahost, aport) with rxi_SendDebugPacket(). Only
 * client-initiated requests are answered, and the client flag is
 * cleared in the response. Supported request types: basic stats,
 * connection dump (all or "interesting"), peer dump, and the raw
 * rx_stats block; anything else gets an RX_DEBUGI_BADTYPE reply.
 * NOTE(review): this chunk is elided -- the switch framing, several
 * closing braces, and some assignments are not visible here.
 */
1766 rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
1767 afs_int32 ahost, short aport, int istack)
1769 struct rx_debugIn tin;
1771 struct rx_serverQueueEntry *np, *nqe;
1774 * Only respond to client-initiated Rx debug packets,
1775 * and clear the client flag in the response.
1777 if (ap->header.flags & RX_CLIENT_INITIATED) {
1778 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1779 rxi_EncodePacketHeader(ap);
/* pull the request (type + index) out of the packet body */
1784 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1785 /* all done with packet, now set length to the truth, so we can
1786 * reuse this packet */
1787 rx_computelen(ap, ap->length);
1789 tin.type = ntohl(tin.type);
1790 tin.index = ntohl(tin.index);
/* --- basic statistics query --- */
1792 case RX_DEBUGI_GETSTATS:{
1793 struct rx_debugStats tstat;
1795 /* get basic stats */
1796 memset((char *)&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1797 tstat.version = RX_DEBUGI_VERSION;
1798 #ifndef RX_ENABLE_LOCKS
1799 tstat.waitingForPackets = rx_waitingForPackets;
1801 MUTEX_ENTER(&rx_serverPool_lock);
1802 tstat.nFreePackets = htonl(rx_nFreePackets);
1803 tstat.nPackets = htonl(rx_nPackets);
1804 tstat.callsExecuted = htonl(rxi_nCalls);
1805 tstat.packetReclaims = htonl(rx_packetReclaims);
1806 tstat.usedFDs = CountFDs(64);
1807 tstat.nWaiting = htonl(rx_nWaiting);
1808 tstat.nWaited = htonl(rx_nWaited);
1809 queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
1811 MUTEX_EXIT(&rx_serverPool_lock);
1812 tstat.idleThreads = htonl(tstat.idleThreads);
/* grow the reply packet if the stats struct does not fit yet */
1813 tl = sizeof(struct rx_debugStats) - ap->length;
1815 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1818 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1820 ap->length = sizeof(struct rx_debugStats);
1821 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1822 rx_computelen(ap, ap->length);
/* --- connection table dump (all, or only "interesting" ones) --- */
1827 case RX_DEBUGI_GETALLCONN:
1828 case RX_DEBUGI_GETCONN:{
1830 register struct rx_connection *tc;
1831 struct rx_call *tcall;
1832 struct rx_debugConn tconn;
1833 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1836 tl = sizeof(struct rx_debugConn) - ap->length;
1838 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1842 memset((char *)&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1843 /* get N'th (maybe) "interesting" connection info */
1844 for (i = 0; i < rx_hashTableSize; i++) {
1845 #if !defined(KERNEL)
1846 /* the time complexity of the algorithm used here
1847 * exponentially increses with the number of connections.
1849 #ifdef AFS_PTHREAD_ENV
1855 MUTEX_ENTER(&rx_connHashTable_lock);
1856 /* We might be slightly out of step since we are not
1857 * locking each call, but this is only debugging output.
1859 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
/* tin.index selects the N'th matching connection */
1860 if ((all || rxi_IsConnInteresting(tc))
1861 && tin.index-- <= 0) {
1862 tconn.host = tc->peer->host;
1863 tconn.port = tc->peer->port;
1864 tconn.cid = htonl(tc->cid);
1865 tconn.epoch = htonl(tc->epoch);
1866 tconn.serial = htonl(tc->serial);
1867 for (j = 0; j < RX_MAXCALLS; j++) {
1868 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1869 if ((tcall = tc->call[j])) {
1870 tconn.callState[j] = tcall->state;
1871 tconn.callMode[j] = tcall->mode;
1872 tconn.callFlags[j] = tcall->flags;
1873 if (queue_IsNotEmpty(&tcall->rq))
1874 tconn.callOther[j] |= RX_OTHER_IN;
1875 if (queue_IsNotEmpty(&tcall->tq))
1876 tconn.callOther[j] |= RX_OTHER_OUT;
1878 tconn.callState[j] = RX_STATE_NOTINIT;
1881 tconn.natMTU = htonl(tc->peer->natMTU);
1882 tconn.error = htonl(tc->error);
1883 tconn.flags = tc->flags;
1884 tconn.type = tc->type;
1885 tconn.securityIndex = tc->securityIndex;
1886 if (tc->securityObject) {
1887 RXS_GetStats(tc->securityObject, tc,
/* helper macros to byte-swap security-stat fields in place */
1889 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1890 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1893 DOHTONL(packetsReceived);
1894 DOHTONL(packetsSent);
1895 DOHTONL(bytesReceived);
1899 sizeof(tconn.secStats.spares) /
1904 sizeof(tconn.secStats.sparel) /
1905 sizeof(afs_int32); i++)
1909 MUTEX_EXIT(&rx_connHashTable_lock);
1910 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1913 ap->length = sizeof(struct rx_debugConn);
1914 rxi_SendDebugPacket(ap, asocket, ahost, aport,
1920 MUTEX_EXIT(&rx_connHashTable_lock);
1922 /* if we make it here, there are no interesting packets */
1923 tconn.cid = htonl(0xffffffff); /* means end */
1924 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1927 ap->length = sizeof(struct rx_debugConn);
1928 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1934 * Pass back all the peer structures we have available
/* --- peer table dump --- */
1937 case RX_DEBUGI_GETPEER:{
1939 register struct rx_peer *tp;
1940 struct rx_debugPeer tpeer;
1943 tl = sizeof(struct rx_debugPeer) - ap->length;
1945 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1949 memset((char *)&tpeer, 0, sizeof(tpeer));
1950 for (i = 0; i < rx_hashTableSize; i++) {
1951 #if !defined(KERNEL)
1952 /* the time complexity of the algorithm used here
1953 * exponentially increses with the number of peers.
1955 * Yielding after processing each hash table entry
1956 * and dropping rx_peerHashTable_lock.
1957 * also increases the risk that we will miss a new
1958 * entry - but we are willing to live with this
1959 * limitation since this is meant for debugging only
1961 #ifdef AFS_PTHREAD_ENV
1967 MUTEX_ENTER(&rx_peerHashTable_lock);
1968 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1969 if (tin.index-- <= 0) {
/* copy the peer record field-by-field, converting multi-byte
 * fields to network order (bytesSent/Received are 64-bit pairs) */
1970 tpeer.host = tp->host;
1971 tpeer.port = tp->port;
1972 tpeer.ifMTU = htons(tp->ifMTU);
1973 tpeer.idleWhen = htonl(tp->idleWhen);
1974 tpeer.refCount = htons(tp->refCount);
1975 tpeer.burstSize = tp->burstSize;
1976 tpeer.burst = tp->burst;
1977 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1978 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1979 tpeer.rtt = htonl(tp->rtt);
1980 tpeer.rtt_dev = htonl(tp->rtt_dev);
1981 tpeer.timeout.sec = htonl(tp->timeout.sec);
1982 tpeer.timeout.usec = htonl(tp->timeout.usec);
1983 tpeer.nSent = htonl(tp->nSent);
1984 tpeer.reSends = htonl(tp->reSends);
1985 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1986 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1987 tpeer.rateFlag = htonl(tp->rateFlag);
1988 tpeer.natMTU = htons(tp->natMTU);
1989 tpeer.maxMTU = htons(tp->maxMTU);
1990 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1991 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1992 tpeer.MTU = htons(tp->MTU);
1993 tpeer.cwind = htons(tp->cwind);
1994 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1995 tpeer.congestSeq = htons(tp->congestSeq);
1996 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1997 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1998 tpeer.bytesReceived.high =
1999 htonl(tp->bytesReceived.high);
2000 tpeer.bytesReceived.low =
2001 htonl(tp->bytesReceived.low);
2003 MUTEX_EXIT(&rx_peerHashTable_lock);
2004 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
2007 ap->length = sizeof(struct rx_debugPeer);
2008 rxi_SendDebugPacket(ap, asocket, ahost, aport,
2014 MUTEX_EXIT(&rx_peerHashTable_lock);
2016 /* if we make it here, there are no interesting packets */
2017 tpeer.host = htonl(0xffffffff); /* means end */
2018 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
2021 ap->length = sizeof(struct rx_debugPeer);
2022 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* --- raw rx_stats dump, converted word-by-word to network order --- */
2027 case RX_DEBUGI_RXSTATS:{
2031 tl = sizeof(rx_stats) - ap->length;
2033 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
2037 /* Since its all int32s convert to network order with a loop. */
2038 MUTEX_ENTER(&rx_stats_mutex);
2039 s = (afs_int32 *) & rx_stats;
2040 for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
2041 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
2044 ap->length = sizeof(rx_stats);
2045 MUTEX_EXIT(&rx_stats_mutex);
2046 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* --- unknown request type: echo back an error reply --- */
2052 /* error response packet */
2053 tin.type = htonl(RX_DEBUGI_BADTYPE);
2054 tin.index = tin.type;
2055 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
2057 ap->length = sizeof(struct rx_debugIn);
2058 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/*
 * rxi_ReceiveVersionPacket -- answer an rx version query by writing a
 * 65-byte, zero-padded version string into the request packet and
 * sending it back to the requester. Only client-initiated requests are
 * answered, and the client flag is cleared in the reply.
 */
2066 rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
2067 afs_int32 ahost, short aport, int istack)
2072 * Only respond to client-initiated version requests, and
2073 * clear that flag in the response.
2075 if (ap->header.flags & RX_CLIENT_INITIATED) {
2078 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2079 rxi_EncodePacketHeader(ap);
2080 memset(buf, 0, sizeof(buf));
/* +4 skips a 4-char prefix of cml_version_number -- presumably a
 * version-string tag; confirm against the definition of the extern */
2081 strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2082 rx_packetwrite(ap, 0, 65, buf);
2085 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2093 /* send a debug packet back to the sender */
/*
 * rxi_SendDebugPacket -- transmit apacket to (ahost, aport), first
 * trimming the iovec list so that exactly apacket->length payload
 * bytes go out, and restoring the trimmed iovec/niovecs afterwards.
 * Delivery is best-effort: osi_NetSend's result is discarded.
 */
2095 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2096 afs_int32 ahost, short aport, afs_int32 istack)
2098 struct sockaddr_in taddr;
2104 int waslocked = ISAFS_GLOCK();
2107 taddr.sin_family = AF_INET;
2108 taddr.sin_port = aport;
2109 taddr.sin_addr.s_addr = ahost;
2110 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2111 taddr.sin_len = sizeof(struct sockaddr_in);
2114 /* We need to trim the niovecs. */
2115 nbytes = apacket->length;
2116 for (i = 1; i < apacket->niovecs; i++) {
/* last needed iovec found: shorten it and remember the originals */
2117 if (nbytes <= apacket->wirevec[i].iov_len) {
2118 savelen = apacket->wirevec[i].iov_len;
2119 saven = apacket->niovecs;
2120 apacket->wirevec[i].iov_len = nbytes;
2121 apacket->niovecs = i + 1; /* so condition fails because i == niovecs */
2123 nbytes -= apacket->wirevec[i].iov_len;
2126 #ifdef RX_KERNEL_TRACE
2127 if (ICL_SETACTIVE(afs_iclSetp)) {
2130 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2131 "before osi_NetSend()");
2139 /* debug packets are not reliably delivered, hence the cast below. */
2140 (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2141 apacket->length + RX_HEADER_SIZE, istack);
2143 #ifdef RX_KERNEL_TRACE
2144 if (ICL_SETACTIVE(afs_iclSetp)) {
2146 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2147 "after osi_NetSend()");
2156 if (saven) { /* means we truncated the packet above. */
2157 apacket->wirevec[i - 1].iov_len = savelen;
2158 apacket->niovecs = saven;
2163 /* Send the packet to appropriate destination for the specified
2164 * call. The header is first encoded and placed in the packet.
/*
 * rxi_SendPacket -- stamp p with a fresh per-connection serial number,
 * encode its header, and transmit it to conn->peer via osi_NetSend().
 * On send failure the retry time is advanced so retransmission happens
 * soon, and on definitive "unreachable" errors the call is marked so
 * the host is treated as down immediately. Updates send statistics.
 * NOTE(review): this chunk is elided -- declarations, the RXDEBUG
 * framing, and GLOCK handling lines are partly missing from view.
 */
2167 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2168 struct rx_packet *p, int istack)
2174 struct sockaddr_in addr;
2175 register struct rx_peer *peer = conn->peer;
2178 char deliveryType = 'S';
2180 /* The address we're sending the packet to */
2181 memset(&addr, 0, sizeof(addr));
2182 addr.sin_family = AF_INET;
2183 addr.sin_port = peer->port;
2184 addr.sin_addr.s_addr = peer->host;
2186 /* This stuff should be revamped, I think, so that most, if not
2187 * all, of the header stuff is always added here. We could
2188 * probably do away with the encode/decode routines. XXXXX */
2190 /* Stamp each packet with a unique serial number. The serial
2191 * number is maintained on a connection basis because some types
2192 * of security may be based on the serial number of the packet,
2193 * and security is handled on a per authenticated-connection
2195 /* Pre-increment, to guarantee no zero serial number; a zero
2196 * serial number means the packet was never sent. */
2197 MUTEX_ENTER(&conn->conn_data_lock);
2198 p->header.serial = ++conn->serial;
2199 MUTEX_EXIT(&conn->conn_data_lock);
2200 /* This is so we can adjust retransmit time-outs better in the face of
2201 * rapidly changing round-trip times. RTO estimation is not a la Karn.
/* remember the serial of the first transmission */
2203 if (p->firstSerial == 0) {
2204 p->firstSerial = p->header.serial;
2207 /* If an output tracer function is defined, call it with the packet and
2208 * network address. Note this function may modify its arguments. */
2209 if (rx_almostSent) {
2210 int drop = (*rx_almostSent) (p, &addr);
2211 /* drop packet if return value is non-zero? */
2213 deliveryType = 'D'; /* Drop the packet */
2217 /* Get network byte order header */
2218 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2219 * touch ALL the fields */
2221 /* Send the packet out on the same socket that related packets are being
2225 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2228 /* Possibly drop this packet, for testing purposes */
2229 if ((deliveryType == 'D')
2230 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2231 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2232 deliveryType = 'D'; /* Drop the packet */
2234 deliveryType = 'S'; /* Send the packet */
2235 #endif /* RXDEBUG */
2237 /* Loop until the packet is sent. We'd prefer just to use a
2238 * blocking socket, but unfortunately the interface doesn't
2239 * allow us to have the socket block in send mode, and not
2240 * block in receive mode */
2242 waslocked = ISAFS_GLOCK();
2243 #ifdef RX_KERNEL_TRACE
2244 if (ICL_SETACTIVE(afs_iclSetp)) {
2247 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2248 "before osi_NetSend()");
2257 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2258 p->length + RX_HEADER_SIZE, istack)) != 0) {
2259 /* send failed, so let's hurry up the resend, eh? */
2260 rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
/* retry in 10ms plus a delay scaled by the packet's backoff count */
2261 p->retryTime = p->timeSent; /* resend it very soon */
2262 clock_Addmsec(&(p->retryTime),
2263 10 + (((afs_uint32) p->backoff) << 8));
2264 /* Some systems are nice and tell us right away that we cannot
2265 * reach this recipient by returning an error code.
2266 * So, when this happens let's "down" the host NOW so
2267 * we don't sit around waiting for this host to timeout later.
2271 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2272 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2273 code == -ENETUNREACH
2274 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2275 code == EHOSTUNREACH
/* zeroing lastReceiveTime presumably makes timeout logic expire the
 * call promptly -- confirm against rx.c */
2280 call->lastReceiveTime = 0;
2283 #ifdef RX_KERNEL_TRACE
2284 if (ICL_SETACTIVE(afs_iclSetp)) {
2286 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2287 "after osi_NetSend()");
2298 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2300 rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2301 MUTEX_ENTER(&peer->peer_lock);
2302 hadd32(peer->bytesSent, p->length);
2303 MUTEX_EXIT(&peer->peer_lock);
2306 /* Send a list of packets to appropriate destination for the specified
2307 * connection. The headers are first encoded and placed in the packets.
/*
 * rxi_SendPacketList -- transmit `len` packets as a single AFS 3.5
 * jumbogram: one shared wire header followed by the sub-packets, each
 * but the last padded to RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE and
 * flagged RX_JUMBO_PACKET. Serial numbers for the whole batch are
 * reserved from the connection in one locked step. Error handling
 * mirrors rxi_SendPacket (hurry the resend; "down" unreachable hosts).
 * NOTE(review): this chunk is elided -- declarations, the send call's
 * framing, and several closing braces are missing from view.
 */
2310 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2311 struct rx_packet **list, int len, int istack)
2313 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2316 struct sockaddr_in addr;
2317 register struct rx_peer *peer = conn->peer;
2319 struct rx_packet *p = NULL;
2320 struct iovec wirevec[RX_MAXIOVECS];
2321 int i, length, code;
2324 struct rx_jumboHeader *jp;
2326 char deliveryType = 'S';
2328 /* The address we're sending the packet to */
2329 addr.sin_family = AF_INET;
2330 addr.sin_port = peer->port;
2331 addr.sin_addr.s_addr = peer->host;
/* need one iovec for the wire header plus one per packet */
2333 if (len + 1 > RX_MAXIOVECS) {
2334 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2338 * Stamp the packets in this jumbogram with consecutive serial numbers
2340 MUTEX_ENTER(&conn->conn_data_lock);
2341 serial = conn->serial;
2342 conn->serial += len;
2343 MUTEX_EXIT(&conn->conn_data_lock);
2346 /* This stuff should be revamped, I think, so that most, if not
2347 * all, of the header stuff is always added here. We could
2348 * probably do away with the encode/decode routines. XXXXX */
/* iovec 0 is the first packet's wire header; the loop fills the rest */
2351 length = RX_HEADER_SIZE;
2352 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2353 wirevec[0].iov_len = RX_HEADER_SIZE;
2354 for (i = 0; i < len; i++) {
2357 /* The whole 3.5 jumbogram scheme relies on packets fitting
2358 * in a single packet buffer. */
2359 if (p->niovecs > 2) {
2360 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2363 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2366 if (p->length != RX_JUMBOBUFFERSIZE) {
2367 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2369 p->header.flags |= RX_JUMBO_PACKET;
2370 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2371 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2373 wirevec[i + 1].iov_len = p->length;
2374 length += p->length;
2376 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
/* jp points into the PREVIOUS packet's pad area (set below each pass) */
2378 /* Convert jumbo packet header to network byte order */
2379 temp = (afs_uint32) (p->header.flags) << 24;
2380 temp |= (afs_uint32) (p->header.spare);
2381 *(afs_uint32 *) jp = htonl(temp);
2383 jp = (struct rx_jumboHeader *)
2384 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2386 /* Stamp each packet with a unique serial number. The serial
2387 * number is maintained on a connection basis because some types
2388 * of security may be based on the serial number of the packet,
2389 * and security is handled on a per authenticated-connection
2391 /* Pre-increment, to guarantee no zero serial number; a zero
2392 * serial number means the packet was never sent. */
2393 p->header.serial = ++serial;
2394 /* This is so we can adjust retransmit time-outs better in the face of
2395 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2397 if (p->firstSerial == 0) {
2398 p->firstSerial = p->header.serial;
2401 /* If an output tracer function is defined, call it with the packet and
2402 * network address. Note this function may modify its arguments. */
2403 if (rx_almostSent) {
2404 int drop = (*rx_almostSent) (p, &addr);
2405 /* drop packet if return value is non-zero? */
2407 deliveryType = 'D'; /* Drop the packet */
2411 /* Get network byte order header */
2412 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2413 * touch ALL the fields */
2416 /* Send the packet out on the same socket that related packets are being
2420 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2423 /* Possibly drop this packet, for testing purposes */
2424 if ((deliveryType == 'D')
2425 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2426 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2427 deliveryType = 'D'; /* Drop the packet */
2429 deliveryType = 'S'; /* Send the packet */
2430 #endif /* RXDEBUG */
2432 /* Loop until the packet is sent. We'd prefer just to use a
2433 * blocking socket, but unfortunately the interface doesn't
2434 * allow us to have the socket block in send mode, and not
2435 * block in receive mode */
2436 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2437 waslocked = ISAFS_GLOCK();
2438 if (!istack && waslocked)
2442 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2444 /* send failed, so let's hurry up the resend, eh? */
2445 rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex)
2446 for (i = 0; i < len; i++) {
/* mark EVERY packet in the failed jumbogram for early resend */
2448 p->retryTime = p->timeSent; /* resend it very soon */
2449 clock_Addmsec(&(p->retryTime),
2450 10 + (((afs_uint32) p->backoff) << 8));
2452 /* Some systems are nice and tell us right away that we cannot
2453 * reach this recipient by returning an error code.
2454 * So, when this happens let's "down" the host NOW so
2455 * we don't sit around waiting for this host to timeout later.
2459 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2460 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2461 code == -ENETUNREACH
2462 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2463 code == EHOSTUNREACH
2468 call->lastReceiveTime = 0;
2470 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2471 if (!istack && waslocked)
2479 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2482 rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2483 MUTEX_ENTER(&peer->peer_lock);
2484 hadd32(peer->bytesSent, p->length);
2485 MUTEX_EXIT(&peer->peer_lock);
2489 /* Send a "special" packet to the peer connection. If call is
2490 * specified, then the packet is directed to a specific call channel
2491 * associated with the connection, otherwise it is directed to the
2492 * connection only. Uses optionalPacket if it is supplied, rather than
2493 * allocating a new packet buffer. Nbytes is the length of the data
2494 * portion of the packet. If data is non-null, nbytes of data are
2495 * copied into the packet. Type is the type of the packet, as defined
2496 * in rx.h. Bug: there's a lot of duplication between this and other
2497 * routines. This needs to be cleaned up. */
/*
 * rxi_SendSpecial -- build and send a non-DATA packet (ack, abort,
 * busy, challenge, ...). Returns optionalPacket if one was supplied
 * (the caller keeps ownership); otherwise the allocated packet is
 * freed internally (free call elided in this chunk).
 */
2499 rxi_SendSpecial(register struct rx_call *call,
2500 register struct rx_connection *conn,
2501 struct rx_packet *optionalPacket, int type, char *data,
2502 int nbytes, int istack)
2504 /* Some of the following stuff should be common code for all
2505 * packet sends (it's repeated elsewhere) */
2506 register struct rx_packet *p;
2508 int savelen = 0, saven = 0;
2509 int channel, callNumber;
2511 channel = call->channel;
2512 callNumber = *call->callNumber;
2513 /* BUSY packets refer to the next call on this connection */
2514 if (type == RX_PACKET_TYPE_BUSY) {
/* no optionalPacket supplied: allocate one, panic if pool exhausted */
2523 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2525 osi_Panic("rxi_SendSpecial failure");
2532 p->header.serviceId = conn->serviceId;
2533 p->header.securityIndex = conn->securityIndex;
2534 p->header.cid = (conn->cid | channel);
2535 p->header.callNumber = callNumber;
2537 p->header.epoch = conn->epoch;
2538 p->header.type = type;
2539 p->header.flags = 0;
2540 if (conn->type == RX_CLIENT_CONNECTION)
2541 p->header.flags |= RX_CLIENT_INITIATED;
2543 rx_packetwrite(p, 0, nbytes, data);
/* trim the iovec list so exactly nbytes of payload go on the wire */
2545 for (i = 1; i < p->niovecs; i++) {
2546 if (nbytes <= p->wirevec[i].iov_len) {
2547 savelen = p->wirevec[i].iov_len;
2549 p->wirevec[i].iov_len = nbytes;
2550 p->niovecs = i + 1; /* so condition fails because i == niovecs */
2552 nbytes -= p->wirevec[i].iov_len;
/* call-directed packets go through rxi_Send; otherwise connection-only */
2556 rxi_Send(call, p, istack);
2558 rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2559 if (saven) { /* means we truncated the packet above. We probably don't */
2560 /* really need to do this, but it seems safer this way, given that */
2561 /* sneaky optionalPacket... */
2562 p->wirevec[i - 1].iov_len = savelen;
2565 if (!optionalPacket)
2567 return optionalPacket;
2571 /* Encode the packet's header (from the struct header in the packet to
2572 * the net byte order representation in the wire representation of the
2573 * packet, which is what is actually sent out on the wire) */
2575 rxi_EncodePacketHeader(register struct rx_packet *p)
2577 register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
/* zero the header first so unused/spare bits go out as 0 */
2579 memset((char *)buf, 0, RX_HEADER_SIZE);
2580 *buf++ = htonl(p->header.epoch);
2581 *buf++ = htonl(p->header.cid);
2582 *buf++ = htonl(p->header.callNumber);
2583 *buf++ = htonl(p->header.seq);
2584 *buf++ = htonl(p->header.serial);
/* pack type/flags/userStatus/securityIndex, one byte each, MSB first */
2585 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2586 | (((afs_uint32) p->header.flags) << 16)
2587 | (p->header.userStatus << 8) | p->header.securityIndex);
2588 /* Note: top 16 bits of this next word were reserved */
2589 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
2592 /* Decode the packet's header (from net byte order to a struct header) */
/* Exact inverse of rxi_EncodePacketHeader: read the wire words out of
 * wirevec[0] and scatter them into p->header host-order fields.
 * (The buf advancement between reads is elided in this chunk.) */
2594 rxi_DecodePacketHeader(register struct rx_packet *p)
2596 register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
/* the first five 32-bit words are plain big-endian fields */
2599 p->header.epoch = ntohl(*buf);
2601 p->header.cid = ntohl(*buf);
2603 p->header.callNumber = ntohl(*buf);
2605 p->header.seq = ntohl(*buf);
2607 p->header.serial = ntohl(*buf);
/* next word packs type/flags/userStatus/securityIndex, one byte each */
2613 /* C will truncate byte fields to bytes for me */
2614 p->header.type = temp >> 24;
2615 p->header.flags = temp >> 16;
2616 p->header.userStatus = temp >> 8;
2617 p->header.securityIndex = temp >> 0;
2622 p->header.serviceId = (temp & 0xffff);
2623 p->header.spare = temp >> 16;
2624 /* Note: top 16 bits of this last word are the security checksum */
/*
 * rxi_PrepareSendPacket -- fill in a DATA packet's header fields from
 * the call/connection state, reset its transmit timestamps, and (for
 * the last packet of the call) reconcile p->length with the sum of the
 * iov_lens, freeing surplus continuation buffers. Finishes by letting
 * the connection's security object prepare the packet.
 */
2628 rxi_PrepareSendPacket(register struct rx_call *call,
2629 register struct rx_packet *p, register int last)
2631 register struct rx_connection *conn = call->conn;
2633 ssize_t len; /* len must be a signed type; it can go negative */
2635 p->flags &= ~RX_PKTFLAG_ACKED;
2636 p->header.cid = (conn->cid | call->channel);
2637 p->header.serviceId = conn->serviceId;
2638 p->header.securityIndex = conn->securityIndex;
2640 /* No data packets on call 0. Where do these come from? */
2641 if (*call->callNumber == 0)
2642 *call->callNumber = 1;
2644 p->header.callNumber = *call->callNumber;
/* tnext is the next transmit sequence number for this call */
2645 p->header.seq = call->tnext++;
2646 p->header.epoch = conn->epoch;
2647 p->header.type = RX_PACKET_TYPE_DATA;
2648 p->header.flags = 0;
2649 p->header.spare = 0;
2650 if (conn->type == RX_CLIENT_CONNECTION)
2651 p->header.flags |= RX_CLIENT_INITIATED;
2654 p->header.flags |= RX_LAST_PACKET;
2656 clock_Zero(&p->retryTime); /* Never yet transmitted */
2657 clock_Zero(&p->firstSent); /* Never yet transmitted */
2658 p->header.serial = 0; /* Another way of saying never transmitted... */
2661 /* Now that we're sure this is the last data on the call, make sure
2662 * that the "length" and the sum of the iov_lens matches. */
2663 len = p->length + call->conn->securityHeaderSize;
/* subtract each payload iovec; len <= 0 means the data fits by iovec i-1 */
2665 for (i = 1; i < p->niovecs && len > 0; i++) {
2666 len -= p->wirevec[i].iov_len;
/* len still positive after the loop: iovecs cannot hold the data */
2669 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2670 } else if (i < p->niovecs) {
2671 /* Free any extra elements in the wirevec */
2672 #if defined(RX_ENABLE_TSFPQ)
2673 rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2674 #else /* !RX_ENABLE_TSFPQ */
2675 MUTEX_ENTER(&rx_freePktQ_lock);
2676 rxi_FreeDataBufsNoLock(p, i);
2677 MUTEX_EXIT(&rx_freePktQ_lock);
2678 #endif /* !RX_ENABLE_TSFPQ */
/* len is <= 0 here: shrink the last used iovec by the overshoot */
2683 p->wirevec[i - 1].iov_len += len;
2684 RXS_PreparePacket(conn->securityObject, call, p);
2687 /* Given an interface MTU size, calculate an adjusted MTU size that
2688 * will make efficient use of the RX buffers when the peer is sending
2689 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
/* NOTE(review): this view of the function is elided (gaps in the embedded
 * numbering) -- the return type line, the single-fragment early-return
 * body, and the small-MTU branch body are missing; comments below are
 * hedged accordingly. */
2691 rxi_AdjustIfMTU(int mtu)
/* When both sides use single-fragment datagrams, jumbogram rounding does
 * not apply (branch body is on an elided line -- presumably returns mtu). */
2696 if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
/* Size of the first jumbogram buffer: rx header + jumbo buffer + jumbo
 * header. */
2698 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2699 if (mtu <= adjMTU) {
/* Whole jumbo buffers that fit in the remaining space (mtu presumably
 * reduced by adjMTU on an elided line -- TODO confirm). */
2706 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2707 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2710 /* Given an interface MTU size, and the peer's advertised max receive
2711 * size, calculate an adjusted maxMTU size that makes efficient use
2712 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
/* NOTE(review): the return type line and braces are elided in this view. */
2714 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
/* Best case: every one of our send fragments filled to the interface MTU. */
2716 int maxMTU = mtu * rxi_nSendFrags;
/* ...but never more than the peer says it can receive. */
2717 maxMTU = MIN(maxMTU, peerMaxMTU);
/* Round down to an efficient rx-buffer boundary. */
2718 return rxi_AdjustIfMTU(maxMTU);
2721 /* Given a packet size, figure out how many datagram packets will fit.
2722 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2723 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2724 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
/* NOTE(review): this view is elided (gaps in the embedded numbering) --
 * the return type, the small-MTU branch body, and an intermediate clamp
 * are missing; comments below are hedged accordingly. */
2726 rxi_AdjustDgramPackets(int frags, int mtu)
/* An MTU too small (after IPv6 fragment-header overhead) to carry even
 * one jumbo buffer plus the rx header cannot carry a jumbogram; the
 * branch body is on an elided line -- presumably returns 1. */
2729 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
/* Total payload budget across all fragments; the UDP header overhead is
 * paid per fragment but the first UDP header is already accounted for. */
2732 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2733 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2734 /* subtract the size of the first and last packets */
2735 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
/* The 2 accounts for the first and last buffers; the remainder are
 * middle buffers of RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE each. */
2739 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2744 * This function can be used by the Windows Cache Manager
2745 * to dump the list of all rx packets so that we can determine
2746 * where the packet leakage is.
/* Debug-only (RXDEBUG_PACKET) dump of every rx packet ever allocated.
 * Walks the rx_mallocedP / allNextp chain and writes one formatted line
 * per packet to 'outputFile', each prefixed with 'cookie' so concurrent
 * dumps can be told apart.  Uses WriteFile/DWORD, i.e. this body is the
 * Windows (AFS_NT40_ENV) path.
 * NOTE(review): the declarations of 'output' and 'zilch' and the return
 * statement are on elided lines in this view -- confirm against full
 * source. */
2748 int rx_DumpPackets(FILE *outputFile, char *cookie)
2750 #ifdef RXDEBUG_PACKET
2752 struct rx_packet *p;
/* Hold the free-queue lock so the all-packets chain stays stable while
 * we walk it. */
2756 MUTEX_ENTER(&rx_freePktQ_lock);
2757 sprintf(output, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
2758 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
/* rx_mallocedP heads the list of all allocated packets (see file head);
 * allNextp links them. */
2760 for (p = rx_mallocedP; p; p = p->allNextp) {
2761 sprintf(output, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, retryTime=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, backoff=%u, length=%u header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
2762 cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec, p->retryTime.sec, p->retryTime.usec,
2763 p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->backoff, (afs_uint32)p->length,
2764 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
2765 (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
2766 (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
2767 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2770 sprintf(output, "%s - End dumping all Rx Packets\r\n", cookie);
2771 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2773 MUTEX_EXIT(&rx_freePktQ_lock);
2775 #endif /* RXDEBUG_PACKET */
2778 #endif /* AFS_NT40_ENV */