/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */
#include <afsconfig.h>
#include <afs/param.h>

#ifdef KERNEL
# if defined(UKERNEL)
#  include "afs/sysincludes.h"
#  include "afsincludes.h"
#  include "rx_kcommon.h"
# else /* defined(UKERNEL) */
#  ifdef RX_KERNEL_TRACE
#   include "rx_kcommon.h"
#  ifndef AFS_LINUX20_ENV
#  if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
#   include "afs/sysincludes.h"
#  if defined(AFS_OBSD_ENV)
#  include "h/socket.h"
#  if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#   if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#    include "sys/mount.h"      /* it gets pulled in by something later anyway */
#  include "netinet/in.h"
#  include "afs/afs_osi.h"
#  include "rx_kmutex.h"
# endif /* defined(UKERNEL) */

# if defined(AFS_NT40_ENV)
#  define EWOULDBLOCK WSAEWOULDBLOCK
#  include "rx_xmit_nt.h"
# include <sys/sysmacros.h>

#include "rx_packet.h"
#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */

static struct rx_packet *rx_mallocedP = 0;
static afs_uint32 rx_packet_id = 0;

extern char cml_version_number[];
static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_uint32 ahost, short aport,
                                afs_int32 istack);

static struct rx_packet *rxi_AllocPacketNoLock(int class);
static void rxi_MorePacketsNoLock(int apackets);

#ifdef RX_ENABLE_TSFPQ
static int rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first,
                                 int flush_global);
static void rxi_AdjustLocalPacketsTSFPQ(int num_keep_local,
                                        int allow_overcommit);
#endif
static void rxi_FreePacketNoLock(struct rx_packet *p);
static int rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first);
static int rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first,
                                   struct rx_queue * q);
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */
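/* Illustrative sketch (not part of the original source): a security
 * package honoring rule 1 above might expose an n-byte trailer it has
 * just written past the current payload.  The function name below is
 * hypothetical and exists only for this example. */
static void
example_expose_trailer(struct rx_packet *p, int n)
{
    /* the final iovec already has room for the trailer (rule 1);
     * extending iov_len and p->length makes the trailer visible */
    p->wirevec[p->niovecs - 1].iov_len += n;
    p->length += n;
}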
/* all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
    unsigned int i, l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            return *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                                    (offset - l)));
        }
        l += packet->wirevec[i].iov_len;
    }
    return 0;
}
/* all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
{
    unsigned int i, l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                             (offset - l))) = data;
            return 0;
        }
        l += packet->wirevec[i].iov_len;
    }
    return 0;
}
/* all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
afs_int32
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
                  char *out)
{
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            break;
        }
        l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i < packet->niovecs)) {
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        r -= j;
        out += j;
        l += packet->wirevec[i].iov_len;
        offset = l;
        i++;
    }

    return (r ? (resid - r) : resid);
}
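/* Usage sketch (an assumption, not from the original file): copy the
 * first 16 bytes of a packet's data area into a flat buffer.
 * rx_SlowReadPacket returns the number of bytes actually copied, which
 * may be fewer than requested if the packet is short. */
static int
example_read_prefix(struct rx_packet *p, char buf[16])
{
    int copied = rx_SlowReadPacket(p, 0, 16, buf);
    return copied;      /* < 16 means the packet ran out of data */
}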
/* all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
{
    unsigned int i, j, l, o, r;
    char *b;

    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > o) {
            break;
        }
        l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i <= RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)     /* ++niovecs as a side-effect */
                break;

        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        memcpy(b, in, j);
        r -= j;
        in += j;
        l += packet->wirevec[i].iov_len;
        offset = l;
        i++;
    }

    return (r ? (resid - r) : resid);
}
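/* Usage sketch (assumption): writing past a send packet's current
 * buffers is legal; the loop above grows the packet on demand with
 * continuation buffers via rxi_AllocDataBuf.  The helper name is
 * hypothetical. */
static int
example_write_blob(struct rx_packet *p, char *blob, int len)
{
    return rx_SlowWritePacket(p, 0, len, blob); /* bytes actually written */
}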
int
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
        RX_PACKET_IOV_FULLINIT(p);
    }

    return num_pkts;
}
#ifdef RX_ENABLE_TSFPQ
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_ts_info_t * rx_ts_info;
    int transfer;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
    if (transfer > 0) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        transfer = MAX(transfer, rx_TSFPQGlobSize);
        if (transfer > rx_nFreePackets) {
            /* alloc enough for us, plus a few globs for other threads */
            rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
        }

        RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

        MUTEX_EXIT(&rx_freePktQ_lock);
    }

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_packet *c;
    int i;
    int overq = 0;

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
         num_pkts--, overq++);

    if (overq) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            switch (class) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
                break;
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
                break;
            }
        }
    }

#ifdef KERNEL
    if (rx_nFreePackets < num_pkts)
        num_pkts = rx_nFreePackets;

    if (!num_pkts) {
        rxi_NeedMorePackets = TRUE;
        goto done;
    }
#else /* KERNEL */
    if (rx_nFreePackets < num_pkts) {
        rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets),
                                  4 * rx_initSendWindow));
    }
#endif /* KERNEL */

    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
         i < num_pkts;
         i++, c = queue_Next(c, rx_packet)) {
        RX_FPQ_MARK_USED(c);
    }

    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);

    rx_nFreePackets -= num_pkts;

#ifdef KERNEL
  done:
#endif
    MUTEX_EXIT(&rx_freePktQ_lock);

    return num_pkts;
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct rx_queue * q)
{
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

    if (!num_pkts) {
        for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        }
    } else {
        for (queue_Scan(q, c, nc, rx_packet)) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        }
    }

    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct rx_queue *q)
{
    struct rx_queue cbs;
    struct rx_packet *p, *np;
    int qlen = 0;

    osi_Assert(num_pkts >= 0);
    queue_Init(&cbs);

    if (!num_pkts) {
        for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
            }
        }
    } else {
        for (queue_Scan(q, p, np, rx_packet)) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
            }
        }
    }

    if (qlen) {
        queue_SpliceAppend(q, &cbs);
        qlen += num_pkts;
    } else
        qlen = num_pkts;

    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);

    return num_pkts;
}
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
void
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
{
    int i;

    i = p->niovecs - 1;
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
        }
    } else {
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
        }
    }
}
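/* Usage note (a sketch, not from the original): an rxkad-style caller
 * that trimmed the packet down for encryption rounding would later call
 * rxi_RoundUpPacket(p, pad) to reclaim `pad` bytes of that rounding in
 * the final buffer. */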
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
    int i, nv;
    struct rx_queue q;
    struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
        nv++;
    if ((nv + p->niovecs) > RX_MAXWVECS)
        nv = RX_MAXWVECS - p->niovecs;
    if (nv < 1)
        return nb;

    /* allocate buffers */
    queue_Init(&q);
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
        queue_Remove(cb);
        p->wirevec[i].iov_base = (caddr_t) cb->localdata;
        p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    }

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
    p->niovecs += nv;

    return nb;
}
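/* Usage sketch (assumption): grow a send packet so it can carry `want`
 * more bytes, tolerating the partial-success return convention
 * described above.  The helper name is hypothetical. */
static int
example_grow_packet(struct rx_packet *p, int want)
{
    int shortfall = rxi_AllocDataBuf(p, want, RX_PACKET_CLASS_SEND_CBUF);
    return (shortfall > 0) ? -1 : 0;    /* >0 means bytes we failed to get */
}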
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);

    PIN(p, getme);              /* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);

        RX_TS_FPQ_CHECKIN(rx_ts_info, p);

        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
        rx_mallocedP = p;
#endif /* RXDEBUG_PACKET */
        MUTEX_EXIT(&rx_freePktQ_lock);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_NeedMorePackets = FALSE;
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);

    PIN(p, getme);              /* XXXXX */
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
#endif

        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
        rx_mallocedP = p;
#endif /* RXDEBUG_PACKET */
    }

    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
{
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);

    PIN(p, getme);              /* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);

        RX_TS_FPQ_CHECKIN(rx_ts_info, p);

        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
        rx_mallocedP = p;
#endif /* RXDEBUG_PACKET */
        MUTEX_EXIT(&rx_freePktQ_lock);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (flush_global &&
        (num_keep_local < apackets)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
        rxi_NeedMorePackets = FALSE;
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
static void
rxi_MorePacketsNoLock(int apackets)
{
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;
    int getme;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
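    /* Worked example (illustrative; the exact constants vary by build):
     * if a jumbo-sized receive needs
     *   k = (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE
     * continuation buffers, then for every 4 packets requested we
     * allocate k extra rx_packet structures whose localdata areas serve
     * purely as continuation buffers for one of those 4. */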
    do {
        getme = apackets * sizeof(struct rx_packet);
        p = osi_Alloc(getme);
        if (p == NULL) {
            apackets -= apackets / 4;
            osi_Assert(apackets > 0);
        }
    } while (p == NULL);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
#endif

        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
        rx_mallocedP = p;
#endif /* RXDEBUG_PACKET */
    }

    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();
}
void
rxi_FreeAllPackets(void)
{
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
}
#ifdef RX_ENABLE_TSFPQ
static void
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
{
    struct rx_ts_info_t * rx_ts_info;
    int xfer;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (num_keep_local < rx_ts_info->_FPQ.len) {
            xfer = rx_ts_info->_FPQ.len - num_keep_local;
            RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
            rxi_PacketsUnWait();
        } else {
            xfer = num_keep_local - rx_ts_info->_FPQ.len;
            if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
                xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
            if (rx_nFreePackets < xfer) {
                rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets,
                                          4 * rx_initSendWindow));
            }
            RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
        }
        MUTEX_EXIT(&rx_freePktQ_lock);
    }
}

void
rxi_FlushLocalPacketsTSFPQ(void)
{
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
}
#endif /* RX_ENABLE_TSFPQ */
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
void
rx_CheckPackets(void)
{
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_maxSendWindow);
    }
}
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page
   out, they must be stored so that packets which are adjacent in memory
   are adjacent in the free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
#ifndef RX_ENABLE_TSFPQ
static void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_FPQ_MARK_FREE(p);
    rx_nFreePackets++;
    queue_Append(&rx_freePacketQueue, p);
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
{
    struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p -- packet from which continuation buffers will be freed
 * [IN] first -- iovec offset of first continuation buffer to free
 * [IN] q -- queue into which continuation buffers will be chained
 *
 * returns:
 *   number of continuation buffers freed
 */
#ifndef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
{
    struct iovec *iov;
    struct rx_packet * cb;
    int count = 0;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
        iov = &p->wirevec[first];
        if (!iov->iov_base)
            osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
        cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
        RX_FPQ_MARK_FREE(cb);
        queue_Append(q, cb);
    }
    p->length = 0;
    p->niovecs = 0;

    return count;
}
#endif
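/* Usage note (a sketch): callers batch continuation buffers from many
 * packets into a private queue with this routine, then splice the whole
 * batch into rx_freePacketQueue under a single lock acquisition, as the
 * non-TSFPQ rxi_FreePackets does above. */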
/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p -- packet from which to free continuation buffers
 * [IN] first -- iovec offset of first continuation buffer to free
 *
 * returns:
 *   zero always
 */
static int
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
{
    struct iovec *iov;

    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
        if (!iov->iov_base)
            osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }
    p->length = 0;
    p->niovecs = 0;

    return 0;
}
#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p -- packet from which continuation buffers will be freed
 * [IN] first -- iovec offset of first continuation buffer to free
 *               any value less than 2, the min number of iovecs,
 *               is treated as if it is 2.
 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
 *                      global free pool before returning
 *
 * returns:
 *   zero always
 */
static int
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
{
    struct iovec *iov;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
        if (!iov->iov_base)
            osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
        RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    }
    p->length = 0;
    p->niovecs = 0;

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }
    return 0;
}
#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
void
rxi_RestoreDataBufs(struct rx_packet *p)
{
    unsigned int i;
    struct iovec *iov;

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
            rxi_nBadIovecs++;
            p->niovecs = i;
            break;
        }
        iov->iov_len = RX_CBUFFERSIZE;
    }
}
#ifdef RX_ENABLE_TSFPQ
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;

    if (first != 1)
        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
        if (!iov->iov_base)
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov < end) {
        RX_TS_INFO_GET(rx_ts_info);
        for (; iov < end; iov++) {
            if (!iov->iov_base)
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
            p->niovecs--;
        }
        if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
            MUTEX_ENTER(&rx_freePktQ_lock);

            RX_TS_FPQ_LTOG(rx_ts_info);
            rxi_PacketsUnWait();

            MUTEX_EXIT(&rx_freePktQ_lock);
        }
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;

    if (first != 1)
        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
        if (!iov->iov_base)
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov < end) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        for (; iov < end; iov++) {
            if (!iov->iov_base)
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
            p->niovecs--;
        }
        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacket(struct rx_packet *p)
{
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_FreePacket(struct rx_packet *p)
{
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
#endif /* RX_ENABLE_TSFPQ */
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary; besides, this is the way the
 * length field is usually used. */
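/* Sketch (an illustration, not original code): because p->length
 * excludes the rx header, the number of bytes actually handed to the
 * transport is p->length + RX_HEADER_SIZE, matching the osi_NetSend
 * calls further down in this file. */
static int
example_wire_length(struct rx_packet *p)
{
    return p->length + RX_HEADER_SIZE;  /* the header goes on the wire too */
}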
#ifdef RX_ENABLE_TSFPQ
static struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            switch (class) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
                break;
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
                break;
            }
        }
        return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
#ifdef KERNEL
        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);
#endif /* KERNEL */

        RX_TS_FPQ_GTOL(rx_ts_info);
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
#else /* RX_ENABLE_TSFPQ */
static struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    struct rx_packet *p;

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            switch (class) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
                break;
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
                break;
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
                break;
            }
        }
        return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);

#ifdef KERNEL
    if (queue_IsEmpty(&rx_freePacketQueue))
        osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
    if (queue_IsEmpty(&rx_freePacketQueue))
        rxi_MorePacketsNoLock(rx_maxSendWindow);
#endif /* KERNEL */

    rx_nFreePackets--;
    p = queue_First(&rx_freePacketQueue, rx_packet);
    queue_Remove(p);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
static struct rx_packet *
rxi_AllocPacketTSFPQ(int class, int pull_global)
{
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);

        RX_TS_FPQ_GTOL(rx_ts_info);

        MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
        return NULL;
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
    return p;
}
#else /* RX_ENABLE_TSFPQ */
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
struct rx_packet *
rxi_AllocSendPacket(struct rx_call *call, int want)
{
    struct rx_packet *p = (struct rx_packet *)0;
    int mud;
    unsigned delta;

    mud = call->MTU - RX_HEADER_SIZE;
    delta =
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
        want += delta;
        want = MIN(want, mud);

        if ((unsigned)want > p->length)
            (void)rxi_AllocDataBuf(p, (want - p->length),
                                   RX_PACKET_CLASS_SEND_CBUF);

        if (p->length > mud)
            p->length = mud;

        if (delta >= p->length) {
            rxi_FreePacket(p);
            p = NULL;
        } else {
            p->length -= delta;
        }
        return p;
    }
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);

            want += delta;
            want = MIN(want, mud);

            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);

            if (p->length > mud)
                p->length = mud;

            if (delta >= p->length) {
                rxi_FreePacket(p);
                p = NULL;
            } else {
                p->length -= delta;
            }
            break;
        }

        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
         * just wait.
         */
        call->flags |= RX_CALL_WAIT_PACKETS;
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
        osi_rxSleep(&rx_waitingForPackets);
#endif
        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
    }

    return p;
}
#ifndef KERNEL
#ifdef AFS_NT40_ENV
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
#else
/* count the number of used FDs */
static int
CountFDs(int amax)
{
    struct stat tstat;
    int i, code;
    int count = 0;

    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
        if (code == 0)
            count++;
    }
    return count;
}
#endif /* AFS_NT40_ENV */
#else /* KERNEL */

#define CountFDs(amax) amax

#endif /* KERNEL */
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
int
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
               u_short * port)
{
    struct sockaddr_in from;
    struct msghdr msg;
    int nbytes;
    afs_int32 rlen;
    afs_uint32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);    /* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;     /* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
                                 * it once in order to avoid races.  */
    tlen = rlen - tlen;
    if (tlen > 0) {
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
        if (tlen > 0) {
            tlen = rlen - tlen;
        } else
            tlen = rlen;
    } else
        tlen = rlen;

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (u_short)(nbytes - RX_HEADER_SIZE);
    if (nbytes < 0 || (nbytes > tlen) || (p->length & 0x8000)) {        /* Bogus packet */
        if (nbytes < 0 && errno == EWOULDBLOCK) {
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.noPacketOnRead);
        } else if (nbytes <= 0) {
            if (rx_stats_active) {
                rx_atomic_inc(&rx_stats.bogusPacketOnRead);
                rx_stats.bogusHost = from.sin_addr.s_addr;
            }
            dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
                 ntohs(from.sin_port), nbytes));
        }
        return 0;
    }
#ifdef RXDEBUG
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
             && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;

        dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
             p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
             p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
             p->length));
#ifdef RX_TRIMDATABUFS
        rxi_TrimDataBufs(p, 1);
#endif
        return 0;
    }
#endif
    else {
        /* Extract packet header. */
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        if (rx_stats_active
            && p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {

            rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
        }

#ifdef RX_TRIMDATABUFS
        /* Free any empty packet buffers at the end of this packet */
        rxi_TrimDataBufs(p, 1);
#endif
        return 1;
    }
}
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */

struct rx_packet *
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
                     int first)
{
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    int niov, i;
    int length;
    afs_uint32 temp;
    struct iovec *iov;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
        return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
        return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    p->niovecs = 2;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

    return np;
}
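/* Layout sketch (restating the conversion above, not new protocol): the
 * abbreviated jumbo header is a single 32-bit word with the flags in the
 * top 8 bits and the checksum in the low 16 bits.  The function name is
 * hypothetical and exists only for this example. */
static void
example_decode_jumbo_word(afs_uint32 netword, u_char *flags, u_short *cksum)
{
    afs_uint32 host = ntohl(netword);
    *flags = (u_char)(host >> 24);
    *cksum = (u_short)(host & 0xffff);
}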
#if !defined(KERNEL) || defined(UKERNEL)
/* Send a udp datagram */
int
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
{
    struct msghdr msg;
    int ret;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);

    return ret;
}
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
static int
cpytoc(mblk_t * mp, int off, int len, char *cp)
{
    int n;

    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
            return len;
        }
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);
        cp += n;
        len -= n;
    }
    return len;
}

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
static int
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
           int niovs)
{
    int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
            return len;
        }
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        o = 0;
        while (n) {
            if (!t) {
                o = 0;
                i++;
                t = iovs[i].iov_len;
            }
            m = MIN(n, t);
            memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
            mp->b_rptr += m;
            o += m;
            t -= m;
            n -= m;
            len -= m;
        }
    }
    return len;
}

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
static int
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
{
    caddr_t p1, p2;
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");      /* MTUXXX probably don't need this check */

    while (off > 0) {
        if (m->m_len <= off) {
            off -= m->m_len;
            m = m->m_next;
            if (!m)
                osi_Panic("m_cpytoiovec");
        } else
            break;
    }

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    i = 0;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    while (len) {
        t = MIN(l1, MIN(l2, (unsigned int)len));
        memcpy(p2, p1, t);
        p1 += t;
        p2 += t;
        l1 -= t;
        l2 -= t;
        len -= t;
        if (!l1) {
            m = m->m_next;
            if (!m)
                break;
            p1 = mtod(m, caddr_t);
            l1 = m->m_len;
        }
        if (!l2) {
            if (++i >= niovs)
                break;
            p2 = iovs[i].iov_base;
            l2 = iovs[i].iov_len;
        }
    }

    return len;
}
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
#if defined(AFS_NBSD_ENV)
int
rx_mb_to_packet(struct mbuf *amb, void (*free) (struct mbuf *), int hdr_len, int data_len, struct rx_packet *phandle)
#else
int
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     mblk_t *amb;
#else
     struct mbuf *amb;
#endif
     void (*free) ();
     struct rx_packet *phandle;
     int hdr_len, data_len;
#endif /* AFS_NBSD_ENV */
{
    int code;

    code =
        m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
                     phandle->niovecs);
    (*free) (amb);

    return code;
}
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */

struct rx_packet *
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
                       afs_uint32 ahost, short aport, int istack)
{
    struct rx_debugIn tin;
    afs_int32 tl;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
    } else {
        return ap;
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;

            /* get basic stats */
            memset(&tstat, 0, sizeof(tstat));   /* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
#endif
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.nPackets = htonl(rx_nPackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
            tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
                        tstat.idleThreads);
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);
            tl = sizeof(struct rx_debugStats) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
                           (char *)&tstat);
            ap->length = sizeof(struct rx_debugStats);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            rx_computelen(ap, ap->length);
            break;
        }
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            unsigned int i, j;
            struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);

            tl = sizeof(struct rx_debugConn) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            memset(&tconn, 0, sizeof(tconn));   /* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
                 */
#ifdef AFS_PTHREAD_ENV
                pthread_yield();
#endif
#endif
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                 */
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                            } else
                                tconn.callState[j] = RX_STATE_NOTINIT;
                        }

                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
                                         &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(flags);
                            DOHTONL(expires);
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                            DOHTONL(bytesSent);
                            for (i = 0;
                                 i <
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(short); i++)
                                DOHTONS(spares[i]);
                            for (i = 0;
                                 i <
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)
                                DOHTONL(sparel[i]);
                        }

                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                                       (char *)&tconn);
                        tl = ap->length;
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                                            istack);
                        ap->length = tl;
                        return ap;
                    }
                }
                MUTEX_EXIT(&rx_connHashTable_lock);
            }
            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);      /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                           (char *)&tconn);
            tl = ap->length;
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            ap->length = tl;
            break;
        }
        /*
         * Pass back all the peer structures we have available
         */
    case RX_DEBUGI_GETPEER:{
            unsigned int i;
            struct rx_peer *tp;
            struct rx_debugPeer tpeer;

            tl = sizeof(struct rx_debugPeer) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            memset(&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 *
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock.
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
                 */
#ifdef AFS_PTHREAD_ENV
                pthread_yield();
#endif
#endif
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        tp->refCount++;
                        MUTEX_EXIT(&rx_peerHashTable_lock);

                        MUTEX_ENTER(&tp->peer_lock);
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = 0;
                        tpeer.burst = 0;
                        tpeer.burstWait.sec = 0;
                        tpeer.burstWait.usec = 0;
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high =
                            htonl(tp->bytesSent >> 32);
                        tpeer.bytesSent.low =
                            htonl(tp->bytesSent & MAX_AFS_UINT32);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived >> 32);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived & MAX_AFS_UINT32);
                        MUTEX_EXIT(&tp->peer_lock);

                        MUTEX_ENTER(&rx_peerHashTable_lock);
                        tp->refCount--;
                        MUTEX_EXIT(&rx_peerHashTable_lock);

                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                                       (char *)&tpeer);
                        tl = ap->length;
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                                            istack);
                        ap->length = tl;
                        return ap;
                    }
                }
                MUTEX_EXIT(&rx_peerHashTable_lock);
            }
            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff);     /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                           (char *)&tpeer);
            tl = ap->length;
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            ap->length = tl;
            break;
        }
    case RX_DEBUGI_RXSTATS:{
            int i;
            afs_int32 *s;

            tl = sizeof(rx_stats) - ap->length;
            if (tl > 0)
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            if (tl > 0)
                return ap;

            /* Since its all int32s convert to network order with a loop. */
            if (rx_stats_active)
                MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) & rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

            tl = ap->length;
            ap->length = sizeof(rx_stats);
            if (rx_stats_active)
                MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            ap->length = tl;
            break;
        }

    default:
        /* error response packet */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        tl = ap->length;
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
        ap->length = tl;
        break;
    }
    return ap;
}
struct rx_packet *
rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
                         afs_uint32 ahost, short aport, int istack)
{
    afs_int32 tl;

    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        char buf[66];

        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
        memset(buf, 0, sizeof(buf));
        strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
        rx_packetwrite(ap, 0, 65, buf);
        tl = ap->length;
        ap->length = 65;
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
        ap->length = tl;
    }

    return ap;
}
/* send a debug packet back to the sender */
static void
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                    afs_uint32 ahost, short aport, afs_int32 istack)
{
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;
    int saven = 0;
#ifdef KERNEL
    int waslocked = ISAFS_GLOCK();
#endif

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
#endif

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
        if (nbytes <= apacket->wirevec[i].iov_len) {
            savelen = apacket->wirevec[i].iov_len;
            saven = apacket->niovecs;
            apacket->wirevec[i].iov_len = nbytes;
            apacket->niovecs = i + 1;   /* so condition fails because i == niovecs */
        } else
            nbytes -= apacket->wirevec[i].iov_len;
    }

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        AFS_GLOCK();
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
        AFS_GUNLOCK();
    }
#endif

    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
                      apacket->length + RX_HEADER_SIZE, istack);

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        AFS_GLOCK();
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
        AFS_GUNLOCK();
    }
#endif

    if (saven) {                /* means we truncated the packet above. */
        apacket->wirevec[i - 1].iov_len = savelen;
        apacket->niovecs = saven;
    }
}
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
void
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
               struct rx_packet *p, int istack)
{
#if defined(KERNEL)
    int waslocked;
#endif
    int code;
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    osi_socket socket;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    if (p->length > conn->peer->maxPacketSize) {
        if ((p->header.type == RX_PACKET_TYPE_ACK) &&
            (p->header.flags & RX_REQUEST_ACK)) {
            conn->lastPingSize = p->length;
            conn->lastPingSizeSer = p->header.serial;
        } else if (p->header.seq != 0) {
            conn->lastPacketSize = p->length;
            conn->lastPacketSizeSeq = p->header.seq;
        }
    }
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
        p->firstSerial = p->header.serial;
    }
#ifdef RXDEBUG
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
        int drop = (*rx_almostSent) (p, &addr);
        /* drop packet if return value is non-zero? */
        if (drop)
            deliveryType = 'D'; /* Drop the packet */
    }
#endif

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
                                 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
        (conn->type ==
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
    } else {
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */

        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#ifdef KERNEL
        waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
        if (ICL_SETACTIVE(afs_iclSetp)) {
            AFS_GLOCK();
            afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                       "before osi_NetSend()");
            AFS_GUNLOCK();
        }
#endif
        if (waslocked)
            AFS_GUNLOCK();
#endif
        if ((code =
             osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
                         p->length + RX_HEADER_SIZE, istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.netSendFailures);
            p->flags &= ~RX_PKTFLAG_SENT;       /* resend it very soon */

            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later.
             */
            if (call &&
#ifdef AFS_NT40_ENV
                (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
#elif defined(AFS_LINUX20_ENV)
                code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
                code == EHOSTUNREACH
#else
                0
#endif
                )
                call->lastReceiveTime = 0;
        }
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
        if (ICL_SETACTIVE(afs_iclSetp)) {
            AFS_GLOCK();
            afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                       "after osi_NetSend()");
            AFS_GUNLOCK();
        }
#endif
        if (waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }
#endif
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
         ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
         p->header.seq, p->header.flags, p, p->length));

    if (rx_stats_active) {
        rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
        MUTEX_ENTER(&peer->peer_lock);
        peer->bytesSent += p->length;
        MUTEX_EXIT(&peer->peer_lock);
    }
}
2303 /* Send a list of packets to appropriate destination for the specified
2304 * connection. The headers are first encoded and placed in the packets.
2307 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2308 struct rx_packet **list, int len, int istack)
2310 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2313 struct sockaddr_in addr;
2314 struct rx_peer *peer = conn->peer;
2316 struct rx_packet *p = NULL;
2317 struct iovec wirevec[RX_MAXIOVECS];
2318 int i, length, code;
2321 struct rx_jumboHeader *jp;
2323 char deliveryType = 'S';
2325 /* The address we're sending the packet to */
2326 addr.sin_family = AF_INET;
2327 addr.sin_port = peer->port;
2328 addr.sin_addr.s_addr = peer->host;
2330 if (len + 1 > RX_MAXIOVECS) {
2331 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2335 * Stamp the packets in this jumbogram with consecutive serial numbers
2337 MUTEX_ENTER(&conn->conn_data_lock);
2338 serial = conn->serial;
2339 conn->serial += len;
2340 for (i = 0; i < len; i++) {
2342 if (p->length > conn->peer->maxPacketSize) {
2343 /* a ping *or* a sequenced packet can count */
2344 if ((p->length > conn->peer->maxPacketSize)) {
2345 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2346 (p->header.flags & RX_REQUEST_ACK)) &&
2347 ((i == 0) || (p->length >= conn->lastPingSize))) {
2348 conn->lastPingSize = p->length;
2349 conn->lastPingSizeSer = serial + i;
2350 } else if ((p->header.seq != 0) &&
2351 ((i == 0) || (p->length >= conn->lastPacketSize))) {
2352 conn->lastPacketSize = p->length;
2353 conn->lastPacketSizeSeq = p->header.seq;
2358 MUTEX_EXIT(&conn->conn_data_lock);
2361 /* This stuff should be revamped, I think, so that most, if not
2362 * all, of the header stuff is always added here. We could
2363 * probably do away with the encode/decode routines. XXXXX */
2366 length = RX_HEADER_SIZE;
2367 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2368 wirevec[0].iov_len = RX_HEADER_SIZE;
2369 for (i = 0; i < len; i++) {
2372 /* The whole 3.5 jumbogram scheme relies on packets fitting
2373 * in a single packet buffer. */
2374 if (p->niovecs > 2) {
2375 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2378 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2381 if (p->length != RX_JUMBOBUFFERSIZE) {
2382 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2384 p->header.flags |= RX_JUMBO_PACKET;
2385 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2386 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2388 wirevec[i + 1].iov_len = p->length;
2389 length += p->length;
2391 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2393 /* Convert jumbo packet header to network byte order */
2394 temp = (afs_uint32) (p->header.flags) << 24;
2395 temp |= (afs_uint32) (p->header.spare);
2396 *(afs_uint32 *) jp = htonl(temp);
2398 jp = (struct rx_jumboHeader *)
2399 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2401 /* Stamp each packet with a unique serial number. The serial
2402 * number is maintained on a connection basis because some types
2403 * of security may be based on the serial number of the packet,
2404 * and security is handled on a per authenticated-connection
2406 /* Pre-increment, to guarantee no zero serial number; a zero
2407 * serial number means the packet was never sent. */
2408 p->header.serial = ++serial;
2409 /* This is so we can adjust retransmit time-outs better in the face of
2410 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2412 if (p->firstSerial == 0) {
2413 p->firstSerial = p->header.serial;
2416 /* If an output tracer function is defined, call it with the packet and
2417 * network address. Note this function may modify its arguments. */
2418 if (rx_almostSent) {
2419 int drop = (*rx_almostSent) (p, &addr);
2420 /* drop packet if return value is non-zero? */
2422 deliveryType = 'D'; /* Drop the packet */
2426 /* Get network byte order header */
2427 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2428 * touch ALL the fields */
2431 /* Send the packet out on the same socket that related packets are being
2435 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2438 /* Possibly drop this packet, for testing purposes */
2439 if ((deliveryType == 'D')
2440 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2441 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2442 deliveryType = 'D'; /* Drop the packet */
2444 deliveryType = 'S'; /* Send the packet */
2445 #endif /* RXDEBUG */
2447 /* Loop until the packet is sent. We'd prefer just to use a
2448 * blocking socket, but unfortunately the interface doesn't
2449 * allow us to have the socket block in send mode, and not
2450 * block in receive mode */
2451 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2452 waslocked = ISAFS_GLOCK();
2453 if (!istack && waslocked)
2457 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2459 /* send failed, so let's hurry up the resend, eh? */
2460 if (rx_stats_active)
2461 rx_atomic_inc(&rx_stats.netSendFailures);
2462 for (i = 0; i < len; i++) {
		p = list[i];
		p->flags &= ~RX_PKTFLAG_SENT;	/* resend it very soon */
2466 /* Some systems are nice and tell us right away that we cannot
2467 * reach this recipient by returning an error code.
2468 * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call &&
#ifdef AFS_NT40_ENV
2473 (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
2474 #elif defined(AFS_LINUX20_ENV)
2475 code == -ENETUNREACH
2476 #elif defined(AFS_DARWIN_ENV)
2477 code == EHOSTUNREACH
#else
		0
#endif
		)
		call->lastReceiveTime = 0;
	}
2484 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
	if (!istack && waslocked)
	    AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }
#endif
2491 osi_Assert(p != NULL);
2493 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
2494 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2495 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2496 p->header.seq, p->header.flags, p, p->length));
2499 if (rx_stats_active) {
2500 rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
2501 MUTEX_ENTER(&peer->peer_lock);
2502 peer->bytesSent += p->length;
	MUTEX_EXIT(&peer->peer_lock);
    }
}
2507 /* Send a raw abort packet, without any call or connection structures */
void
rxi_SendRawAbort(osi_socket socket, afs_uint32 host, u_short port,
		 afs_int32 error, struct rx_packet *source, int istack)
{
2512 struct rx_header theader;
2513 struct sockaddr_in addr;
2514 struct iovec iov[2];
2516 memset(&theader, 0, sizeof(theader));
2517 theader.epoch = htonl(source->header.epoch);
2518 theader.callNumber = htonl(source->header.callNumber);
2519 theader.serial = htonl(1);
2520 theader.type = RX_PACKET_TYPE_ABORT;
2521 theader.serviceId = htons(source->header.serviceId);
2522 theader.securityIndex = source->header.securityIndex;
2523 theader.cid = htonl(source->header.cid);
2525 error = htonl(error);
2527 iov[0].iov_base = &theader;
2528 iov[0].iov_len = sizeof(struct rx_header);
2529 iov[1].iov_base = &error;
2530 iov[1].iov_len = sizeof(error);
2532 addr.sin_family = AF_INET;
2533 addr.sin_addr.s_addr = host;
2534 addr.sin_port = port;
2535 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    addr.sin_len = sizeof(struct sockaddr_in);
#endif
2539 osi_NetSend(socket, &addr, iov, 2,
		sizeof(struct rx_header) + sizeof(error), istack);
}
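/* Illustrative usage sketch (not built; the wrapper name is hypothetical,
 * real call sites live in rx.c).  rxi_SendRawAbort lets us refuse traffic
 * for which we have no connection structure, echoing the offending
 * packet's addressing so the peer can match the abort to its call. */
#if 0
static void
example_RefuseBogusPacket(osi_socket asocket, afs_uint32 ahost, u_short aport,
			  struct rx_packet *np, int istack)
{
    /* ahost/aport are expected in network byte order, since they are copied
     * straight into the sockaddr_in above; the error is passed in host
     * order because the callee applies htonl() itself. */
    rxi_SendRawAbort(asocket, ahost, aport, RX_PROTOCOL_ERROR, np, istack);
}
#endif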
2543 /* Send a "special" packet to the peer connection. If call is
2544 * specified, then the packet is directed to a specific call channel
2545 * associated with the connection, otherwise it is directed to the
2546 * connection only. Uses optionalPacket if it is supplied, rather than
2547 * allocating a new packet buffer. Nbytes is the length of the data
2548 * portion of the packet. If data is non-null, nbytes of data are
2549 * copied into the packet. Type is the type of the packet, as defined
2550 * in rx.h. Bug: there's a lot of duplication between this and other
2551 * routines. This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(struct rx_call *call,
2554 struct rx_connection *conn,
2555 struct rx_packet *optionalPacket, int type, char *data,
		int nbytes, int istack)
{
2558 /* Some of the following stuff should be common code for all
2559 * packet sends (it's repeated elsewhere) */
2560 struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
2563 int channel, callNumber;
    if (call) {
	channel = call->channel;
	callNumber = *call->callNumber;
	/* BUSY packets refer to the next call on this connection */
	if (type == RX_PACKET_TYPE_BUSY) {
	    callNumber++;
	}
    } else {
	channel = 0;
	callNumber = 0;
    }

    if (optionalPacket) {
	p = optionalPacket;
    } else {
	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	if (!p)
	    osi_Panic("rxi_SendSpecial failure");
    }
2586 p->header.serviceId = conn->serviceId;
2587 p->header.securityIndex = conn->securityIndex;
2588 p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
2591 p->header.epoch = conn->epoch;
2592 p->header.type = type;
2593 p->header.flags = 0;
2594 if (conn->type == RX_CLIENT_CONNECTION)
2595 p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
	rx_packetwrite(p, 0, nbytes, data);
2599 for (i = 1; i < p->niovecs; i++) {
2600 if (nbytes <= p->wirevec[i].iov_len) {
	    savelen = p->wirevec[i].iov_len;
	    saven = p->niovecs;
2603 p->wirevec[i].iov_len = nbytes;
2604 p->niovecs = i + 1; /* so condition fails because i == niovecs */
	    }
	    nbytes -= p->wirevec[i].iov_len;
	}
    if (call)
	rxi_Send(call, p, istack);
    else
	rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {		/* means we truncated the packet above.  We probably
				 * don't really need to do this, but it seems safer
				 * this way, given that sneaky optionalPacket... */
	p->wirevec[i - 1].iov_len = savelen;
	p->niovecs = saven;
    }
    if (!optionalPacket)
	rxi_FreePacket(p);
    return optionalPacket;
}
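/* Illustrative usage sketches (not built; this wrapper is hypothetical,
 * real call sites live in rx.c).  A call-level special is dispatched
 * through rxi_Send above, while a connection-level special (call == NULL)
 * goes straight out via rxi_SendPacket. */
#if 0
static void
example_SendSpecials(struct rx_call *call, struct rx_connection *conn,
		     int istack)
{
    afs_int32 error = htonl(RX_PROTOCOL_ERROR);

    /* Tell the peer this call channel is busy; no data portion. */
    rxi_SendSpecial(call, conn, NULL, RX_PACKET_TYPE_BUSY, NULL, 0, istack);

    /* Connection-level abort carrying a 4-byte error code as data. */
    rxi_SendSpecial(NULL, conn, NULL, RX_PACKET_TYPE_ABORT,
		    (char *)&error, sizeof(error), istack);
}
#endif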
/* Encode the packet's header (from the struct header in the packet to the
 * net byte order representation that is actually sent out on the wire) */
void
rxi_EncodePacketHeader(struct rx_packet *p)
{
2631 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2633 memset(buf, 0, RX_HEADER_SIZE);
2634 *buf++ = htonl(p->header.epoch);
2635 *buf++ = htonl(p->header.cid);
2636 *buf++ = htonl(p->header.callNumber);
2637 *buf++ = htonl(p->header.seq);
2638 *buf++ = htonl(p->header.serial);
2639 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2640 | (((afs_uint32) p->header.flags) << 16)
2641 | (p->header.userStatus << 8) | p->header.securityIndex);
2642 /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
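/* For reference, the seven 32-bit words written above, in wire order:
 *
 *   word 0: epoch
 *   word 1: cid (connection id | channel)
 *   word 2: callNumber
 *   word 3: seq
 *   word 4: serial
 *   word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6: spare<<16 | serviceId  (on receipt the top half carries the
 *           security checksum; see the note in the decode routine below)
 *
 * rxi_DecodePacketHeader is the exact inverse of this layout. */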
2646 /* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    afs_uint32 temp;
    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;

    temp = ntohl(*buf);
    buf++;
    /* C will truncate these word values down to the byte-wide header fields for me */
2668 p->header.type = temp >> 24;
2669 p->header.flags = temp >> 16;
2670 p->header.userStatus = temp >> 8;
2671 p->header.securityIndex = temp >> 0;
    temp = ntohl(*buf);
    buf++;

    p->header.serviceId = (temp & 0xffff);
2677 p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
/*
 * LOCKS HELD: called with call->lock held.
 *
 * PrepareSendPacket is the only place in the code that
 * can increment call->tnext.  This could become an atomic
 * in the future.  Beyond that there is nothing in this
 * function that requires the call being locked.  This
 * function can only be called by the application thread.
 */
void
rxi_PrepareSendPacket(struct rx_call *call,
		      struct rx_packet *p, int last)
{
2694 struct rx_connection *conn = call->conn;
2695 afs_uint32 seq = call->tnext++;
    unsigned int i;
    afs_int32 len;		/* len must be a signed type; it can go negative */
2699 /* No data packets on call 0. Where do these come from? */
2700 if (*call->callNumber == 0)
2701 *call->callNumber = 1;
2703 MUTEX_EXIT(&call->lock);
2704 p->flags &= ~(RX_PKTFLAG_ACKED | RX_PKTFLAG_SENT);
2706 p->header.cid = (conn->cid | call->channel);
2707 p->header.serviceId = conn->serviceId;
2708 p->header.securityIndex = conn->securityIndex;
2710 p->header.callNumber = *call->callNumber;
2711 p->header.seq = seq;
2712 p->header.epoch = conn->epoch;
2713 p->header.type = RX_PACKET_TYPE_DATA;
2714 p->header.flags = 0;
2715 p->header.spare = 0;
2716 if (conn->type == RX_CLIENT_CONNECTION)
2717 p->header.flags |= RX_CLIENT_INITIATED;
    if (last)
	p->header.flags |= RX_LAST_PACKET;
2722 clock_Zero(&p->firstSent); /* Never yet transmitted */
2723 p->header.serial = 0; /* Another way of saying never transmitted... */
2725 /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens match. */
2727 len = p->length + call->conn->securityHeaderSize;
2729 for (i = 1; i < p->niovecs && len > 0; i++) {
2730 len -= p->wirevec[i].iov_len;
2733 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2734 } else if (i < p->niovecs) {
2735 /* Free any extra elements in the wirevec */
2736 #if defined(RX_ENABLE_TSFPQ)
2737 rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2738 #else /* !RX_ENABLE_TSFPQ */
2739 MUTEX_ENTER(&rx_freePktQ_lock);
2740 rxi_FreeDataBufsNoLock(p, i);
2741 MUTEX_EXIT(&rx_freePktQ_lock);
2742 #endif /* !RX_ENABLE_TSFPQ */
	p->niovecs = i;
    }
    if (len)
	p->wirevec[i - 1].iov_len += len;
2748 MUTEX_ENTER(&call->lock);
    RXS_PreparePacket(conn->securityObject, call, p);
}
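/* Worked example of the length fix-up above (hypothetical numbers): with
 * securityHeaderSize = 0, p->length = 90 and two 48-byte data buffers,
 * len leaves the loop at 90 - 48 - 48 = -6 with i == p->niovecs, so the
 * last buffer's iov_len is trimmed by 6 (48 -> 42) and the iovec sum
 * matches p->length again. */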
2752 /* Given an interface MTU size, calculate an adjusted MTU size that
2753 * will make efficient use of the RX buffers when the peer is sending
2754 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
	return mtu;
2763 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
	return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
	return adjMTU;
    }
2771 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
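/* Worked example, assuming the usual constants RX_HEADER_SIZE = 28,
 * RX_JUMBOBUFFERSIZE = 1412 and RX_JUMBOHEADERSIZE = 4: for mtu = 1500,
 * adjMTU = 28 + 1412 + 4 = 1444.  The 56 bytes left over cannot hold
 * another 1416-byte jumbo buffer (frags = 0), so we return 1444 rather
 * than advertise space we could not use efficiently. */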
2775 /* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
2777 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
2781 int maxMTU = mtu * rxi_nSendFrags;
2782 maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
/* Given a packet size, figure out how many datagram packets will fit.
2787 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2788 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2789 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
	return 1;
    }
2797 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2798 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2799 /* subtract the size of the first and last packets */
2800 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
	return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
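/* Worked example, assuming UDP_HDR_SIZE = 28 (IP + UDP headers) and that
 * the RX_MAX_PACKET_SIZE clamp does not bind: frags = 3 and mtu = 1500
 * give maxMTU = 3 * 1528 - 28 = 4556.  Subtracting the first/last packet
 * overhead 28 + 2 * 1412 + 4 = 2856 leaves 1700, and 1700 / 1416 = 1,
 * so the datagram carries 2 + 1 = 3 packets. */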
/*
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where packets are being leaked.
 */
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
2815 #ifdef RXDEBUG_PACKET
2816 struct rx_packet *p;
#ifdef AFS_NT40_ENV
    DWORD zilch;
    char output[2048];
#define RXDPRINTF sprintf
#define RXDPRINTOUT output
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif
2828 MUTEX_ENTER(&rx_freePktQ_lock);
2829 RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
2834 for (p = rx_mallocedP; p; p = p->allNextp) {
2835 RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, length=%u header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
2836 cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec,
2837 p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->length,
2838 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
2839 (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
2840 (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
#ifdef AFS_NT40_ENV
	WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }
2846 RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
2851 MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RXDEBUG_PACKET */
    return 0;
}