/*
 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */
#include <afsconfig.h>
#include <afs/param.h>

# include "afs/sysincludes.h"
# include "afsincludes.h"
# include "rx_kcommon.h"
# else /* defined(UKERNEL) */
# ifdef RX_KERNEL_TRACE
# include "rx_kcommon.h"
# ifndef AFS_LINUX20_ENV
# if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
# include "afs/sysincludes.h"
# if defined(AFS_OBSD_ENV)
# include "h/socket.h"
# if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
# if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
# include "sys/mount.h"		/* it gets pulled in by something later anyway */
# include "netinet/in.h"
# include "afs/afs_osi.h"
# include "rx_kmutex.h"
# endif /* defined(UKERNEL) */

# if defined(AFS_NT40_ENV)
# define EWOULDBLOCK WSAEWOULDBLOCK
# include "rx_xmit_nt.h"
# include <sys/sysmacros.h>

#include <opr/queue.h>

#include "rx_packet.h"
#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */

static struct rx_packet *rx_mallocedP = 0;
#ifdef RXDEBUG_PACKET
static afs_uint32 rx_packet_id = 0;
#endif

extern char cml_version_number[];
static int AllocPacketBufs(int class, int num_pkts, struct opr_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_uint32 ahost, short aport,
				afs_int32 istack);

static struct rx_packet *rxi_AllocPacketNoLock(int class);
static void rxi_MorePacketsNoLock(int apackets);

#ifdef RX_ENABLE_TSFPQ
static int rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first,
				 int flush_global);
static void rxi_AdjustLocalPacketsTSFPQ(int num_keep_local,
					int allow_overcommit);
#endif
static void rxi_FreePacketNoLock(struct rx_packet *p);
static int rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first);
static int rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first,
				   struct opr_queue * q);

extern struct opr_queue rx_idleServerQueue;
/* some rules about packets:
 * 1.  When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */
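/*
 * Illustrative sketch (not part of rx): how a security package might claim
 * the reserved trailer room described in rule 1 above.  The guard macro,
 * the helper name, and the 8-byte trailer size are all hypothetical.
 */
#ifdef RX_PACKET_USAGE_EXAMPLES
static void
example_AppendSecurityTrailer(struct rx_packet *p)
{
    struct iovec *last = &p->wirevec[p->niovecs - 1];
    char *trailer = (char *)last->iov_base + last->iov_len;

    memset(trailer, 0, 8);	/* write a (hypothetical) 8-byte trailer... */
    last->iov_len += 8;		/* ...then expose it by extending iov_len */
}
#endif /* RX_PACKET_USAGE_EXAMPLES */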
/* Preconditions:
 *    all packet buffers (iov_base) are integral multiples of
 *    the word size.
 *    offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    return
		*((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
				 (offset - l)));
	}
	l += packet->wirevec[i].iov_len;
    }

    return 0;
}
/* Preconditions:
 *    all packet buffers (iov_base) are integral multiples of the word size.
 *    offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	    return 0;
	}
	l += packet->wirevec[i].iov_len;
    }

    return 0;
}
/* Preconditions:
 *    all packet buffers (iov_base) are integral multiples of
 *    the word size.
 *    offset is an integral multiple of the word size.
 *    all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
afs_int32
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
		  char *out)
{
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i < packet->niovecs)) {
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	out += j;
	r -= j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (r ? (resid - r) : resid);
}
/* Preconditions:
 *    all packet buffers (iov_base) are integral multiples of
 *    the word size.
 *    offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
{
    unsigned int i, j, l, o, r;
    char *b;

    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > o) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i <= RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
		break;

	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	memcpy(b, in, j);
	in += j;
	r -= j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (r ? (resid - r) : resid);
}
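/*
 * Usage sketch (illustrative, not part of rx): the slow-path helpers above
 * treat packet data as one contiguous byte stream no matter how it is split
 * across iovecs.  This hypothetical round trip stores a word and a buffer,
 * then reads them back; the offsets and the guard macro are made up.
 */
#ifdef RX_PACKET_USAGE_EXAMPLES
static void
example_SlowRoundTrip(struct rx_packet *p)
{
    char msg[8] = "example";
    char check[8];

    rx_SlowPutInt32(p, 0, htonl(7));		/* word at data offset 0 */
    rx_SlowWritePacket(p, 4, sizeof(msg), msg);	/* buffer right after it */

    osi_Assert(rx_SlowGetInt32(p, 0) == htonl(7));
    rx_SlowReadPacket(p, 4, sizeof(check), check);
}
#endif /* RX_PACKET_USAGE_EXAMPLES */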
int
rxi_AllocPackets(int class, int num_pkts, struct opr_queue * q)
{
    struct opr_queue *c;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (opr_queue_Scan(q, c)) {
	RX_PACKET_IOV_FULLINIT(opr_queue_Entry(c, struct rx_packet, entry));
    }

    return num_pkts;
}
#ifdef RX_ENABLE_TSFPQ
static int
AllocPacketBufs(int class, int num_pkts, struct opr_queue * q)
{
    struct rx_ts_info_t * rx_ts_info;
    int transfer;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
    if (transfer > 0) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	transfer = MAX(transfer, rx_TSFPQGlobSize);
	if (transfer > rx_nFreePackets) {
	    /* alloc enough for us, plus a few globs for other threads */
	    rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
	}

	RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

	MUTEX_EXIT(&rx_freePktQ_lock);
    }

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
static int
AllocPacketBufs(int class, int num_pkts, struct opr_queue * q)
{
    struct rx_packet *c;
    int i;
#ifdef KERNEL
    int overq = 0;
#endif

    MUTEX_ENTER(&rx_freePktQ_lock);

#ifdef KERNEL
    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
	 num_pkts--, overq++);

    if (overq) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_atomic_inc(&rx_stats.receivePktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_atomic_inc(&rx_stats.sendPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_atomic_inc(&rx_stats.specialPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
		break;
	    }
	}
    }

    if (rx_nFreePackets < num_pkts)
	num_pkts = rx_nFreePackets;

    if (!num_pkts) {
	rxi_NeedMorePackets = TRUE;
    }
#else /* KERNEL */
    if (rx_nFreePackets < num_pkts) {
	rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));
    }
#endif /* KERNEL */

    for (i = 0, c = opr_queue_First(&rx_freePacketQueue, struct rx_packet, entry);
	 i < num_pkts;
	 i++, c = opr_queue_Next(&c->entry, struct rx_packet, entry)) {
	RX_FPQ_MARK_USED(c);
    }

    opr_queue_SplitBeforeAppend(&rx_freePacketQueue, q, &c->entry);

    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);

    return num_pkts;
}
/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct opr_queue * q)
{
    struct rx_ts_info_t * rx_ts_info;
    struct opr_queue *cursor, *store;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

    if (!num_pkts) {
	for (opr_queue_ScanSafe(q, cursor, store)) {
	    num_pkts++;
	    rxi_FreeDataBufsTSFPQ(opr_queue_Entry(cursor, struct rx_packet,
						  entry), 2, 0);
	}
    } else {
	for (opr_queue_ScanSafe(q, cursor, store)) {
	    rxi_FreeDataBufsTSFPQ(opr_queue_Entry(cursor, struct rx_packet,
						  entry), 2, 0);
	}
    }

    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct opr_queue *q)
{
    struct opr_queue cbs;
    struct opr_queue *cursor, *store;
    int qlen = 0;

    osi_Assert(num_pkts >= 0);
    opr_queue_Init(&cbs);

    if (!num_pkts) {
	for (opr_queue_ScanSafe(q, cursor, store)) {
	    struct rx_packet *p
		= opr_queue_Entry(cursor, struct rx_packet, entry);
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	    }
	    num_pkts++;
	}
    } else {
	for (opr_queue_ScanSafe(q, cursor, store)) {
	    struct rx_packet *p
		= opr_queue_Entry(cursor, struct rx_packet, entry);
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	    }
	}
    }

    if (qlen) {
	opr_queue_SpliceAppend(q, &cbs);
	qlen += num_pkts;
    } else
	qlen = num_pkts;

    MUTEX_ENTER(&rx_freePktQ_lock);

    opr_queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);

    return num_pkts;
}
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
int
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
{
    int i;

    i = p->niovecs - 1;
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	    return 0;
	}
    } else {
	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	    return 0;
	}
    }

    return 0;
}
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
    int i, nv;
    struct opr_queue q, *cursor, *store;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
	nv++;
    if ((nv + p->niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - p->niovecs;
    if (nv < 1)
	return nb;

    /* allocate buffers */
    opr_queue_Init(&q);
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    i = p->niovecs;
    for (opr_queue_ScanSafe(&q, cursor, store)) {
	struct rx_packet *cb
	    = opr_queue_Entry(cursor, struct rx_packet, entry);

	opr_queue_Remove(&cb->entry);
	p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	p->wirevec[i].iov_len = RX_CBUFFERSIZE;
	i++;
    }

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
    p->niovecs += nv;

    return nb;
}
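/*
 * Usage sketch (illustrative, not part of rx): growing a packet with
 * rxi_AllocDataBuf and checking for a short allocation.  For example, if
 * RX_CBUFFERSIZE were 1024, a request for nb = 3000 computes nv = 3
 * continuation buffers.  The guard macro and helper name are hypothetical.
 */
#ifdef RX_PACKET_USAGE_EXAMPLES
static int
example_GrowPacket(struct rx_packet *p, int nb)
{
    int shortfall = rxi_AllocDataBuf(p, nb, RX_PACKET_CLASS_SEND_CBUF);

    if (shortfall > 0) {
	/* could not come up with 'shortfall' of the nb bytes requested */
	return -1;
    }
    return 0;
}
#endif /* RX_PACKET_USAGE_EXAMPLES */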
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	rx_mallocedP = p;
	MUTEX_EXIT(&rx_freePktQ_lock);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_NeedMorePackets = FALSE;
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;
#endif

	opr_queue_Append(&rx_freePacketQueue, &p->entry);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	rx_mallocedP = p;
    }

    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
{
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	rx_mallocedP = p;
	MUTEX_EXIT(&rx_freePktQ_lock);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (flush_global &&
	(num_keep_local < apackets)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
	rxi_NeedMorePackets = FALSE;
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
static void
rxi_MorePacketsNoLock(int apackets)
{
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;
    int getme;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
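    /*
     * Worked example (illustrative): if (rx_maxJumboRecvSize -
     * RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE came to 5, a request for
     * apackets = 100 would become 100 + 25 * 5 = 225 allocations, the
     * extra 125 packets being the ones handed out later as continuation
     * buffers.
     */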
    do {
	getme = apackets * sizeof(struct rx_packet);
	p = osi_Alloc(getme);
	if (p == NULL) {
	    apackets -= apackets / 4;
	    osi_Assert(apackets > 0);
	}
    } while (p == NULL);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;
#endif

	opr_queue_Append(&rx_freePacketQueue, &p->entry);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	rx_mallocedP = p;
    }

    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();
}
void
rxi_FreeAllPackets(void)
{
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
}
#ifdef RX_ENABLE_TSFPQ
static void
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
{
    struct rx_ts_info_t * rx_ts_info;
    int xfer;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	if (num_keep_local < rx_ts_info->_FPQ.len) {
	    xfer = rx_ts_info->_FPQ.len - num_keep_local;
	    RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
	} else {
	    xfer = num_keep_local - rx_ts_info->_FPQ.len;
	    if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
		xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
	    if (rx_nFreePackets < xfer) {
		rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
	    }
	    RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
	}
	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}

void
rxi_FlushLocalPacketsTSFPQ(void)
{
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
}
#endif /* RX_ENABLE_TSFPQ */
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
void
rx_CheckPackets(void)
{
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_maxSendWindow);
    }
}

/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for afs_int32: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
#ifndef RX_ENABLE_TSFPQ
static void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_FPQ_MARK_FREE(p);
    rx_nFreePackets++;
    opr_queue_Append(&rx_freePacketQueue, &p->entry);
}
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
static void
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
{
    struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p     -- packet from which continuation buffers will be freed
 * [IN] first -- iovec offset of first continuation buffer to free
 * [IN] q     -- queue into which continuation buffers will be chained
 *
 * returns:
 *   number of continuation buffers freed
 */
#ifndef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct opr_queue * q)
{
    struct iovec *iov;
    struct rx_packet * cb;
    int count = 0;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
	cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
	RX_FPQ_MARK_FREE(cb);
	opr_queue_Append(q, &cb->entry);
    }

    return count;
}
#endif
/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p     -- packet from which to free continuation buffers
 * [IN] first -- iovec offset of first continuation buffer to free
 *
 * returns:
 *   zero always
 */
static int
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
{
    struct iovec *iov;

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    return 0;
}
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p            -- packet from which continuation buffers will be freed
 * [IN] first        -- iovec offset of first continuation buffer to free
 *                      any value less than 2, the min number of iovecs,
 *                      is treated as if it is 2.
 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
 *                      global free pool before returning
 *
 * returns:
 *   zero always
 */
#ifdef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
{
    struct iovec *iov;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }

    return 0;
}
#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
void
rxi_RestoreDataBufs(struct rx_packet *p)
{
    unsigned int i;
    struct iovec *iov;

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	    rxi_nBadIovecs++;
	    p->niovecs = i;
	    break;
	}
	iov->iov_len = RX_CBUFFERSIZE;
    }
}
#ifdef RX_ENABLE_TSFPQ
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov >= end)
	return;

    RX_TS_INFO_GET(rx_ts_info);
    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
	p->niovecs--;
    }

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov >= end)
	return;

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
	p->niovecs--;
    }
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacket(struct rx_packet *p)
{
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_FreePacket(struct rx_packet *p)
{
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
#endif /* RX_ENABLE_TSFPQ */
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
static struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_atomic_inc(&rx_stats.receivePktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_atomic_inc(&rx_stats.sendPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_atomic_inc(&rx_stats.specialPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
		break;
	    }
	}
	return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);
    if (opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {

#ifdef KERNEL
	if (opr_queue_IsEmpty(&rx_freePacketQueue))
	    osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
	if (opr_queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);
#endif /* KERNEL */

	RX_TS_FPQ_GTOL(rx_ts_info);
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
#else /* RX_ENABLE_TSFPQ */
static struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    struct rx_packet *p;

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_atomic_inc(&rx_stats.receivePktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_atomic_inc(&rx_stats.sendPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_atomic_inc(&rx_stats.specialPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
		break;
	    }
	}
	return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);

#ifdef KERNEL
    if (opr_queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
    if (opr_queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(rx_maxSendWindow);
#endif /* KERNEL */

    rx_nFreePackets--;
    p = opr_queue_First(&rx_freePacketQueue, struct rx_packet, entry);
    opr_queue_Remove(&p->entry);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
static struct rx_packet *
rxi_AllocPacketTSFPQ(int class, int pull_global)
{
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);
    if (pull_global && opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	if (opr_queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

	MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {
	return NULL;
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
    return p;
}
#else /* RX_ENABLE_TSFPQ */
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */
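/*
 * Usage sketch (illustrative, not part of rx): the basic allocate/use/free
 * cycle.  The guard macro and helper name are hypothetical; error handling
 * is minimal.
 */
#ifdef RX_PACKET_USAGE_EXAMPLES
static void
example_AllocUseFree(void)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);

    if (p == NULL)
	return;			/* over quota, or pool exhausted */

    /* p->length covers the data area only, not the rx header */
    rx_SlowPutInt32(p, 0, htonl(42));

    /* p must not be on any queue when it is freed */
    rxi_FreePacket(p);
}
#endif /* RX_PACKET_USAGE_EXAMPLES */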
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call. It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
struct rx_packet *
rxi_AllocSendPacket(struct rx_call *call, int want)
{
    struct rx_packet *p = (struct rx_packet *)0;
    int mud;
    unsigned delta;

    mud = call->MTU - RX_HEADER_SIZE;
    delta =
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
	want += delta;
	want = MIN(want, mud);

	if ((unsigned)want > p->length)
	    (void)rxi_AllocDataBuf(p, (want - p->length),
				   RX_PACKET_CLASS_SEND_CBUF);

	if (p->length > mud)
	    p->length = mud;

	if (delta >= p->length) {
	    rxi_FreePacket(p);
	    p = NULL;
	} else {
	    p->length -= delta;
	}
	return p;
    }
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);

	    want += delta;
	    want = MIN(want, mud);

	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);

	    if (p->length > mud)
		p->length = mud;

	    if (delta >= p->length) {
		rxi_FreePacket(p);
		p = NULL;
	    } else {
		p->length -= delta;
	    }
	    break;
	}

	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we
	 * just wait.  */
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
	osi_rxSleep(&rx_waitingForPackets);
#endif
	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
    }

    return p;
}
#ifdef AFS_NT40_ENV
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
#else
/* count the number of used FDs */
static int
CountFDs(int amax)
{
    struct stat tstat;
    int i, code;
    int count = 0;

    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
	if (code == 0)
	    count++;
    }
    return count;
}
#endif /* AFS_NT40_ENV */
#else /* KERNEL */
#define CountFDs(amax) amax
#endif /* KERNEL */
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
int
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
	       u_short * port)
{
    struct sockaddr_in from;
    struct msghdr msg;
    int nbytes;
    afs_int32 rlen;
    afs_uint32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
    tlen = rlen - tlen;
    if (tlen > 0) {
	tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
	if (tlen > 0) {
	    tlen = rlen - tlen;
	} else
	    tlen = rlen;
    } else
	tlen = rlen;

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (u_short)(nbytes - RX_HEADER_SIZE);
    if (nbytes < 0 || (nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes < 0 && errno == EWOULDBLOCK) {
	    if (rx_stats_active)
		rx_atomic_inc(&rx_stats.noPacketOnRead);
	} else if (nbytes <= 0) {
	    if (rx_stats_active) {
		rx_atomic_inc(&rx_stats.bogusPacketOnRead);
		rx_stats.bogusHost = from.sin_addr.s_addr;
	    }
	    dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
		 ntohs(from.sin_port), nbytes));
	}
	return 0;
    }
#ifdef RXDEBUG
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
	     && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;

	dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
	     p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
	     p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	     p->length));
#ifdef RX_TRIMDATABUFS
	rxi_TrimDataBufs(p, 1);
#endif
	return 0;
    }
#endif /* RXDEBUG */
    else {
	/* Extract packet header. */
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	if (rx_stats_active
	    && p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {

	    rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
	}

#ifdef RX_TRIMDATABUFS
	/* Free any empty packet buffers at the end of this packet */
	rxi_TrimDataBufs(p, 1);
#endif
	return 1;
    }
}

#endif /* !KERNEL || UKERNEL */
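/*
 * Usage sketch (illustrative, not part of rx): pulling a single datagram off
 * an already-bound, non-blocking UDP socket with rxi_ReadPacket.  The guard
 * macro and helper name are hypothetical.
 */
#ifdef RX_PACKET_USAGE_EXAMPLES
static void
example_ReadOnePacket(osi_socket sock)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_RECEIVE);
    afs_uint32 host;
    u_short port;

    if (p == NULL)
	return;

    if (rxi_ReadPacket(sock, p, &host, &port)) {
	/* p->header is decoded; host/port identify the sender */
    }
    rxi_FreePacket(p);
}
#endif /* RX_PACKET_USAGE_EXAMPLES */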
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header. All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */

struct rx_packet *
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
		     int first)
{
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    int niov, i;
    struct iovec *iov;
    int length;
    afs_uint32 temp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length. All but the first packet are preceded by
     * an abbreviated four byte header. The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
	return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
	return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    p->niovecs = 2;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

    return np;
}
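/*
 * Layout sketch (illustrative, not part of rx): the wire offset where
 * sub-packet n's data begins inside a jumbogram, per the scheme described
 * above.  Sub-packet 0 follows the full rx header; every later sub-packet
 * is one RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE stride further along.
 */
#ifdef RX_PACKET_USAGE_EXAMPLES
static int
example_JumboSubPacketOffset(int n)
{
    return RX_HEADER_SIZE
	+ n * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
}
#endif /* RX_PACKET_USAGE_EXAMPLES */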
/* Send a udp datagram */
static int
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
{
    struct msghdr msg;
    int ret;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);

    return ret;
}
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
static int
cpytoc(mblk_t * mp, int off, int len, char *cp)
{
    int n;

    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return (len);
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);
	cp += n;
	len -= n;
    }
    return (len);
}

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
static int
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
	   int niovs)
{
    int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return (len);
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	o = 0;
	while (n) {
	    if (!t) {
		o = 0;
		i++;
		t = iovs[i].iov_len;
	    }
	    m = MIN(n, t);
	    memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
	    mp->b_rptr += m;
	    o += m;
	    t -= m;
	    n -= m;
	    len -= m;
	}
    }
    return (len);
}
#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e)  cpytoiovec(a, b, c, d, e)
#else /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
static int
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
{
    caddr_t p1, p2;
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

    /* skip over the first <off> bytes of the mbuf chain */
    while (off && m)
	if (m->m_len <= off) {
	    off -= m->m_len;
	    m = m->m_next;
	} else
	    break;

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    i = 0;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    while (len) {
	t = MIN(l1, MIN(l2, (unsigned int)len));
	memcpy(p2, p1, t);
	p1 += t;
	p2 += t;
	l1 -= t;
	l2 -= t;
	len -= t;
	if (!l1) {
	    m = m->m_next;
	    if (!m)
		break;
	    p1 = mtod(m, caddr_t);
	    l1 = m->m_len;
	}
	if (!l2) {
	    if (++i >= niovs)
		break;
	    p2 = iovs[i].iov_base;
	    l2 = iovs[i].iov_len;
	}
    }

    return len;
}
#endif
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
#if defined(AFS_NBSD_ENV)
int
rx_mb_to_packet(struct mbuf *amb, void (*free) (struct mbuf *), int hdr_len, int data_len, struct rx_packet *phandle)
#else
int
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     mblk_t *amb;
#else
     struct mbuf *amb;
#endif
     void (*free) ();
     struct rx_packet *phandle;
     int hdr_len, data_len;
#endif /* AFS_NBSD_ENV */
{
    int code;

    code =
	m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
		     phandle->niovecs);
    (*free) (amb);

    return code;
}
#endif
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */

struct rx_packet *
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
		       afs_uint32 ahost, short aport, int istack)
{
    struct rx_debugIn tin;
    afs_int32 tl;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
    } else {
	return ap;
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;

	    /* get basic stats */
	    memset(&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
#endif
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.nPackets = htonl(rx_nPackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
	    tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
	    tstat.idleThreads = opr_queue_Count(&rx_idleServerQueue);
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    if (tl <= 0) {
		rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
			       (char *)&tstat);
		ap->length = sizeof(struct rx_debugStats);
		rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
		rx_computelen(ap, ap->length);
	    }
	    break;
	}
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    unsigned int i, j;
	    struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);

	    tl = sizeof(struct rx_debugConn) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    memset(&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of connections.
		 */
#ifdef AFS_PTHREAD_ENV
		sleep(0);
#else
		IOMGR_Poll();
#endif
#endif
		MUTEX_ENTER(&rx_connHashTable_lock);
		/* We might be slightly out of step since we are not
		 * locking each call, but this is only debugging output.
		 */
		for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		    if ((all || rxi_IsConnInteresting(tc))
			&& tin.index-- <= 0) {
			tconn.host = tc->peer->host;
			tconn.port = tc->peer->port;
			tconn.cid = htonl(tc->cid);
			tconn.epoch = htonl(tc->epoch);
			tconn.serial = htonl(tc->serial);
			for (j = 0; j < RX_MAXCALLS; j++) {
			    tconn.callNumber[j] = htonl(tc->callNumber[j]);
			    if ((tcall = tc->call[j])) {
				tconn.callState[j] = tcall->state;
				tconn.callMode[j] = tcall->app.mode;
				tconn.callFlags[j] = tcall->flags;
				if (!opr_queue_IsEmpty(&tcall->rq))
				    tconn.callOther[j] |= RX_OTHER_IN;
				if (!opr_queue_IsEmpty(&tcall->tq))
				    tconn.callOther[j] |= RX_OTHER_OUT;
			    } else
				tconn.callState[j] = RX_STATE_NOTINIT;
			}

			tconn.natMTU = htonl(tc->peer->natMTU);
			tconn.error = htonl(tc->error);
			tconn.flags = tc->flags;
			tconn.type = tc->type;
			tconn.securityIndex = tc->securityIndex;
			if (tc->securityObject) {
			    RXS_GetStats(tc->securityObject, tc,
					 &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			    DOHTONL(flags);
			    DOHTONL(expires);
			    DOHTONL(packetsReceived);
			    DOHTONL(packetsSent);
			    DOHTONL(bytesReceived);
			    DOHTONL(bytesSent);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.spares) /
				 sizeof(short); i++)
				DOHTONS(spares[i]);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)
				DOHTONL(sparel[i]);
			}

			MUTEX_EXIT(&rx_connHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
				       (char *)&tconn);
			ap->length = sizeof(struct rx_debugConn);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			return ap;
		    }
		}
		MUTEX_EXIT(&rx_connHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
			   (char *)&tconn);
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}
	/*
	 * Pass back all the peer structures we have available
	 */
    case RX_DEBUGI_GETPEER:{
	    unsigned int i;
	    struct rx_peer *tp;
	    struct rx_debugPeer tpeer;

	    tl = sizeof(struct rx_debugPeer) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    memset(&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of peers.
		 *
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock also increases
		 * the risk that we will miss a new entry - but we
		 * are willing to live with this limitation since
		 * this is meant for debugging only
		 */
#ifdef AFS_PTHREAD_ENV
		sleep(0);
#else
		IOMGR_Poll();
#endif
#endif
		MUTEX_ENTER(&rx_peerHashTable_lock);
		for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		    if (tin.index-- <= 0) {
			tp->refCount++;
			MUTEX_EXIT(&rx_peerHashTable_lock);

			MUTEX_ENTER(&tp->peer_lock);
			tpeer.host = tp->host;
			tpeer.port = tp->port;
			tpeer.ifMTU = htons(tp->ifMTU);
			tpeer.idleWhen = htonl(tp->idleWhen);
			tpeer.refCount = htons(tp->refCount);
			tpeer.burstSize = 0;
			tpeer.burst = 0;
			tpeer.burstWait.sec = 0;
			tpeer.burstWait.usec = 0;
			tpeer.rtt = htonl(tp->rtt);
			tpeer.rtt_dev = htonl(tp->rtt_dev);
			tpeer.nSent = htonl(tp->nSent);
			tpeer.reSends = htonl(tp->reSends);
			tpeer.natMTU = htons(tp->natMTU);
			tpeer.maxMTU = htons(tp->maxMTU);
			tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
			tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
			tpeer.MTU = htons(tp->MTU);
			tpeer.cwind = htons(tp->cwind);
			tpeer.nDgramPackets = htons(tp->nDgramPackets);
			tpeer.congestSeq = htons(tp->congestSeq);
			tpeer.bytesSent.high =
			    htonl(tp->bytesSent >> 32);
			tpeer.bytesSent.low =
			    htonl(tp->bytesSent & MAX_AFS_UINT32);
			tpeer.bytesReceived.high =
			    htonl(tp->bytesReceived >> 32);
			tpeer.bytesReceived.low =
			    htonl(tp->bytesReceived & MAX_AFS_UINT32);
			MUTEX_EXIT(&tp->peer_lock);

			MUTEX_ENTER(&rx_peerHashTable_lock);
			tp->refCount--;
			MUTEX_EXIT(&rx_peerHashTable_lock);

			rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
				       (char *)&tpeer);
			ap->length = sizeof(struct rx_debugPeer);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			return ap;
		    }
		}
		MUTEX_EXIT(&rx_peerHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
			   (char *)&tpeer);
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}
    case RX_DEBUGI_RXSTATS:{
	    int i;
	    afs_int32 *s;

	    tl = sizeof(rx_stats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    /* Since it's all int32s convert to network order with a loop. */
	    if (rx_stats_active)
		MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) & rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

	    tl = ap->length;
	    ap->length = sizeof(rx_stats);
	    if (rx_stats_active)
		MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    ap->length = tl;
	    break;
	}
    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	break;
    }
    return ap;
}
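/*
 * Client-side sketch (illustrative, not part of rx): filling in the
 * rx_debugIn body for a GETSTATS query that the handler above would answer.
 * Header setup and socket I/O are omitted; the guard macro and helper name
 * are hypothetical.
 */
#ifdef RX_PACKET_USAGE_EXAMPLES
static void
example_BuildDebugRequest(struct rx_packet *ap)
{
    struct rx_debugIn tin;

    tin.type = htonl(RX_DEBUGI_GETSTATS);
    tin.index = htonl(0);
    rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    ap->length = sizeof(struct rx_debugIn);
    /* RX_CLIENT_INITIATED must be set in ap->header.flags, or the
     * handler above will ignore the query */
}
#endif /* RX_PACKET_USAGE_EXAMPLES */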
struct rx_packet *
rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
			 afs_uint32 ahost, short aport, int istack)
{
    afs_int32 tl;

    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	char buf[66];

	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);
	tl = ap->length;
	ap->length = 65;
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
    }

    return ap;
}
/* send a debug packet back to the sender */
static void
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_uint32 ahost, short aport, afs_int32 istack)
{
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;
    int saven = 0;
#ifdef KERNEL
    int waslocked = ISAFS_GLOCK();
#endif

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
#endif

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= apacket->wirevec[i].iov_len;
    }

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	if (!waslocked)
	    AFS_GLOCK();
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");
	AFS_GUNLOCK();
    }
#endif
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	AFS_GLOCK();
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");
	if (!waslocked)
	    AFS_GUNLOCK();
    }
#endif

    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
    }
}
void
rxi_NetSendError(struct rx_call *call, int code)
{
    int down = 0;
#ifdef AFS_NT40_ENV
    if (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) {
	down = 1;
    }
    if (code == -WSAEHOSTUNREACH) {
	down = 1;
    }
#elif defined(AFS_LINUX20_ENV)
    if (code == -ENETUNREACH) {
	down = 1;
    }
#elif defined(AFS_DARWIN_ENV)
    if (code == EHOSTUNREACH) {
	down = 1;
    }
#endif
    if (down) {
	call->lastReceiveTime = 0;
    }
}
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
void
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
{
#if defined(KERNEL)
    int waslocked;
#endif
    int code;
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    osi_socket socket;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    if (p->length > conn->peer->maxPacketSize) {
	if ((p->header.type == RX_PACKET_TYPE_ACK) &&
	    (p->header.flags & RX_REQUEST_ACK)) {
	    conn->lastPingSize = p->length;
	    conn->lastPingSizeSer = p->header.serial;
	} else if (p->header.seq != 0) {
	    conn->lastPacketSize = p->length;
	    conn->lastPacketSizeSeq = p->header.seq;
	}
    }
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;
    }
#ifdef RXDEBUG
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	if (drop)
	    deliveryType = 'D';	/* Drop the packet */
    }
#endif

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
	(conn->type ==
	 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#ifdef KERNEL
	waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    if (!waslocked)
		AFS_GLOCK();
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "before osi_NetSend()");
	    AFS_GUNLOCK();
	}
#else
	if (waslocked)
	    AFS_GUNLOCK();
#endif
#endif
	if ((code =
	     osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
			 p->length + RX_HEADER_SIZE, istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    if (rx_stats_active)
		rx_atomic_inc(&rx_stats.netSendFailures);
	    p->flags &= ~RX_PKTFLAG_SENT;	/* resend it very soon */

	    /* Some systems are nice and tell us right away that we cannot
	     * reach this recipient by returning an error code.
	     * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call) {
		rxi_NetSendError(call, code);
	    }
	}
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    AFS_GLOCK();
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "after osi_NetSend()");
	    if (!waslocked)
		AFS_GUNLOCK();
	}
#else
	if (waslocked)
	    AFS_GLOCK();
#endif
#endif
#ifdef RXDEBUG
    }
#endif

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
	 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
	 p->header.seq, p->header.flags, p, p->length));

    if (rx_stats_active) {
	rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
	MUTEX_ENTER(&peer->peer_lock);
	peer->bytesSent += p->length;
	MUTEX_EXIT(&peer->peer_lock);
    }
}
2334 /* Send a list of packets to appropriate destination for the specified
2335 * connection. The headers are first encoded and placed in the packets.
2338 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2339 struct rx_packet **list, int len, int istack)
2341 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2344 struct sockaddr_in addr;
2345 struct rx_peer *peer = conn->peer;
2347 struct rx_packet *p = NULL;
2348 struct iovec wirevec[RX_MAXIOVECS];
2349 int i, length, code;
2352 struct rx_jumboHeader *jp;
2354 char deliveryType = 'S';
2356 /* The address we're sending the packet to */
2357 addr.sin_family = AF_INET;
2358 addr.sin_port = peer->port;
2359 addr.sin_addr.s_addr = peer->host;
2361 if (len + 1 > RX_MAXIOVECS) {
2362 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2366 * Stamp the packets in this jumbogram with consecutive serial numbers
2368 MUTEX_ENTER(&conn->conn_data_lock);
2369 serial = conn->serial;
2370 conn->serial += len;
2371 for (i = 0; i < len; i++) {
2373 if (p->length > conn->peer->maxPacketSize) {
2374 /* a ping *or* a sequenced packet can count */
2375 if ((p->length > conn->peer->maxPacketSize)) {
2376 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2377 (p->header.flags & RX_REQUEST_ACK)) &&
2378 ((i == 0) || (p->length >= conn->lastPingSize))) {
2379 conn->lastPingSize = p->length;
2380 conn->lastPingSizeSer = serial + i;
2381 } else if ((p->header.seq != 0) &&
2382 ((i == 0) || (p->length >= conn->lastPacketSize))) {
2383 conn->lastPacketSize = p->length;
2384 conn->lastPacketSizeSeq = p->header.seq;
2389 MUTEX_EXIT(&conn->conn_data_lock);
2392 /* This stuff should be revamped, I think, so that most, if not
2393 * all, of the header stuff is always added here. We could
2394 * probably do away with the encode/decode routines. XXXXX */
2397 length = RX_HEADER_SIZE;
2398 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2399 wirevec[0].iov_len = RX_HEADER_SIZE;
2400 for (i = 0; i < len; i++) {
2403 /* The whole 3.5 jumbogram scheme relies on packets fitting
2404 * in a single packet buffer. */
2405 if (p->niovecs > 2) {
2406 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2409 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2412 if (p->length != RX_JUMBOBUFFERSIZE) {
2413 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2415 p->header.flags |= RX_JUMBO_PACKET;
2416 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2417 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2419 wirevec[i + 1].iov_len = p->length;
2420 length += p->length;
2422 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2424 /* Convert jumbo packet header to network byte order */
2425 temp = (afs_uint32) (p->header.flags) << 24;
2426 temp |= (afs_uint32) (p->header.spare);
2427 *(afs_uint32 *) jp = htonl(temp);
2429 jp = (struct rx_jumboHeader *)
2430 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2432 /* Stamp each packet with a unique serial number. The serial
2433 * number is maintained on a connection basis because some types
2434 * of security may be based on the serial number of the packet,
2435 * and security is handled on a per authenticated-connection
2437 /* Pre-increment, to guarantee no zero serial number; a zero
2438 * serial number means the packet was never sent. */
2439 p->header.serial = ++serial;
2440 /* This is so we can adjust retransmit time-outs better in the face of
2441 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2443 if (p->firstSerial == 0) {
2444 p->firstSerial = p->header.serial;
2447 /* If an output tracer function is defined, call it with the packet and
2448 * network address. Note this function may modify its arguments. */
2449 if (rx_almostSent) {
2450 int drop = (*rx_almostSent) (p, &addr);
2451 /* drop packet if return value is non-zero? */
2453 deliveryType = 'D'; /* Drop the packet */
2457 /* Get network byte order header */
2458 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2459 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
        (conn->type ==
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
    } else {
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */

        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        waslocked = ISAFS_GLOCK();
        if (!istack && waslocked)
            AFS_GUNLOCK();
#endif
        if ((code = osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
                                istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.netSendFailures);
            for (i = 0; i < len; i++) {
                p = list[i];
                p->flags &= ~RX_PKTFLAG_SENT;   /* resend it very soon */
            }
            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later.
             */
            if (call)
                rxi_NetSendError(call, code);
        }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        if (!istack && waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    osi_Assert(p != NULL);

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
         ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
         p->header.seq, p->header.flags, p, p->length));
#endif /* RXDEBUG */

    if (rx_stats_active) {
        rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
        MUTEX_ENTER(&peer->peer_lock);
        peer->bytesSent += p->length;
        MUTEX_EXIT(&peer->peer_lock);
    }
}

/* Send a raw abort packet, without any call or connection structures */
void
rxi_SendRawAbort(osi_socket socket, afs_uint32 host, u_short port,
                 afs_int32 error, struct rx_packet *source, int istack)
{
    struct rx_header theader;
    struct sockaddr_in addr;
    struct iovec iov[2];

    memset(&theader, 0, sizeof(theader));
    theader.epoch = htonl(source->header.epoch);
    theader.callNumber = htonl(source->header.callNumber);
    theader.serial = htonl(1);
    theader.type = RX_PACKET_TYPE_ABORT;
    theader.serviceId = htons(source->header.serviceId);
    theader.securityIndex = source->header.securityIndex;
    theader.cid = htonl(source->header.cid);

    /*
     * If the abort is being sent in response to a server initiated packet,
     * set client_initiated in the abort to ensure it is not associated by
     * the receiver with a connection in the opposite direction.
     */
    if ((source->header.flags & RX_CLIENT_INITIATED) != RX_CLIENT_INITIATED)
        theader.flags |= RX_CLIENT_INITIATED;

    error = htonl(error);

    iov[0].iov_base = &theader;
    iov[0].iov_len = sizeof(struct rx_header);
    iov[1].iov_base = &error;
    iov[1].iov_len = sizeof(error);

    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = host;
    addr.sin_port = port;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    addr.sin_len = sizeof(struct sockaddr_in);
#endif

    osi_NetSend(socket, &addr, iov, 2,
                sizeof(struct rx_header) + sizeof(error), istack);
}
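
/*
 * Note (illustrative): the abort datagram built above is minimal -- the
 * fixed rx header (seven 32-bit words, 28 bytes with the stock layout)
 * followed by a 4-byte error code in network order.  Echoing the source
 * packet's epoch/cid/callNumber lets the receiver match the abort to the
 * offending connection without keeping any local state.
 */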

/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(struct rx_call *call,
                struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    struct rx_packet *p;
    int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }

    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
        p->length = nbytes;
    else
        nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);

    /* Trim the wirevec so that its iov_lens sum to exactly nbytes */
    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1; /* so condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {                /* means we truncated the packet above.  We probably don't */
        /* really need to do this, but it seems safer this way, given that */
        /* sneaky optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}
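
/*
 * Usage sketch (illustrative, not a new API): an abort carrying an error
 * code can be pushed out through this routine roughly the way rx.c does
 * it --
 *
 *     afs_int32 error = htonl(RX_CALL_DEAD);
 *     rxi_SendSpecial(call, call->conn, NULL, RX_PACKET_TYPE_ABORT,
 *                     (char *)&error, sizeof(error), istack);
 *
 * Passing NULL for optionalPacket makes the routine allocate and free a
 * scratch packet itself; note that the return value is NULL in that case.
 */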

/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);  /* MTUXXX */

    memset(buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
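
/*
 * For reference, the wire header laid down by the seven stores above
 * (all fields big-endian, one 32-bit word each):
 *
 *   word 0: epoch            word 4: serial
 *   word 1: cid              word 5: type | flags | userStatus | securityIndex
 *   word 2: callNumber       word 6: spare(16) | serviceId(16)
 *   word 3: seq
 */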

/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);  /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;

    temp = ntohl(*buf);
    buf++;

    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf);
    buf++;

    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}

/*
 * LOCKS HELD: called with call->lock held.
 *
 * PrepareSendPacket is the only place in the code that
 * can increment call->tnext.  This could become an atomic
 * in the future.  Beyond that there is nothing in this
 * function that requires the call being locked.  This
 * function can only be called by the application thread.
 */
void
rxi_PrepareSendPacket(struct rx_call *call,
                      struct rx_packet *p, int last)
{
    struct rx_connection *conn = call->conn;
    afs_uint32 seq = call->tnext++;
    unsigned int i;
    afs_int32 len;              /* len must be a signed type; it can go negative */

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    MUTEX_EXIT(&call->lock);
    p->flags &= ~(RX_PKTFLAG_ACKED | RX_PKTFLAG_SENT);

    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    p->header.callNumber = *call->callNumber;
    p->header.seq = seq;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->firstSent);  /* Never yet transmitted */
    p->header.serial = 0;       /* Another way of saying never transmitted... */

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
    } else if (i < p->niovecs) {
        /* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
        rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
#else /* !RX_ENABLE_TSFPQ */
        MUTEX_ENTER(&rx_freePktQ_lock);
        rxi_FreeDataBufsNoLock(p, i);
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */
        p->niovecs = i;
    }
    /* Trim the last element; len is zero or negative here, so this makes
     * the iov_lens sum to exactly length + securityHeaderSize. */
    p->wirevec[i - 1].iov_len += len;
    MUTEX_ENTER(&call->lock);
    RXS_PreparePacket(conn->securityObject, call, p);
}
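
/*
 * Worked example (illustrative, assuming 1412-byte data buffers): with
 * three data iovecs and p->length + securityHeaderSize = 3000, the loop
 * above runs i = 1, 2, 3 and exits with i = 4 and
 * len = 3000 - 3 * 1412 = -1236.  No extra iovecs need freeing, and the
 * final adjustment shrinks wirevec[3].iov_len to 1412 - 1236 = 176, so
 * the iov_lens again sum to exactly 3000.
 */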

/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
        return mtu;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
        return mtu;
    }
    mtu -= adjMTU;
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
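
/*
 * Worked example (illustrative, assuming the stock constants
 * RX_HEADER_SIZE = 28, RX_JUMBOBUFFERSIZE = 1412, RX_JUMBOHEADERSIZE = 4):
 * a 1500-byte Ethernet MTU gives adjMTU = 1444; the 56 bytes left over
 * cannot hold another 1416-byte jumbo buffer (frags = 0), so the
 * adjusted MTU rounds down to 1444.
 */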

/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
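
/*
 * Example (illustrative, same assumed constants as above): with an
 * adjusted interface MTU of 1444, rxi_nSendFrags = 4 and a peer
 * advertising 5800, maxMTU starts at 5776, stays under the peer's limit,
 * and rxi_AdjustIfMTU rounds it to 1444 + 3 * 1416 = 5692.
 */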

/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
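
/*
 * Worked example (illustrative, assuming UDP_HDR_SIZE = 28 for the
 * combined IP + UDP headers): frags = 3 and mtu = 1444 give
 * maxMTU = 3 * 1472 - 28 = 4388; subtracting the first and last packets
 * (28 + 2 * 1412 + 4 = 2856) leaves 1532, which holds one more
 * 1416-byte middle buffer, so the jumbogram can carry 2 + 1 = 3 packets.
 */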

/*
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where the packet leakage is.
 */
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
    struct rx_packet *p;
#ifdef AFS_NT40_ENV
    DWORD zilch;
    char output[2048];
#define RXDPRINTF sprintf
#define RXDPRINTOUT output
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif

    MUTEX_ENTER(&rx_freePktQ_lock);
    RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    for (p = rx_mallocedP; p; p = p->allNextp) {
        RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, length=%u  header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
                  cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec,
                  p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->length,
                  p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
                  (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
                  (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
#ifdef AFS_NT40_ENV
        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }

    RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RXDEBUG_PACKET */
    return 0;
}
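
/*
 * Usage sketch (illustrative): on a build with RXDEBUG_PACKET defined,
 * a leak hunt boils down to taking periodic dumps and diffing them by
 * packet id, e.g.
 *
 *     FILE *fp = fopen("rx-packets.log", "w");
 *     if (fp) {
 *         rx_DumpPackets(fp, "after-load");
 *         fclose(fp);
 *     }
 */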