2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
15 # include "afs/sysincludes.h"
16 # include "afsincludes.h"
17 # include "rx_kcommon.h"
18 # else /* defined(UKERNEL) */
19 # ifdef RX_KERNEL_TRACE
20 # include "rx_kcommon.h"
23 # ifndef AFS_LINUX20_ENV
26 # if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
27 # include "afs/sysincludes.h"
29 # if defined(AFS_OBSD_ENV)
32 # include "h/socket.h"
33 # if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
34 # if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
35 # include "sys/mount.h" /* it gets pulled in by something later anyway */
39 # include "netinet/in.h"
40 # include "afs/afs_osi.h"
41 # include "rx_kmutex.h"
42 # endif /* defined(UKERNEL) */
47 # if defined(AFS_NT40_ENV)
49 # define EWOULDBLOCK WSAEWOULDBLOCK
52 # include "rx_xmit_nt.h"
58 # include <sys/sysmacros.h>
61 #include <opr/queue.h>
65 #include "rx_packet.h"
66 #include "rx_atomic.h"
67 #include "rx_globals.h"
68 #include "rx_internal.h"
76 /* rxdb_fileID is used to identify the lock location, along with line#. */
77 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
78 #endif /* RX_LOCKS_DB */
79 static struct rx_packet *rx_mallocedP = 0;
81 static afs_uint32 rx_packet_id = 0;
84 extern char cml_version_number[];
86 static int AllocPacketBufs(int class, int num_pkts, struct opr_queue *q);
88 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
89 afs_uint32 ahost, short aport,
91 static struct rx_packet *rxi_AllocPacketNoLock(int class);
94 static void rxi_MorePacketsNoLock(int apackets);
97 #ifdef RX_ENABLE_TSFPQ
98 static int rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first,
100 static void rxi_AdjustLocalPacketsTSFPQ(int num_keep_local,
101 int allow_overcommit);
103 static void rxi_FreePacketNoLock(struct rx_packet *p);
104 static int rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first);
105 static int rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first,
106 struct opr_queue * q);
109 extern struct opr_queue rx_idleServerQueue;
111 /* some rules about packets:
112 * 1. When a packet is allocated, the final iov_buf contains room for
113 * a security trailer, but iov_len masks that fact. If the security
114 * package wants to add the trailer, it may do so, and then extend
115 * iov_len appropriately. For this reason, packet's niovecs and
116 * iov_len fields should be accurate before calling PreparePacket.
120 * all packet buffers (iov_base) are integral multiples of
122 * offset is an integral multiple of the word size.
125 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
129 for (l = 0, i = 1; i < packet->niovecs; i++) {
130 if (l + packet->wirevec[i].iov_len > offset) {
132 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
135 l += packet->wirevec[i].iov_len;
142 * all packet buffers (iov_base) are integral multiples of the word size.
143 * offset is an integral multiple of the word size.
146 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
150 for (l = 0, i = 1; i < packet->niovecs; i++) {
151 if (l + packet->wirevec[i].iov_len > offset) {
152 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
153 (offset - l))) = data;
156 l += packet->wirevec[i].iov_len;
163 * all packet buffers (iov_base) are integral multiples of the
165 * offset is an integral multiple of the word size.
167 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
170 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
173 unsigned int i, j, l, r;
174 for (l = 0, i = 1; i < packet->niovecs; i++) {
175 if (l + packet->wirevec[i].iov_len > offset) {
178 l += packet->wirevec[i].iov_len;
181 /* i is the iovec which contains the first little bit of data in which we
182 * are interested. l is the total length of everything prior to this iovec.
183 * j is the number of bytes we can safely copy out of this iovec.
184 * offset only applies to the first iovec.
187 while ((r > 0) && (i < packet->niovecs)) {
188 j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
189 memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
192 l += packet->wirevec[i].iov_len;
197 return (r ? (resid - r) : resid);
202 * all packet buffers (iov_base) are integral multiples of the
204 * offset is an integral multiple of the word size.
207 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
209 unsigned int i, j, l, o, r;
212 for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
213 if (l + packet->wirevec[i].iov_len > o) {
216 l += packet->wirevec[i].iov_len;
219 /* i is the iovec which contains the first little bit of data in which we
220 * are interested. l is the total length of everything prior to this iovec.
221 * j is the number of bytes we can safely copy out of this iovec.
222 * offset only applies to the first iovec.
225 while ((r > 0) && (i <= RX_MAXWVECS)) {
226 if (i >= packet->niovecs)
227 if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
230 b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
231 j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
235 l += packet->wirevec[i].iov_len;
240 return (r ? (resid - r) : resid);
/* Allocate num_pkts packets onto queue q and fully (re)initialise each
 * packet's iovec array.  NOTE(review): this listing has gaps (original
 * line numbers jump), so the body shown here is incomplete. */
244 rxi_AllocPackets(int class, int num_pkts, struct opr_queue * q)
248     num_pkts = AllocPacketBufs(class, num_pkts, q);
250     for (opr_queue_Scan(q, c)) {
251         RX_PACKET_IOV_FULLINIT(opr_queue_Entry(c, struct rx_packet, entry));
257 #ifdef RX_ENABLE_TSFPQ
/* TSFPQ variant: if this thread's local free-packet queue is short, refill it
 * from the global pool (under rx_freePktQ_lock, growing the pool via
 * rxi_MorePacketsNoLock if needed), then check num_pkts packets out onto q.
 * NOTE(review): listing has gaps; body incomplete as shown. */
259 AllocPacketBufs(int class, int num_pkts, struct opr_queue * q)
261     struct rx_ts_info_t * rx_ts_info;
265     RX_TS_INFO_GET(rx_ts_info);
267     transfer = num_pkts - rx_ts_info->_FPQ.len;
270 MUTEX_ENTER(&rx_freePktQ_lock);
271 transfer = MAX(transfer, rx_TSFPQGlobSize);
272 if (transfer > rx_nFreePackets) {
273     /* alloc enough for us, plus a few globs for other threads */
274     rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
277 RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
279 MUTEX_EXIT(&rx_freePktQ_lock);
283     RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
287 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ variant: under rx_freePktQ_lock, trim num_pkts down while the
 * class is over quota (bumping per-class failure stats), clamp to what the
 * global free list can supply, grow the pool if still short, then splice the
 * first num_pkts entries of rx_freePacketQueue onto q.
 * NOTE(review): listing has gaps; body incomplete as shown. */
289 AllocPacketBufs(int class, int num_pkts, struct opr_queue * q)
300 MUTEX_ENTER(&rx_freePktQ_lock);
303     for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
304          num_pkts--, overq++);
307         rxi_NeedMorePackets = TRUE;
308 if (rx_stats_active) {
310     case RX_PACKET_CLASS_RECEIVE:
311         rx_atomic_inc(&rx_stats.receivePktAllocFailures);
313     case RX_PACKET_CLASS_SEND:
314         rx_atomic_inc(&rx_stats.sendPktAllocFailures);
316     case RX_PACKET_CLASS_SPECIAL:
317         rx_atomic_inc(&rx_stats.specialPktAllocFailures);
319     case RX_PACKET_CLASS_RECV_CBUF:
320         rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
322     case RX_PACKET_CLASS_SEND_CBUF:
323         rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
329 if (rx_nFreePackets < num_pkts)
330     num_pkts = rx_nFreePackets;
333     rxi_NeedMorePackets = TRUE;
337 if (rx_nFreePackets < num_pkts) {
338     rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), 4 * rx_initSendWindow));
342 for (i=0, c=opr_queue_First(&rx_freePacketQueue, struct rx_packet, entry);
344      i++, c=opr_queue_Next(&c->entry, struct rx_packet, entry)) {
348 opr_queue_SplitBeforeAppend(&rx_freePacketQueue, q, &c->entry);
350 rx_nFreePackets -= num_pkts;
355 MUTEX_EXIT(&rx_freePktQ_lock);
360 #endif /* RX_ENABLE_TSFPQ */
363 * Free a packet currently used as a continuation buffer
365 #ifdef RX_ENABLE_TSFPQ
366 /* num_pkts=0 means queue length is unknown */
/* TSFPQ variant: free every packet on q (including each packet's
 * continuation buffers, via rxi_FreeDataBufsTSFPQ) into the thread-local
 * pool; if the local queue then exceeds rx_TSFPQLocalMax, flush the excess
 * to the global pool under rx_freePktQ_lock and wake any waiters.
 * num_pkts==0 means the queue length is unknown.
 * NOTE(review): listing has gaps; body incomplete as shown. */
368 rxi_FreePackets(int num_pkts, struct opr_queue * q)
370     struct rx_ts_info_t * rx_ts_info;
371     struct opr_queue *cursor, *store;
374     osi_Assert(num_pkts >= 0);
375     RX_TS_INFO_GET(rx_ts_info);
378         for (opr_queue_ScanSafe(q, cursor, store)) {
380             rxi_FreeDataBufsTSFPQ(opr_queue_Entry(cursor, struct rx_packet,
384         for (opr_queue_ScanSafe(q, cursor, store)) {
385             rxi_FreeDataBufsTSFPQ(opr_queue_Entry(cursor, struct rx_packet,
391     RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
394     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
396         MUTEX_ENTER(&rx_freePktQ_lock);
398         RX_TS_FPQ_LTOG(rx_ts_info);
400         /* Wakeup anyone waiting for packets */
403         MUTEX_EXIT(&rx_freePktQ_lock);
409 #else /* RX_ENABLE_TSFPQ */
410 /* num_pkts=0 means queue length is unknown */
/* Non-TSFPQ variant: detach each packet's continuation buffers (iovecs from
 * index 2 up) onto a local queue `cbs`, splice those buffers back onto q,
 * then splice everything onto the global rx_freePacketQueue under
 * rx_freePktQ_lock and wake any waiters.  num_pkts==0 means the queue
 * length is unknown.  NOTE(review): listing has gaps; body incomplete. */
412 rxi_FreePackets(int num_pkts, struct opr_queue *q)
414     struct opr_queue cbs;
415     struct opr_queue *cursor, *store;
419     osi_Assert(num_pkts >= 0);
420     opr_queue_Init(&cbs);
423         for (opr_queue_ScanSafe(q, cursor, store)) {
425                 = opr_queue_Entry(cursor, struct rx_packet, entry);
426             if (p->niovecs > 2) {
427                 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
435         for (opr_queue_ScanSafe(q, cursor, store)) {
437                 = opr_queue_Entry(cursor, struct rx_packet, entry);
439             if (p->niovecs > 2) {
440                 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
447     opr_queue_SpliceAppend(q, &cbs);
453     MUTEX_ENTER(&rx_freePktQ_lock);
455     opr_queue_SpliceAppend(&rx_freePacketQueue, q);
456     rx_nFreePackets += qlen;
458     /* Wakeup anyone waiting for packets */
461     MUTEX_EXIT(&rx_freePktQ_lock);
466 #endif /* RX_ENABLE_TSFPQ */
468 /* this one is kind of awful.
469 * In rxkad, the packet has been all shortened, and everything, ready for
470 * sending. All of a sudden, we discover we need some of that space back.
471 * This isn't terribly general, because it knows that the packets are only
472 * rounded up to the EBS (userdata + security header).
/* Grow an iovec by nb bytes after rxkad has shortened the packet for
 * sending: the first (localdata) buffer may grow up to RX_FIRSTBUFFERSIZE,
 * continuation buffers up to RX_CBUFFERSIZE.  As noted above, this is not
 * general — it relies on packets being rounded to the EBS.
 * NOTE(review): listing has gaps; body incomplete as shown. */
475 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
479     if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
480         if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
481             p->wirevec[i].iov_len += nb;
485         if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
486             p->wirevec[i].iov_len += nb;
494 /* get sufficient space to store nb bytes of data (or more), and hook
495 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
496 * returns the number of bytes >0 which it failed to come up with.
497 * Don't need to worry about locking on packet, since only
498 * one thread can manipulate one at a time. Locking on continution
499 * packets is handled by AllocPacketBufs */
500 /* MTUXXX don't need to go throught the for loop if we can trust niovecs */
/* Attach enough continuation buffers to p to hold at least nb more bytes
 * (capped at RX_MAXWVECS total iovecs).  Returns <=0 on success, otherwise
 * the number of bytes it failed to provide.  Packet-level locking is the
 * caller's problem; buffer-pool locking is handled by AllocPacketBufs.
 * NOTE(review): listing has gaps; body incomplete as shown. */
502 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
505     struct opr_queue q, *cursor, *store;
507     /* compute the number of cbuf's we need */
508     nv = nb / RX_CBUFFERSIZE;
509     if ((nv * RX_CBUFFERSIZE) < nb)
511     if ((nv + p->niovecs) > RX_MAXWVECS)
512         nv = RX_MAXWVECS - p->niovecs;
516     /* allocate buffers */
518     nv = AllocPacketBufs(class, nv, &q);
520     /* setup packet iovs */
522     for (opr_queue_ScanSafe(&q, cursor, store)) {
524         = opr_queue_Entry(cursor, struct rx_packet, entry);
526         opr_queue_Remove(&cb->entry);
527         p->wirevec[i].iov_base = (caddr_t) cb->localdata;
528         p->wirevec[i].iov_len = RX_CBUFFERSIZE;
532     nb -= (nv * RX_CBUFFERSIZE);
533     p->length += (nv * RX_CBUFFERSIZE);
539 /* Add more packet buffers */
540 #ifdef RX_ENABLE_TSFPQ
/* TSFPQ variant: malloc apackets new rx_packet structures, account for them
 * in rx_nPackets (under rx_packets_mutex), initialise each and check it into
 * the thread-local pool, then flush the excess to the global pool if the
 * local queue is over rx_TSFPQLocalMax.
 * NOTE(review): listing has gaps; body incomplete as shown. */
542 rxi_MorePackets(int apackets)
544     struct rx_packet *p, *e;
545     struct rx_ts_info_t * rx_ts_info;
549     getme = apackets * sizeof(struct rx_packet);
550     p = osi_Alloc(getme);
553     PIN(p, getme); /* XXXXX */
555     RX_TS_INFO_GET(rx_ts_info);
557     RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
558     /* TSFPQ patch also needs to keep track of total packets */
560     MUTEX_ENTER(&rx_packets_mutex);
561     rx_nPackets += apackets;
562     RX_TS_FPQ_COMPUTE_LIMITS;
563     MUTEX_EXIT(&rx_packets_mutex);
565     for (e = p + apackets; p < e; p++) {
566         RX_PACKET_IOV_INIT(p);
569         RX_TS_FPQ_CHECKIN(rx_ts_info,p);
572         MUTEX_ENTER(&rx_freePktQ_lock);
573 #ifdef RXDEBUG_PACKET
574         p->packetId = rx_packet_id++;
575         p->allNextp = rx_mallocedP;
576 #endif /* RXDEBUG_PACKET */
578         MUTEX_EXIT(&rx_freePktQ_lock);
581     rx_ts_info->_FPQ.delta += apackets;
583     if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
585         MUTEX_ENTER(&rx_freePktQ_lock);
587         RX_TS_FPQ_LTOG(rx_ts_info);
588         rxi_NeedMorePackets = FALSE;
591         MUTEX_EXIT(&rx_freePktQ_lock);
595 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ variant: malloc apackets new rx_packet structures and, under
 * rx_freePktQ_lock, initialise each one and append it to the global
 * rx_freePacketQueue, updating rx_nPackets/rx_nFreePackets and clearing
 * the shortage flag.  NOTE(review): listing has gaps; body incomplete. */
597 rxi_MorePackets(int apackets)
599     struct rx_packet *p, *e;
603     getme = apackets * sizeof(struct rx_packet);
604     p = osi_Alloc(getme);
607     PIN(p, getme); /* XXXXX */
610     MUTEX_ENTER(&rx_freePktQ_lock);
612     for (e = p + apackets; p < e; p++) {
613         RX_PACKET_IOV_INIT(p);
614 #ifdef RX_TRACK_PACKETS
615         p->flags |= RX_PKTFLAG_FREE;
619         opr_queue_Append(&rx_freePacketQueue, &p->entry);
620 #ifdef RXDEBUG_PACKET
621         p->packetId = rx_packet_id++;
622         p->allNextp = rx_mallocedP;
623 #endif /* RXDEBUG_PACKET */
627     rx_nPackets += apackets;
628     rx_nFreePackets += apackets;
629     rxi_NeedMorePackets = FALSE;
632     MUTEX_EXIT(&rx_freePktQ_lock);
635 #endif /* RX_ENABLE_TSFPQ */
637 #ifdef RX_ENABLE_TSFPQ
/* Like the TSFPQ rxi_MorePackets, but with explicit control over how many of
 * the new packets stay in this thread's local pool (num_keep_local) and
 * whether the remainder is flushed to the global pool (flush_global).
 * NOTE(review): listing has gaps; body incomplete as shown. */
639 rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
641     struct rx_packet *p, *e;
642     struct rx_ts_info_t * rx_ts_info;
646     getme = apackets * sizeof(struct rx_packet);
647     p = osi_Alloc(getme);
649     PIN(p, getme); /* XXXXX */
651     RX_TS_INFO_GET(rx_ts_info);
653     RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
654     /* TSFPQ patch also needs to keep track of total packets */
655     MUTEX_ENTER(&rx_packets_mutex);
656     rx_nPackets += apackets;
657     RX_TS_FPQ_COMPUTE_LIMITS;
658     MUTEX_EXIT(&rx_packets_mutex);
660     for (e = p + apackets; p < e; p++) {
661         RX_PACKET_IOV_INIT(p);
663         RX_TS_FPQ_CHECKIN(rx_ts_info,p);
666         MUTEX_ENTER(&rx_freePktQ_lock);
667 #ifdef RXDEBUG_PACKET
668         p->packetId = rx_packet_id++;
669         p->allNextp = rx_mallocedP;
670 #endif /* RXDEBUG_PACKET */
672         MUTEX_EXIT(&rx_freePktQ_lock);
675     rx_ts_info->_FPQ.delta += apackets;
678         (num_keep_local < apackets)) {
680         MUTEX_ENTER(&rx_freePktQ_lock);
682         RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
683         rxi_NeedMorePackets = FALSE;
686         MUTEX_EXIT(&rx_freePktQ_lock);
690 #endif /* RX_ENABLE_TSFPQ */
693 /* Add more packet buffers */
/* Grow the global free pool; caller must already hold rx_freePktQ_lock.
 * Over-allocates by an extra 1/4 of apackets' worth of continuation-buffer
 * packets so roughly a quarter of packets can carry maximal jumbo payloads,
 * then appends everything to rx_freePacketQueue and updates the counters
 * under rx_packets_mutex.  NOTE(review): listing has gaps; body incomplete. */
695 rxi_MorePacketsNoLock(int apackets)
697 #ifdef RX_ENABLE_TSFPQ
698     struct rx_ts_info_t * rx_ts_info;
699 #endif /* RX_ENABLE_TSFPQ */
700     struct rx_packet *p, *e;
703     /* allocate enough packets that 1/4 of the packets will be able
704      * to hold maximal amounts of data */
705     apackets += (apackets / 4)
706         * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
708         getme = apackets * sizeof(struct rx_packet);
709         p = osi_Alloc(getme);
711             apackets -= apackets / 4;
712             osi_Assert(apackets > 0);
717 #ifdef RX_ENABLE_TSFPQ
718     RX_TS_INFO_GET(rx_ts_info);
719     RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info,apackets);
720 #endif /* RX_ENABLE_TSFPQ */
722     for (e = p + apackets; p < e; p++) {
723         RX_PACKET_IOV_INIT(p);
724 #ifdef RX_TRACK_PACKETS
725         p->flags |= RX_PKTFLAG_FREE;
729         opr_queue_Append(&rx_freePacketQueue, &p->entry);
730 #ifdef RXDEBUG_PACKET
731         p->packetId = rx_packet_id++;
732         p->allNextp = rx_mallocedP;
733 #endif /* RXDEBUG_PACKET */
737     rx_nFreePackets += apackets;
738     MUTEX_ENTER(&rx_packets_mutex);
739     rx_nPackets += apackets;
740 #ifdef RX_ENABLE_TSFPQ
741     RX_TS_FPQ_COMPUTE_LIMITS;
742 #endif /* RX_ENABLE_TSFPQ */
743     MUTEX_EXIT(&rx_packets_mutex);
744     rxi_NeedMorePackets = FALSE;
/* Tear down the packet pool.  Note the existing MTUXXX comment: as written
 * this only frees/unpins one allocation's worth via rx_mallocedP, not every
 * packet ever allocated.  NOTE(review): listing has gaps; body incomplete. */
750 rxi_FreeAllPackets(void)
752     /* must be called at proper interrupt level, etcetera */
753     /* MTUXXX need to free all Packets */
754     osi_Free(rx_mallocedP,
755              (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
756     UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
759 #ifdef RX_ENABLE_TSFPQ
/* Rebalance this thread's local free-packet queue toward num_keep_local
 * entries: flush the surplus to the global pool, or pull the shortfall from
 * it (growing the pool if needed).  Unless allow_overcommit is set, the
 * local queue is capped at rx_TSFPQLocalMax.
 * NOTE(review): listing has gaps; body incomplete as shown. */
761 rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
763     struct rx_ts_info_t * rx_ts_info;
767     RX_TS_INFO_GET(rx_ts_info);
769     if (num_keep_local != rx_ts_info->_FPQ.len) {
771         MUTEX_ENTER(&rx_freePktQ_lock);
772         if (num_keep_local < rx_ts_info->_FPQ.len) {
773             xfer = rx_ts_info->_FPQ.len - num_keep_local;
774             RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
777             xfer = num_keep_local - rx_ts_info->_FPQ.len;
778             if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
779                 xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
780             if (rx_nFreePackets < xfer) {
781                 rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
783             RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
785         MUTEX_EXIT(&rx_freePktQ_lock);
/* Return every thread-local free packet to the global pool. */
void
rxi_FlushLocalPacketsTSFPQ(void)
{
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
}
795 #endif /* RX_ENABLE_TSFPQ */
797 /* Allocate more packets iff we need more continuation buffers */
798 /* In kernel, can't page in memory with interrupts disabled, so we
799 * don't use the event mechanism. */
801 rx_CheckPackets(void)
803 if (rxi_NeedMorePackets) {
804 rxi_MorePackets(rx_maxSendWindow);
808 /* In the packet freeing routine below, the assumption is that
809 we want all of the packets to be used equally frequently, so that we
810 don't get packet buffers paging out. It would be just as valid to
811 assume that we DO want them to page out if not many are being used.
812 In any event, we assume the former, and append the packets to the end
814 /* This explanation is bogus. The free list doesn't remain in any kind of
815 useful order for afs_int32: the packets in use get pretty much randomly scattered
816 across all the pages. In order to permit unused {packets,bufs} to page out, they
817 must be stored so that packets which are adjacent in memory are adjacent in the
818 free list. An array springs rapidly to mind.
821 /* Actually free the packet p. */
822 #ifndef RX_ENABLE_TSFPQ
/* Actually free packet p onto the global free queue; caller must hold
 * rx_freePktQ_lock.  Packets are appended (not prepended) per the comment
 * above about keeping all buffers equally warm.
 * NOTE(review): listing has gaps; body incomplete as shown. */
824 rxi_FreePacketNoLock(struct rx_packet *p)
826     dpf(("Free %"AFS_PTR_FMT"\n", p));
830     opr_queue_Append(&rx_freePacketQueue, &p->entry);
832 #endif /* RX_ENABLE_TSFPQ */
834 #ifdef RX_ENABLE_TSFPQ
/* Free packet p into this thread's local pool; if flush_global is set and
 * the local queue is over rx_TSFPQLocalMax, push the excess to the global
 * pool under rx_freePktQ_lock and wake waiters.
 * NOTE(review): listing has gaps; body incomplete as shown. */
836 rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
838     struct rx_ts_info_t * rx_ts_info;
839     dpf(("Free %"AFS_PTR_FMT"\n", p));
841     RX_TS_INFO_GET(rx_ts_info);
842     RX_TS_FPQ_CHECKIN(rx_ts_info,p);
844     if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
846         MUTEX_ENTER(&rx_freePktQ_lock);
848         RX_TS_FPQ_LTOG(rx_ts_info);
850         /* Wakeup anyone waiting for packets */
853         MUTEX_EXIT(&rx_freePktQ_lock);
857 #endif /* RX_ENABLE_TSFPQ */
860 * free continuation buffers off a packet into a queue
862 * [IN] p -- packet from which continuation buffers will be freed
863 * [IN] first -- iovec offset of first continuation buffer to free
864 * [IN] q -- queue into which continuation buffers will be chained
867 * number of continuation buffers freed
869 #ifndef RX_ENABLE_TSFPQ
/* Detach p's continuation buffers from iovec index max(2, first) upward,
 * mark each one free and append it to q.  Returns the count freed (per the
 * header comment above).  Panics on a NULL iov_base, which would indicate
 * a corrupt packet.  NOTE(review): listing has gaps; body incomplete. */
871 rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct opr_queue * q)
874     struct rx_packet * cb;
877     for (first = MAX(2, first); first < p->niovecs; first++, count++) {
878         iov = &p->wirevec[first];
880             osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
881         cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
882         RX_FPQ_MARK_FREE(cb);
883         opr_queue_Append(q, &cb->entry);
892 * free packet continuation buffers into the global free packet pool
894 * [IN] p -- packet from which to free continuation buffers
895 * [IN] first -- iovec offset of first continuation buffer to free
/* Free p's continuation buffers (iovec index max(2, first) upward) straight
 * into the global pool; caller must hold rx_freePktQ_lock.
 * NOTE(review): listing has gaps; body incomplete as shown. */
901 rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
905     for (first = MAX(2, first); first < p->niovecs; first++) {
906         iov = &p->wirevec[first];
908             osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
909         rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
920 * free packet continuation buffers into the thread-local free pool
922 * [IN] p -- packet from which continuation buffers will be freed
923 * [IN] first -- iovec offset of first continuation buffer to free
924 * any value less than 2, the min number of iovecs,
925 * is treated as if it is 2.
926 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
927 * global free pool before returning
/* Free p's continuation buffers (iovec index max(2, first) upward) into the
 * thread-local pool; with flush_global set, overquota packets are then
 * pushed to the global pool under rx_freePktQ_lock (see header comment
 * above).  NOTE(review): listing has gaps; body incomplete as shown. */
933 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
936     struct rx_ts_info_t * rx_ts_info;
938     RX_TS_INFO_GET(rx_ts_info);
940     for (first = MAX(2, first); first < p->niovecs; first++) {
941         iov = &p->wirevec[first];
943             osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
944         RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
949     if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
951         MUTEX_ENTER(&rx_freePktQ_lock);
953         RX_TS_FPQ_LTOG(rx_ts_info);
955         /* Wakeup anyone waiting for packets */
958         MUTEX_EXIT(&rx_freePktQ_lock);
963 #endif /* RX_ENABLE_TSFPQ */
965 int rxi_nBadIovecs = 0;
967 /* rxi_RestoreDataBufs
969 * Restore the correct sizes to the iovecs. Called when reusing a packet
970 * for reading off the wire.
/* Reset p's iovec lengths to their full sizes so the packet can be reused
 * for reading off the wire; continuation buffers get RX_CBUFFERSIZE each.
 * A NULL iov_base presumably counts against rxi_nBadIovecs — TODO confirm
 * (the branch body is missing from this listing).
 * NOTE(review): listing has gaps; body incomplete as shown. */
973 rxi_RestoreDataBufs(struct rx_packet *p)
978     RX_PACKET_IOV_INIT(p);
980     for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
981         if (!iov->iov_base) {
986         iov->iov_len = RX_CBUFFERSIZE;
990 #ifdef RX_ENABLE_TSFPQ
/* TSFPQ variant: walk past the continuation buffers that still hold message
 * data (p->length bytes beyond wirevec[1]), then check every remaining empty
 * buffer into the thread-local pool, flushing to the global pool if over
 * quota.  `first` must be 1.  NOTE(review): listing has gaps; incomplete. */
992 rxi_TrimDataBufs(struct rx_packet *p, int first)
995     struct iovec *iov, *end;
996     struct rx_ts_info_t * rx_ts_info;
1000         osi_Panic("TrimDataBufs 1: first must be 1");
1002     /* Skip over continuation buffers containing message data */
1003     iov = &p->wirevec[2];
1004     end = iov + (p->niovecs - 2);
1005     length = p->length - p->wirevec[1].iov_len;
1006     for (; iov < end && length > 0; iov++) {
1008             osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
1009         length -= iov->iov_len;
1012     /* iov now points to the first empty data buffer. */
1016         RX_TS_INFO_GET(rx_ts_info);
1017         for (; iov < end; iov++) {
1019                 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1020             RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
1023         if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
1025             MUTEX_ENTER(&rx_freePktQ_lock);
1027             RX_TS_FPQ_LTOG(rx_ts_info);
1028             rxi_PacketsUnWait();
1030             MUTEX_EXIT(&rx_freePktQ_lock);
1036 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ variant: same walk as above, but the empty trailing buffers are
 * returned directly to the global pool under rx_freePktQ_lock, with
 * rxi_PacketsUnWait() to wake waiters.  `first` must be 1.
 * NOTE(review): listing has gaps; body incomplete as shown. */
1038 rxi_TrimDataBufs(struct rx_packet *p, int first)
1041     struct iovec *iov, *end;
1045         osi_Panic("TrimDataBufs 1: first must be 1");
1047     /* Skip over continuation buffers containing message data */
1048     iov = &p->wirevec[2];
1049     end = iov + (p->niovecs - 2);
1050     length = p->length - p->wirevec[1].iov_len;
1051     for (; iov < end && length > 0; iov++) {
1053             osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
1054         length -= iov->iov_len;
1057     /* iov now points to the first empty data buffer. */
1062         MUTEX_ENTER(&rx_freePktQ_lock);
1064         for (; iov < end; iov++) {
1066                 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1067             rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
1070         rxi_PacketsUnWait();
1072         MUTEX_EXIT(&rx_freePktQ_lock);
1077 #endif /* RX_ENABLE_TSFPQ */
1079 /* Free the packet p. P is assumed not to be on any queue, i.e.
1080 * remove it yourself first if you call this routine. */
1081 #ifdef RX_ENABLE_TSFPQ
1083 rxi_FreePacket(struct rx_packet *p)
1085 rxi_FreeDataBufsTSFPQ(p, 2, 0);
1086 rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
1088 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ free: under rx_freePktQ_lock, return p's continuation buffers
 * and then p itself to the global pool, waking any packet waiters.  p must
 * not be on any queue.  NOTE(review): listing has gaps; body incomplete. */
1090 rxi_FreePacket(struct rx_packet *p)
1095     MUTEX_ENTER(&rx_freePktQ_lock);
1097     rxi_FreeDataBufsNoLock(p, 2);
1098     rxi_FreePacketNoLock(p);
1099     /* Wakeup anyone waiting for packets */
1100     rxi_PacketsUnWait();
1102     MUTEX_EXIT(&rx_freePktQ_lock);
1105 #endif /* RX_ENABLE_TSFPQ */
1107 /* rxi_AllocPacket sets up p->length so it reflects the number of
1108 * bytes in the packet at this point, **not including** the header.
1109 * The header is absolutely necessary, besides, this is the way the
1110 * length field is usually used */
1111 #ifdef RX_ENABLE_TSFPQ
/* TSFPQ variant: allocate one packet, preferring the thread-local pool and
 * refilling it from the global pool (or growing the pool) when empty.  Per
 * the comment above, p->length reflects the payload size, not the header.
 * NOTE(review): listing has gaps — e.g. only one of the two IsEmpty branches
 * can be live; the surrounding #ifdef lines are missing from this view. */
1112 static struct rx_packet *
1113 rxi_AllocPacketNoLock(int class)
1115     struct rx_packet *p;
1116     struct rx_ts_info_t * rx_ts_info;
1118     RX_TS_INFO_GET(rx_ts_info);
1120     if (rx_stats_active)
1121         rx_atomic_inc(&rx_stats.packetRequests);
1122     if (opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {
1125         if (opr_queue_IsEmpty(&rx_freePacketQueue))
1126             osi_Panic("rxi_AllocPacket error");
1128         if (opr_queue_IsEmpty(&rx_freePacketQueue))
1129             rxi_MorePacketsNoLock(rx_maxSendWindow);
1133         RX_TS_FPQ_GTOL(rx_ts_info);
1136     RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1138     dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1141     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1142      * order to truncate outbound packets.  In the near future, may need
1143      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1145     RX_PACKET_IOV_FULLINIT(p);
1148 #else /* RX_ENABLE_TSFPQ */
/* Non-TSFPQ variant: fail with NULL (after bumping the per-class failure
 * stat and flagging a shortage) when the class is over quota; otherwise take
 * the first packet off the global free queue (panicking or growing the pool
 * when it is empty — only one branch is live, chosen by #ifdefs missing from
 * this listing).  Caller must hold rx_freePktQ_lock.
 * NOTE(review): listing has gaps; body incomplete as shown. */
1149 static struct rx_packet *
1150 rxi_AllocPacketNoLock(int class)
1152     struct rx_packet *p;
1155     if (rxi_OverQuota(class)) {
1156         rxi_NeedMorePackets = TRUE;
1157         if (rx_stats_active) {
1159             case RX_PACKET_CLASS_RECEIVE:
1160                 rx_atomic_inc(&rx_stats.receivePktAllocFailures);
1162             case RX_PACKET_CLASS_SEND:
1163                 rx_atomic_inc(&rx_stats.sendPktAllocFailures);
1165             case RX_PACKET_CLASS_SPECIAL:
1166                 rx_atomic_inc(&rx_stats.specialPktAllocFailures);
1168             case RX_PACKET_CLASS_RECV_CBUF:
1169                 rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
1171             case RX_PACKET_CLASS_SEND_CBUF:
1172                 rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
1176         return (struct rx_packet *)0;
1180     if (rx_stats_active)
1181         rx_atomic_inc(&rx_stats.packetRequests);
1184     if (opr_queue_IsEmpty(&rx_freePacketQueue))
1185         osi_Panic("rxi_AllocPacket error");
1187     if (opr_queue_IsEmpty(&rx_freePacketQueue))
1188         rxi_MorePacketsNoLock(rx_maxSendWindow);
1192     p = opr_queue_First(&rx_freePacketQueue, struct rx_packet, entry);
1193     opr_queue_Remove(&p->entry);
1194     RX_FPQ_MARK_USED(p);
1196     dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1199     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1200      * order to truncate outbound packets.  In the near future, may need
1201      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1203     RX_PACKET_IOV_FULLINIT(p);
1206 #endif /* RX_ENABLE_TSFPQ */
1208 #ifdef RX_ENABLE_TSFPQ
/* Allocate one packet from the thread-local pool.  With pull_global set, an
 * empty local queue triggers a locked refill from the global pool; without
 * it, an empty local queue presumably returns NULL — TODO confirm (that
 * branch's body is missing from this listing).
 * NOTE(review): listing has gaps; body incomplete as shown. */
1209 static struct rx_packet *
1210 rxi_AllocPacketTSFPQ(int class, int pull_global)
1212     struct rx_packet *p;
1213     struct rx_ts_info_t * rx_ts_info;
1215     RX_TS_INFO_GET(rx_ts_info);
1217     if (rx_stats_active)
1218         rx_atomic_inc(&rx_stats.packetRequests);
1219     if (pull_global && opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {
1220         MUTEX_ENTER(&rx_freePktQ_lock);
1222         if (opr_queue_IsEmpty(&rx_freePacketQueue))
1223             rxi_MorePacketsNoLock(rx_maxSendWindow);
1225         RX_TS_FPQ_GTOL(rx_ts_info);
1227         MUTEX_EXIT(&rx_freePktQ_lock);
1228     } else if (opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {
1232     RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1234     dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1236     /* have to do this here because rx_FlushWrite fiddles with the iovs in
1237      * order to truncate outbound packets.  In the near future, may need
1238      * to allocate bufs from a static pool here, and/or in AllocSendPacket
1240     RX_PACKET_IOV_FULLINIT(p);
1243 #endif /* RX_ENABLE_TSFPQ */
1245 #ifdef RX_ENABLE_TSFPQ
/* Public TSFPQ allocator: thin wrapper that always allows a pull from the
 * global pool.  NOTE(review): listing has gaps; return statement missing
 * from this view. */
1247 rxi_AllocPacket(int class)
1249     struct rx_packet *p;
1251     p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
1254 #else /* RX_ENABLE_TSFPQ */
/* Public non-TSFPQ allocator: take rx_freePktQ_lock around the NoLock
 * worker.  NOTE(review): listing has gaps; return statement missing from
 * this view. */
1256 rxi_AllocPacket(int class)
1258     struct rx_packet *p;
1260     MUTEX_ENTER(&rx_freePktQ_lock);
1261     p = rxi_AllocPacketNoLock(class);
1262     MUTEX_EXIT(&rx_freePktQ_lock);
1265 #endif /* RX_ENABLE_TSFPQ */
1267 /* This guy comes up with as many buffers as it {takes,can get} given
1268 * the MTU for this call. It also sets the packet length before
1269 * returning. caution: this is often called at NETPRI
1270 * Called with call locked.
/* Allocate a send packet sized for this call: want is clamped to the MTU
 * minus header and security overhead (mud), continuation buffers are added
 * as needed, and any excess length is trimmed back.  If no packet is
 * available, the caller sleeps on rx_waitingForPackets_cv (dropping the call
 * lock around the wait) until packets are freed, per the comment above this
 * routine — often called at NETPRI, with the call locked.
 * NOTE(review): listing has gaps; body incomplete as shown. */
1273 rxi_AllocSendPacket(struct rx_call *call, int want)
1275     struct rx_packet *p = (struct rx_packet *)0;
1280     mud = call->MTU - RX_HEADER_SIZE;
1282         rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
1283         rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
1285 #ifdef RX_ENABLE_TSFPQ
1286     if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
1288         want = MIN(want, mud);
1290         if ((unsigned)want > p->length)
1291             (void)rxi_AllocDataBuf(p, (want - p->length),
1292                                    RX_PACKET_CLASS_SEND_CBUF);
1294         if (p->length > mud)
1297         if (delta >= p->length) {
1305 #endif /* RX_ENABLE_TSFPQ */
1307     while (!(call->error)) {
1308         MUTEX_ENTER(&rx_freePktQ_lock);
1309         /* if an error occurred, or we get the packet we want, we're done */
1310         if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
1311             MUTEX_EXIT(&rx_freePktQ_lock);
1314             want = MIN(want, mud);
1316             if ((unsigned)want > p->length)
1317                 (void)rxi_AllocDataBuf(p, (want - p->length),
1318                                        RX_PACKET_CLASS_SEND_CBUF);
1320             if (p->length > mud)
1323             if (delta >= p->length) {
1332         /* no error occurred, and we didn't get a packet, so we sleep.
1333          * At this point, we assume that packets will be returned
1334          * sooner or later, as packets are acknowledged, and so we
1337         call->flags |= RX_CALL_WAIT_PACKETS;
1338         CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
1339         MUTEX_EXIT(&call->lock);
1340         rx_waitingForPackets = 1;
1342 #ifdef RX_ENABLE_LOCKS
1343         CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
1345         osi_rxSleep(&rx_waitingForPackets);
1347         MUTEX_EXIT(&rx_freePktQ_lock);
1348         MUTEX_ENTER(&call->lock);
1349         CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
1350         call->flags &= ~RX_CALL_WAIT_PACKETS;
1359 /* Windows does not use file descriptors. */
1360 #define CountFDs(amax) 0
1362 /* count the number of used FDs */
1371 for (i = 0; i < amax; i++) {
1372 code = fstat(i, &tstat);
1378 #endif /* AFS_NT40_ENV */
1381 #define CountFDs(amax) amax
1385 #if !defined(KERNEL) || defined(UKERNEL)
1387 /* This function reads a single packet from the interface into the
1388 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
1389 * (host,port) of the sender are stored in the supplied variables, and
1390 * the data length of the packet is stored in the packet structure.
1391 * The header is decoded. */
/* Read one datagram from `socket` into p via recvmsg over the packet's
 * iovecs, store the sender in *host/*port (network byte order, per the
 * sockaddr_in fields used), decode the Rx header and set the data size.
 * Returns 0 for a bogus/dropped packet (per the comment above).  The last
 * iovec is temporarily extended by RX_EXTRABUFFERSIZE so an oversized
 * datagram cannot overrun — Rx's wire header carries no length field.
 * NOTE(review): listing has gaps; body incomplete as shown. */
1393 rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
1396     struct sockaddr_in from;
1399     afs_uint32 tlen, savelen;
1401     rx_computelen(p, tlen);
1402     rx_SetDataSize(p, tlen); /* this is the size of the user data area */
1404     tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
1405     rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
1406                                  * it once in order to avoid races.  */
1409         tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
1417     /* Extend the last iovec for padding, it's just to make sure that the
1418      * read doesn't return more data than we expect, and is done to get around
1419      * our problems caused by the lack of a length field in the rx header.
1420      * Use the extra buffer that follows the localdata in each packet
1422     savelen = p->wirevec[p->niovecs - 1].iov_len;
1423     p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
1425     memset(&msg, 0, sizeof(msg));
1426     msg.msg_name = (char *)&from;
1427     msg.msg_namelen = sizeof(struct sockaddr_in);
1428     msg.msg_iov = p->wirevec;
1429     msg.msg_iovlen = p->niovecs;
1430     nbytes = rxi_Recvmsg(socket, &msg, 0);
1432     /* restore the vec to its correct state */
1433     p->wirevec[p->niovecs - 1].iov_len = savelen;
1435     p->length = (u_short)(nbytes - RX_HEADER_SIZE);
1436     if (nbytes < 0 || (nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
1437         if (nbytes < 0 && errno == EWOULDBLOCK) {
1438             if (rx_stats_active)
1439                 rx_atomic_inc(&rx_stats.noPacketOnRead);
1440         } else if (nbytes <= 0) {
1441             if (rx_stats_active) {
1442                 rx_atomic_inc(&rx_stats.bogusPacketOnRead);
1443                 rx_stats.bogusHost = from.sin_addr.s_addr;
1445             dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
1446                  ntohs(from.sin_port), nbytes));
1451     else if ((rx_intentionallyDroppedOnReadPer100 > 0)
1452              && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
1453         rxi_DecodePacketHeader(p);
1455         *host = from.sin_addr.s_addr;
1456         *port = from.sin_port;
1458         dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
1459              p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
1460              p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1462 #ifdef RX_TRIMDATABUFS
1463         rxi_TrimDataBufs(p, 1);
1469         /* Extract packet header. */
1470         rxi_DecodePacketHeader(p);
1472         *host = from.sin_addr.s_addr;
1473         *port = from.sin_port;
1475             && p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
1477             rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
1480 #ifdef RX_TRIMDATABUFS
1481         /* Free any empty packet buffers at the end of this packet */
1482         rxi_TrimDataBufs(p, 1);
1488 #endif /* !KERNEL || UKERNEL */
/*
 * rxi_SplitJumboPacket -- peel the first fixed-size packet off the front of
 * a received jumbogram, returning the remainder as a second rx_packet that
 * shares the original's continuation buffers.
 *
 * NOTE(review): the stale line-number prefixes below are non-contiguous, so
 * some statements (e.g. the final return of `np`) are elided from this view;
 * visible code is kept byte-identical.
 */
1490 /* This function splits off the first packet in a jumbo packet.
1491 * As of AFS 3.5, jumbograms contain more than one fixed size
1492 * packet, and the RX_JUMBO_PACKET flag is set in all but the
1493 * last packet header. All packets (except the last) are padded to
1494 * fall on RX_CBUFFERSIZE boundaries.
1495 * HACK: We store the length of the first n-1 packets in the
1496 * last two pad bytes. */
1499 rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
1502 struct rx_packet *np;
1503 struct rx_jumboHeader *jp;
1509 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
1510 * bytes in length. All but the first packet are preceded by
1511 * an abbreviated four byte header. The length of the last packet
1512 * is calculated from the size of the jumbogram. */
1513 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
/* Sanity checks: the jumbogram must hold at least one full sub-packet and
 * have a continuation buffer for the remainder. */
1515 if ((int)p->length < length) {
1516 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
1519 niov = p->niovecs - 2;
1521 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
/* The second continuation buffer becomes the head of the new packet;
 * RX_CBUF_TO_PACKET recovers the owning rx_packet from the buffer address. */
1524 iov = &p->wirevec[2];
1525 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
1527 /* Get a pointer to the abbreviated packet header */
1528 jp = (struct rx_jumboHeader *)
1529 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
1531 /* Set up the iovecs for the next packet */
1532 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
1533 np->wirevec[0].iov_len = sizeof(struct rx_header);
1534 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
1535 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
1536 np->niovecs = niov + 1;
1537 for (i = 2, iov++; i <= niov; i++, iov++) {
1538 np->wirevec[i] = *iov;
/* Split the byte counts: the remainder keeps everything past the first
 * sub-packet; the original is truncated to exactly one jumbo buffer. */
1540 np->length = p->length - length;
1541 p->length = RX_JUMBOBUFFERSIZE;
1544 /* Convert the jumbo packet header to host byte order */
1545 temp = ntohl(*(afs_uint32 *) jp);
1546 jp->flags = (u_char) (temp >> 24);
1547 jp->cksum = (u_short) (temp);
1549 /* Fill in the packet header */
/* The trailing sub-packet inherits the wire header with serial and
 * sequence bumped by one, per the jumbogram numbering convention. */
1550 np->header = p->header;
1551 np->header.serial = p->header.serial + 1;
1552 np->header.seq = p->header.seq + 1;
1553 np->header.flags = jp->flags;
1554 np->header.spare = jp->cksum;
/*
 * osi_NetSend -- user-space transmit path: gather `nvecs` iovecs into one
 * UDP datagram addressed by `addr` (a sockaddr_in) via rxi_Sendmsg.
 *
 * NOTE(review): elided lines here presumably include the msg.msg_iov
 * assignment and the return of rxi_Sendmsg's result -- confirm upstream.
 */
1560 /* Send a udp datagram */
1562 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
1563 int length, int istack)
1568 memset(&msg, 0, sizeof(msg));
1570 msg.msg_iovlen = nvecs;
1571 msg.msg_name = addr;
1572 msg.msg_namelen = sizeof(struct sockaddr_in);
1574 ret = rxi_Sendmsg(socket, &msg, 0);
1578 #elif !defined(UKERNEL)
1580 * message receipt is done in rxk_input or rx_put.
1583 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * cpytoc -- STREAMS helper (Solaris/HP-UX): linearize an mblk chain into
 * the flat buffer `cp`, walking b_cont links and copying M_DATA blocks.
 *
 * NOTE(review): the per-iteration advance of `cp`/`len` is elided from this
 * view (non-contiguous numbering); visible code kept byte-identical.
 */
1585 * Copy an mblock to the contiguous area pointed to by cp.
1586 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1587 * but it doesn't really.
1588 * Returns the number of bytes not transferred.
1589 * The message is NOT changed.
1592 cpytoc(mblk_t * mp, int off, int len, char *cp)
1596 for (; mp && len > 0; mp = mp->b_cont) {
/* Only M_DATA blocks carry payload; anything else aborts the copy. */
1597 if (mp->b_datap->db_type != M_DATA) {
1600 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1601 memcpy(cp, (char *)mp->b_rptr, n);
/*
 * cpytoiovec -- STREAMS helper: scatter an mblk chain into an iovec array.
 * `i` indexes the current iovec (starting before the first), `t` tracks the
 * bytes remaining in it, and `o` is presumably the write offset within it.
 *
 * NOTE(review): the inner loop that advances i/o/t and decrements n is
 * largely elided here; visible code kept byte-identical.
 */
1609 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1610 * but it doesn't really.
1611 * This sucks, anyway, do it like m_cpy.... below
1614 cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
1619 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1620 if (mp->b_datap->db_type != M_DATA) {
1623 n = MIN(len, (mp->b_wptr - mp->b_rptr));
/* When the current iovec is exhausted (t == 0), step to the next one. */
1629 t = iovs[i].iov_len;
1632 memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1642 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1643 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1645 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
/*
 * m_cpytoiovec -- BSD mbuf variant: copy `len` bytes starting `off` bytes
 * into mbuf chain `m` out to the iovec array. p1/l1 track the source mbuf
 * cursor, p2/l2 the destination iovec cursor; each step moves
 * MIN(l1, l2, len) bytes.
 *
 * NOTE(review): the off-skipping loop and the cursor-advance statements are
 * elided from this view; visible code kept byte-identical.
 */
1647 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1650 unsigned int l1, l2, i, t;
1652 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1653 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
/* Skip whole mbufs until `off` lands inside the current one. */
1656 if (m->m_len <= off) {
1666 p1 = mtod(m, caddr_t) + off;
1667 l1 = m->m_len - off;
1669 p2 = iovs[0].iov_base;
1670 l2 = iovs[0].iov_len;
1673 t = MIN(l1, MIN(l2, (unsigned int)len));
/* Source mbuf exhausted: advance to the next mbuf in the chain. */
1684 p1 = mtod(m, caddr_t);
/* Destination iovec exhausted: advance to the next iovec. */
1690 p2 = iovs[i].iov_base;
1691 l2 = iovs[i].iov_len;
1699 #endif /* AFS_SUN5_ENV */
1701 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1702 #if defined(AFS_NBSD_ENV)
/*
 * rx_mb_to_packet -- copy a received kernel network buffer (mbuf chain, or
 * mblk chain on Solaris/HP-UX) into an rx_packet's wire iovecs, then release
 * the buffer via the caller-supplied `free` routine (elided from this view).
 * ANSI prototype for NetBSD; K&R declaration otherwise.
 */
1704 rx_mb_to_packet(struct mbuf *amb, void (*free) (struct mbuf *), int hdr_len, int data_len, struct rx_packet *phandle)
1707 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1708 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1714 struct rx_packet *phandle;
1715 int hdr_len, data_len;
1716 #endif /* AFS_NBSD_ENV */
/* Skip hdr_len bytes of link/UDP header, scatter data_len payload bytes. */
1721 m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1728 #endif /*KERNEL && !UKERNEL */
/*
 * rxi_ReceiveDebugPacket -- answer an incoming RX_PACKET_TYPE_DEBUG request
 * by filling the same packet `ap` with the requested statistics and sending
 * it back to (ahost, aport) via rxi_SendDebugPacket. The request selector
 * arrives as a struct rx_debugIn at offset 0; tin.type picks the case and
 * tin.index selects which connection/peer to report.
 *
 * NOTE(review): the stale numbering below is non-contiguous -- version
 * checks, braces, `break`s and several `#endif`s are elided from this view;
 * visible code is kept byte-identical.
 */
1731 /* send a response to a debug packet */
1734 rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
1735 afs_uint32 ahost, short aport, int istack)
1737 struct rx_debugIn tin;
1741 * Only respond to client-initiated Rx debug packets,
1742 * and clear the client flag in the response.
1744 if (ap->header.flags & RX_CLIENT_INITIATED) {
1745 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1746 rxi_EncodePacketHeader(ap);
1751 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1752 /* all done with packet, now set length to the truth, so we can
1753 * reuse this packet */
1754 rx_computelen(ap, ap->length);
1756 tin.type = ntohl(tin.type);
1757 tin.index = ntohl(tin.index);
/* ---- basic server statistics ---- */
1759 case RX_DEBUGI_GETSTATS:{
1760 struct rx_debugStats tstat;
1762 /* get basic stats */
1763 memset(&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1764 tstat.version = RX_DEBUGI_VERSION;
1765 #ifndef RX_ENABLE_LOCKS
1766 tstat.waitingForPackets = rx_waitingForPackets;
1768 MUTEX_ENTER(&rx_serverPool_lock);
1769 tstat.nFreePackets = htonl(rx_nFreePackets);
1770 tstat.nPackets = htonl(rx_nPackets);
1771 tstat.callsExecuted = htonl(rxi_nCalls);
1772 tstat.packetReclaims = htonl(rx_packetReclaims);
1773 tstat.usedFDs = CountFDs(64);
1774 tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
1775 tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
1776 tstat.idleThreads = opr_queue_Count(&rx_idleServerQueue);
1777 MUTEX_EXIT(&rx_serverPool_lock);
1778 tstat.idleThreads = htonl(tstat.idleThreads);
/* Grow the reply packet if the stats struct is bigger than it. */
1779 tl = sizeof(struct rx_debugStats) - ap->length;
1781 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1784 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1786 ap->length = sizeof(struct rx_debugStats);
1787 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1788 rx_computelen(ap, ap->length);
/* ---- per-connection report (one connection per request) ---- */
1793 case RX_DEBUGI_GETALLCONN:
1794 case RX_DEBUGI_GETCONN:{
1796 struct rx_connection *tc;
1797 struct rx_call *tcall;
1798 struct rx_debugConn tconn;
1799 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1802 tl = sizeof(struct rx_debugConn) - ap->length;
1804 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1808 memset(&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1809 /* get N'th (maybe) "interesting" connection info */
1810 for (i = 0; i < rx_hashTableSize; i++) {
1811 #if !defined(KERNEL)
1812 /* the time complexity of the algorithm used here
1813 * exponentially increses with the number of connections.
1815 #ifdef AFS_PTHREAD_ENV
1821 MUTEX_ENTER(&rx_connHashTable_lock);
1822 /* We might be slightly out of step since we are not
1823 * locking each call, but this is only debugging output.
1825 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
1826 if ((all || rxi_IsConnInteresting(tc))
1827 && tin.index-- <= 0) {
1828 tconn.host = tc->peer->host;
1829 tconn.port = tc->peer->port;
1830 tconn.cid = htonl(tc->cid);
1831 tconn.epoch = htonl(tc->epoch);
1832 tconn.serial = htonl(tc->serial);
1833 for (j = 0; j < RX_MAXCALLS; j++) {
1834 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1835 if ((tcall = tc->call[j])) {
1836 tconn.callState[j] = tcall->state;
1837 tconn.callMode[j] = tcall->app.mode;
1838 tconn.callFlags[j] = tcall->flags;
1839 if (!opr_queue_IsEmpty(&tcall->rq))
1840 tconn.callOther[j] |= RX_OTHER_IN;
1841 if (!opr_queue_IsEmpty(&tcall->tq))
1842 tconn.callOther[j] |= RX_OTHER_OUT;
1844 tconn.callState[j] = RX_STATE_NOTINIT;
1847 tconn.natMTU = htonl(tc->peer->natMTU);
1848 tconn.error = htonl(tc->error);
1849 tconn.flags = tc->flags;
1850 tconn.type = tc->type;
1851 tconn.securityIndex = tc->securityIndex;
1852 if (tc->securityObject) {
1853 RXS_GetStats(tc->securityObject, tc,
/* Byte-swap helpers for the embedded security statistics. */
1855 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1856 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1859 DOHTONL(packetsReceived);
1860 DOHTONL(packetsSent);
1861 DOHTONL(bytesReceived);
1865 sizeof(tconn.secStats.spares) /
1870 sizeof(tconn.secStats.sparel) /
1871 sizeof(afs_int32); i++)
1875 MUTEX_EXIT(&rx_connHashTable_lock);
1876 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1879 ap->length = sizeof(struct rx_debugConn);
1880 rxi_SendDebugPacket(ap, asocket, ahost, aport,
1886 MUTEX_EXIT(&rx_connHashTable_lock);
1888 /* if we make it here, there are no interesting packets */
1889 tconn.cid = htonl(0xffffffff); /* means end */
1890 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1893 ap->length = sizeof(struct rx_debugConn);
1894 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* ---- per-peer report (one peer per request) ---- */
1900 * Pass back all the peer structures we have available
1903 case RX_DEBUGI_GETPEER:{
1906 struct rx_debugPeer tpeer;
1909 tl = sizeof(struct rx_debugPeer) - ap->length;
1911 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1915 memset(&tpeer, 0, sizeof(tpeer));
1916 for (i = 0; i < rx_hashTableSize; i++) {
1917 #if !defined(KERNEL)
1918 /* the time complexity of the algorithm used here
1919 * exponentially increses with the number of peers.
1921 * Yielding after processing each hash table entry
1922 * and dropping rx_peerHashTable_lock.
1923 * also increases the risk that we will miss a new
1924 * entry - but we are willing to live with this
1925 * limitation since this is meant for debugging only
1927 #ifdef AFS_PTHREAD_ENV
1933 MUTEX_ENTER(&rx_peerHashTable_lock);
1934 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1935 if (tin.index-- <= 0) {
1937 MUTEX_EXIT(&rx_peerHashTable_lock);
1939 MUTEX_ENTER(&tp->peer_lock);
1940 tpeer.host = tp->host;
1941 tpeer.port = tp->port;
1942 tpeer.ifMTU = htons(tp->ifMTU);
1943 tpeer.idleWhen = htonl(tp->idleWhen);
1944 tpeer.refCount = htons(tp->refCount);
1945 tpeer.burstSize = 0;
1947 tpeer.burstWait.sec = 0;
1948 tpeer.burstWait.usec = 0;
1949 tpeer.rtt = htonl(tp->rtt);
1950 tpeer.rtt_dev = htonl(tp->rtt_dev);
1951 tpeer.nSent = htonl(tp->nSent);
1952 tpeer.reSends = htonl(tp->reSends);
1953 tpeer.natMTU = htons(tp->natMTU);
1954 tpeer.maxMTU = htons(tp->maxMTU);
1955 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1956 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1957 tpeer.MTU = htons(tp->MTU);
1958 tpeer.cwind = htons(tp->cwind);
1959 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1960 tpeer.congestSeq = htons(tp->congestSeq);
/* 64-bit byte counters are sent as explicit high/low 32-bit halves. */
1961 tpeer.bytesSent.high =
1962 htonl(tp->bytesSent >> 32);
1963 tpeer.bytesSent.low =
1964 htonl(tp->bytesSent & MAX_AFS_UINT32);
1965 tpeer.bytesReceived.high =
1966 htonl(tp->bytesReceived >> 32);
1967 tpeer.bytesReceived.low =
1968 htonl(tp->bytesReceived & MAX_AFS_UINT32);
1969 MUTEX_EXIT(&tp->peer_lock);
1971 MUTEX_ENTER(&rx_peerHashTable_lock);
1973 MUTEX_EXIT(&rx_peerHashTable_lock);
1975 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1978 ap->length = sizeof(struct rx_debugPeer);
1979 rxi_SendDebugPacket(ap, asocket, ahost, aport,
1985 MUTEX_EXIT(&rx_peerHashTable_lock);
1987 /* if we make it here, there are no interesting packets */
1988 tpeer.host = htonl(0xffffffff); /* means end */
1989 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1992 ap->length = sizeof(struct rx_debugPeer);
1993 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* ---- raw rx_stats dump ---- */
1998 case RX_DEBUGI_RXSTATS:{
2002 tl = sizeof(rx_stats) - ap->length;
2004 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
2008 /* Since its all int32s convert to network order with a loop. */
2009 if (rx_stats_active)
2010 MUTEX_ENTER(&rx_stats_mutex);
2011 s = (afs_int32 *) & rx_stats;
2012 for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
2013 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
2016 ap->length = sizeof(rx_stats);
2017 if (rx_stats_active)
2018 MUTEX_EXIT(&rx_stats_mutex);
2019 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* ---- unknown request type: echo back an error marker ---- */
2025 /* error response packet */
2026 tin.type = htonl(RX_DEBUGI_BADTYPE);
2027 tin.index = tin.type;
2028 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
2030 ap->length = sizeof(struct rx_debugIn);
2031 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/*
 * rxi_ReceiveVersionPacket -- answer a version query by writing a 65-byte,
 * NUL-padded version string (cml_version_number minus its 4-char prefix)
 * into the request packet and echoing it back to the sender.
 *
 * NOTE(review): declarations and the packet-length save/restore around the
 * send appear to be elided from this view (non-contiguous numbering).
 */
2039 rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
2040 afs_uint32 ahost, short aport, int istack)
2045 * Only respond to client-initiated version requests, and
2046 * clear that flag in the response.
2048 if (ap->header.flags & RX_CLIENT_INITIATED) {
2051 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2052 rxi_EncodePacketHeader(ap);
2053 memset(buf, 0, sizeof(buf));
/* strncpy here is safe: buf was pre-zeroed and the copy leaves the last
 * byte untouched, so the result is always NUL-terminated. */
2054 strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2055 rx_packetwrite(ap, 0, 65, buf);
2058 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/*
 * rxi_SendDebugPacket -- transmit `apacket` back to (ahost, aport),
 * temporarily trimming the packet's iovec chain so exactly
 * length + RX_HEADER_SIZE bytes go on the wire, then restoring it.
 * Delivery is best-effort: osi_NetSend's result is deliberately ignored.
 *
 * NOTE(review): the `break`/`else` of the trim loop and the
 * KERNEL/RX_KERNEL_TRACE preprocessor wrappers are partially elided from
 * this view; visible code kept byte-identical.
 */
2066 /* send a debug packet back to the sender */
2068 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2069 afs_uint32 ahost, short aport, afs_int32 istack)
2071 struct sockaddr_in taddr;
2072 unsigned int i, nbytes, savelen = 0;
2075 int waslocked = ISAFS_GLOCK();
2078 taddr.sin_family = AF_INET;
2079 taddr.sin_port = aport;
2080 taddr.sin_addr.s_addr = ahost;
2081 memset(&taddr.sin_zero, 0, sizeof(taddr.sin_zero));
2082 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2083 taddr.sin_len = sizeof(struct sockaddr_in);
2086 /* We need to trim the niovecs. */
2087 nbytes = apacket->length;
2088 for (i = 1; i < apacket->niovecs; i++) {
2089 if (nbytes <= apacket->wirevec[i].iov_len) {
/* Remember the original iovec length and count so they can be
 * restored after the send. */
2090 savelen = apacket->wirevec[i].iov_len;
2091 saven = apacket->niovecs;
2092 apacket->wirevec[i].iov_len = nbytes;
2093 apacket->niovecs = i + 1; /* so condition fails because i == niovecs */
2095 nbytes -= apacket->wirevec[i].iov_len;
2098 #ifdef RX_KERNEL_TRACE
2099 if (ICL_SETACTIVE(afs_iclSetp)) {
2102 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2103 "before osi_NetSend()");
2111 /* debug packets are not reliably delivered, hence the cast below. */
2112 (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2113 apacket->length + RX_HEADER_SIZE, istack);
2115 #ifdef RX_KERNEL_TRACE
2116 if (ICL_SETACTIVE(afs_iclSetp)) {
2118 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2119 "after osi_NetSend()");
2128 if (saven) { /* means we truncated the packet above. */
2129 apacket->wirevec[i - 1].iov_len = savelen;
2130 apacket->niovecs = saven;
/*
 * rxi_NetSendError -- react to a synchronous send failure. On platforms
 * that report host/network unreachability immediately (Windows, Linux,
 * Darwin), mark the call's peer as unresponsive by zeroing
 * call->lastReceiveTime so it times out promptly instead of later.
 *
 * NOTE(review): the `down` flag plumbing and the #ifdef AFS_NT40_ENV
 * opener are elided from this view; visible code kept byte-identical.
 */
2136 rxi_NetSendError(struct rx_call *call, int code)
2140 if (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) {
2143 if (code == -WSAEHOSTUNREACH) {
2146 #elif defined(AFS_LINUX20_ENV)
2147 if (code == -ENETUNREACH) {
2150 #elif defined(AFS_DARWIN_ENV)
2151 if (code == EHOSTUNREACH) {
2156 call->lastReceiveTime = 0;
/*
 * rxi_SendPacket -- encode `p`'s header in network byte order, stamp a
 * fresh connection serial number, and transmit the packet to conn->peer
 * over the appropriate socket. Under RXDEBUG the packet may be
 * intentionally dropped for testing; send failures bump stats and clear
 * RX_PKTFLAG_SENT so the packet is retransmitted soon.
 *
 * NOTE(review): numbering below is non-contiguous -- several braces,
 * `#ifdef KERNEL`/RXDEBUG wrappers and the `if ((code = ...` opener are
 * elided from this view; visible code kept byte-identical.
 */
2160 /* Send the packet to appropriate destination for the specified
2161 * call. The header is first encoded and placed in the packet.
2164 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2165 struct rx_packet *p, int istack)
2171 struct sockaddr_in addr;
2172 struct rx_peer *peer = conn->peer;
2175 char deliveryType = 'S';
2177 /* The address we're sending the packet to */
2178 memset(&addr, 0, sizeof(addr));
2179 addr.sin_family = AF_INET;
2180 addr.sin_port = peer->port;
2181 addr.sin_addr.s_addr = peer->host;
2182 memset(&addr.sin_zero, 0, sizeof(addr.sin_zero));
2184 /* This stuff should be revamped, I think, so that most, if not
2185 * all, of the header stuff is always added here. We could
2186 * probably do away with the encode/decode routines. XXXXX */
2188 /* Stamp each packet with a unique serial number. The serial
2189 * number is maintained on a connection basis because some types
2190 * of security may be based on the serial number of the packet,
2191 * and security is handled on a per authenticated-connection
2193 /* Pre-increment, to guarantee no zero serial number; a zero
2194 * serial number means the packet was never sent. */
2195 MUTEX_ENTER(&conn->conn_data_lock);
2196 p->header.serial = ++conn->serial;
/* Track the largest ping / data packet sizes for path-MTU discovery. */
2197 if (p->length > conn->peer->maxPacketSize) {
2198 if ((p->header.type == RX_PACKET_TYPE_ACK) &&
2199 (p->header.flags & RX_REQUEST_ACK)) {
2200 conn->lastPingSize = p->length;
2201 conn->lastPingSizeSer = p->header.serial;
2202 } else if (p->header.seq != 0) {
2203 conn->lastPacketSize = p->length;
2204 conn->lastPacketSizeSeq = p->header.seq;
2207 MUTEX_EXIT(&conn->conn_data_lock);
2208 /* This is so we can adjust retransmit time-outs better in the face of
2209 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2211 if (p->firstSerial == 0) {
2212 p->firstSerial = p->header.serial;
2215 /* If an output tracer function is defined, call it with the packet and
2216 * network address. Note this function may modify its arguments. */
2217 if (rx_almostSent) {
2218 int drop = (*rx_almostSent) (p, &addr);
2219 /* drop packet if return value is non-zero? */
2221 deliveryType = 'D'; /* Drop the packet */
2225 /* Get network byte order header */
2226 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2227 * touch ALL the fields */
2229 /* Send the packet out on the same socket that related packets are being
2233 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2236 /* Possibly drop this packet, for testing purposes */
2237 if ((deliveryType == 'D')
2238 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2239 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2240 deliveryType = 'D'; /* Drop the packet */
2242 deliveryType = 'S'; /* Send the packet */
2243 #endif /* RXDEBUG */
2245 /* Loop until the packet is sent. We'd prefer just to use a
2246 * blocking socket, but unfortunately the interface doesn't
2247 * allow us to have the socket block in send mode, and not
2248 * block in receive mode */
2250 waslocked = ISAFS_GLOCK();
2251 #ifdef RX_KERNEL_TRACE
2252 if (ICL_SETACTIVE(afs_iclSetp)) {
2255 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2256 "before osi_NetSend()");
2265 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2266 p->length + RX_HEADER_SIZE, istack)) != 0) {
2267 /* send failed, so let's hurry up the resend, eh? */
2268 if (rx_stats_active)
2269 rx_atomic_inc(&rx_stats.netSendFailures);
2270 p->flags &= ~RX_PKTFLAG_SENT; /* resend it very soon */
2272 /* Some systems are nice and tell us right away that we cannot
2273 * reach this recipient by returning an error code.
2274 * So, when this happens let's "down" the host NOW so
2275 * we don't sit around waiting for this host to timeout later.
2278 rxi_NetSendError(call, code);
2282 #ifdef RX_KERNEL_TRACE
2283 if (ICL_SETACTIVE(afs_iclSetp)) {
2285 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2286 "after osi_NetSend()");
2297 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
2298 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2299 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2300 p->header.seq, p->header.flags, p, p->length));
2302 if (rx_stats_active) {
2303 rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
2304 MUTEX_ENTER(&peer->peer_lock);
2305 peer->bytesSent += p->length;
2306 MUTEX_EXIT(&peer->peer_lock);
/*
 * rxi_SendPacketList -- transmit `len` packets as one AFS 3.5 jumbogram.
 * A single iovec array is assembled: wirevec[0] is the first packet's RX
 * header, then one iovec per packet payload; all but the last payload is
 * padded to RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE and carries the
 * RX_JUMBO_PACKET flag plus a 4-byte abbreviated header for its successor.
 * Consecutive serial numbers are reserved for the batch in one lock hold.
 *
 * NOTE(review): numbering below is non-contiguous -- loop braces, the
 * `if (i < len - 1)` jumbo/last-packet split, and several preprocessor
 * wrappers are elided from this view; visible code kept byte-identical.
 */
2310 /* Send a list of packets to appropriate destination for the specified
2311 * connection. The headers are first encoded and placed in the packets.
2314 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2315 struct rx_packet **list, int len, int istack)
2317 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2320 struct sockaddr_in addr;
2321 struct rx_peer *peer = conn->peer;
2323 struct rx_packet *p = NULL;
2324 struct iovec wirevec[RX_MAXIOVECS];
2325 int i, length, code;
2328 struct rx_jumboHeader *jp;
2330 char deliveryType = 'S';
2332 /* The address we're sending the packet to */
2333 addr.sin_family = AF_INET;
2334 addr.sin_port = peer->port;
2335 addr.sin_addr.s_addr = peer->host;
2336 memset(&addr.sin_zero, 0, sizeof(addr.sin_zero));
2338 if (len + 1 > RX_MAXIOVECS) {
2339 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2343 * Stamp the packets in this jumbogram with consecutive serial numbers
2345 MUTEX_ENTER(&conn->conn_data_lock);
2346 serial = conn->serial;
2347 conn->serial += len;
/* Path-MTU bookkeeping over the whole batch, mirroring rxi_SendPacket. */
2348 for (i = 0; i < len; i++) {
2350 /* a ping *or* a sequenced packet can count */
2351 if (p->length > conn->peer->maxPacketSize) {
2352 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2353 (p->header.flags & RX_REQUEST_ACK)) &&
2354 ((i == 0) || (p->length >= conn->lastPingSize))) {
2355 conn->lastPingSize = p->length;
2356 conn->lastPingSizeSer = serial + i;
2357 } else if ((p->header.seq != 0) &&
2358 ((i == 0) || (p->length >= conn->lastPacketSize))) {
2359 conn->lastPacketSize = p->length;
2360 conn->lastPacketSizeSeq = p->header.seq;
2364 MUTEX_EXIT(&conn->conn_data_lock);
2367 /* This stuff should be revamped, I think, so that most, if not
2368 * all, of the header stuff is always added here. We could
2369 * probably do away with the encode/decode routines. XXXXX */
2372 length = RX_HEADER_SIZE;
2373 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2374 wirevec[0].iov_len = RX_HEADER_SIZE;
2375 for (i = 0; i < len; i++) {
2378 /* The whole 3.5 jumbogram scheme relies on packets fitting
2379 * in a single packet buffer. */
2380 if (p->niovecs > 2) {
2381 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2384 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2387 if (p->length != RX_JUMBOBUFFERSIZE) {
2388 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2390 p->header.flags |= RX_JUMBO_PACKET;
2391 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2392 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2394 wirevec[i + 1].iov_len = p->length;
2395 length += p->length;
2397 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
/* `jp` points into the PREVIOUS packet's pad area; write this packet's
 * flags/spare there as the abbreviated jumbo header. */
2399 /* Convert jumbo packet header to network byte order */
2400 temp = (afs_uint32) (p->header.flags) << 24;
2401 temp |= (afs_uint32) (p->header.spare);
2402 *(afs_uint32 *) jp = htonl(temp);
2404 jp = (struct rx_jumboHeader *)
2405 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2407 /* Stamp each packet with a unique serial number. The serial
2408 * number is maintained on a connection basis because some types
2409 * of security may be based on the serial number of the packet,
2410 * and security is handled on a per authenticated-connection
2412 /* Pre-increment, to guarantee no zero serial number; a zero
2413 * serial number means the packet was never sent. */
2414 p->header.serial = ++serial;
2415 /* This is so we can adjust retransmit time-outs better in the face of
2416 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2418 if (p->firstSerial == 0) {
2419 p->firstSerial = p->header.serial;
2422 /* If an output tracer function is defined, call it with the packet and
2423 * network address. Note this function may modify its arguments. */
2424 if (rx_almostSent) {
2425 int drop = (*rx_almostSent) (p, &addr);
2426 /* drop packet if return value is non-zero? */
2428 deliveryType = 'D'; /* Drop the packet */
2432 /* Get network byte order header */
2433 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2434 * touch ALL the fields */
2437 /* Send the packet out on the same socket that related packets are being
2441 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2444 /* Possibly drop this packet, for testing purposes */
2445 if ((deliveryType == 'D')
2446 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2447 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2448 deliveryType = 'D'; /* Drop the packet */
2450 deliveryType = 'S'; /* Send the packet */
2451 #endif /* RXDEBUG */
2453 /* Loop until the packet is sent. We'd prefer just to use a
2454 * blocking socket, but unfortunately the interface doesn't
2455 * allow us to have the socket block in send mode, and not
2456 * block in receive mode */
2457 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2458 waslocked = ISAFS_GLOCK();
2459 if (!istack && waslocked)
2463 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2465 /* send failed, so let's hurry up the resend, eh? */
2466 if (rx_stats_active)
2467 rx_atomic_inc(&rx_stats.netSendFailures);
2468 for (i = 0; i < len; i++) {
2470 p->flags &= ~RX_PKTFLAG_SENT; /* resend it very soon */
2472 /* Some systems are nice and tell us right away that we cannot
2473 * reach this recipient by returning an error code.
2474 * So, when this happens let's "down" the host NOW so
2475 * we don't sit around waiting for this host to timeout later.
2478 rxi_NetSendError(call, code);
2481 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2482 if (!istack && waslocked)
2488 osi_Assert(p != NULL);
2490 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
2491 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2492 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2493 p->header.seq, p->header.flags, p, p->length));
2496 if (rx_stats_active) {
2497 rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
2498 MUTEX_ENTER(&peer->peer_lock);
2499 peer->bytesSent += p->length;
2500 MUTEX_EXIT(&peer->peer_lock);
/*
 * rxi_SendRawAbort -- emit an ABORT packet without any call/connection
 * state, echoing identifying fields (epoch, cid, callNumber, serviceId,
 * securityIndex) from the offending `source` packet so the receiver can
 * match it, with the 32-bit error code as the payload.
 */
2504 /* Send a raw abort packet, without any call or connection structures */
2506 rxi_SendRawAbort(osi_socket socket, afs_uint32 host, u_short port,
2507 afs_int32 error, struct rx_packet *source, int istack)
2509 struct rx_header theader;
2510 struct sockaddr_in addr;
2511 struct iovec iov[2];
2513 memset(&theader, 0, sizeof(theader));
2514 theader.epoch = htonl(source->header.epoch);
2515 theader.callNumber = htonl(source->header.callNumber);
2516 theader.serial = htonl(1);
2517 theader.type = RX_PACKET_TYPE_ABORT;
2518 theader.serviceId = htons(source->header.serviceId);
2519 theader.securityIndex = source->header.securityIndex;
2520 theader.cid = htonl(source->header.cid);
2523 * If the abort is being sent in response to a server initiated packet,
2524 * set client_initiated in the abort to ensure it is not associated by
2525 * the receiver with a connection in the opposite direction.
2527 if ((source->header.flags & RX_CLIENT_INITIATED) != RX_CLIENT_INITIATED)
2528 theader.flags |= RX_CLIENT_INITIATED;
/* The error code travels on the wire in network byte order. */
2530 error = htonl(error);
2532 iov[0].iov_base = &theader;
2533 iov[0].iov_len = sizeof(struct rx_header);
2534 iov[1].iov_base = &error;
2535 iov[1].iov_len = sizeof(error);
2537 addr.sin_family = AF_INET;
2538 addr.sin_addr.s_addr = host;
2539 addr.sin_port = port;
2540 memset(&addr.sin_zero, 0, sizeof(addr.sin_zero));
2541 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2542 addr.sin_len = sizeof(struct sockaddr_in);
/* Best-effort: aborts are not retransmitted, result intentionally unused. */
2545 osi_NetSend(socket, &addr, iov, 2,
2546 sizeof(struct rx_header) + sizeof(error), istack);
/*
 * rxi_SendSpecial -- build and send a control ("special") packet of the
 * given `type`, directed at a specific call channel when `call` is non-null
 * or at the connection otherwise. Reuses `optionalPacket` when supplied
 * (returned to the caller); otherwise allocates and frees a fresh packet
 * and returns NULL.
 *
 * NOTE(review): numbering below is non-contiguous -- the call==NULL
 * branch, the nbytes/p->length negotiation, and the trim-loop break are
 * elided from this view; visible code kept byte-identical.
 */
2549 /* Send a "special" packet to the peer connection. If call is
2550 * specified, then the packet is directed to a specific call channel
2551 * associated with the connection, otherwise it is directed to the
2552 * connection only. Uses optionalPacket if it is supplied, rather than
2553 * allocating a new packet buffer. Nbytes is the length of the data
2554 * portion of the packet. If data is non-null, nbytes of data are
2555 * copied into the packet. Type is the type of the packet, as defined
2556 * in rx.h. Bug: there's a lot of duplication between this and other
2557 * routines. This needs to be cleaned up. */
2559 rxi_SendSpecial(struct rx_call *call,
2560 struct rx_connection *conn,
2561 struct rx_packet *optionalPacket, int type, char *data,
2562 int nbytes, int istack)
2564 /* Some of the following stuff should be common code for all
2565 * packet sends (it's repeated elsewhere) */
2566 struct rx_packet *p;
2568 int savelen = 0, saven = 0;
2569 int channel, callNumber;
2571 channel = call->channel;
2572 callNumber = *call->callNumber;
2573 /* BUSY packets refer to the next call on this connection */
2574 if (type == RX_PACKET_TYPE_BUSY) {
2583 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2585 osi_Panic("rxi_SendSpecial failure");
/* Stamp the outgoing header from the connection/call identity. */
2592 p->header.serviceId = conn->serviceId;
2593 p->header.securityIndex = conn->securityIndex;
2594 p->header.cid = (conn->cid | channel);
2595 p->header.callNumber = callNumber;
2597 p->header.epoch = conn->epoch;
2598 p->header.type = type;
2599 p->header.flags = 0;
2600 if (conn->type == RX_CLIENT_CONNECTION)
2601 p->header.flags |= RX_CLIENT_INITIATED;
2603 rx_packetwrite(p, 0, nbytes, data);
/* Trim the iovec chain to exactly nbytes of payload, remembering the
 * pre-trim values so they can be restored after the send. */
2605 for (i = 1; i < p->niovecs; i++) {
2606 if (nbytes <= p->wirevec[i].iov_len) {
2607 savelen = p->wirevec[i].iov_len;
2609 p->wirevec[i].iov_len = nbytes;
2610 p->niovecs = i + 1; /* so condition fails because i == niovecs */
2612 nbytes -= p->wirevec[i].iov_len;
2616 rxi_Send(call, p, istack);
2618 rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2619 if (saven) { /* means we truncated the packet above. We probably don't */
2620 /* really need to do this, but it seems safer this way, given that */
2621 /* sneaky optionalPacket... */
2622 p->wirevec[i - 1].iov_len = savelen;
2625 if (!optionalPacket)
2627 return optionalPacket;
2631 /* Encode the packet's header (from the struct header in the packet to
2632 * the net byte order representation in the wire representation of the
2633 * packet, which is what is actually sent out on the wire) */
2635 rxi_EncodePacketHeader(struct rx_packet *p)
2637 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2639 memset(buf, 0, RX_HEADER_SIZE);
2640 *buf++ = htonl(p->header.epoch);
2641 *buf++ = htonl(p->header.cid);
2642 *buf++ = htonl(p->header.callNumber);
2643 *buf++ = htonl(p->header.seq);
2644 *buf++ = htonl(p->header.serial);
2645 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2646 | (((afs_uint32) p->header.flags) << 16)
2647 | (p->header.userStatus << 8) | p->header.securityIndex);
2648 /* Note: top 16 bits of this next word were reserved */
2649 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
2652 /* Decode the packet's header (from net byte order to a struct header) */
2654 rxi_DecodePacketHeader(struct rx_packet *p)
2656 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2659 p->header.epoch = ntohl(*buf);
2661 p->header.cid = ntohl(*buf);
2663 p->header.callNumber = ntohl(*buf);
2665 p->header.seq = ntohl(*buf);
2667 p->header.serial = ntohl(*buf);
2673 /* C will truncate byte fields to bytes for me */
2674 p->header.type = temp >> 24;
2675 p->header.flags = temp >> 16;
2676 p->header.userStatus = temp >> 8;
2677 p->header.securityIndex = temp >> 0;
2682 p->header.serviceId = (temp & 0xffff);
2683 p->header.spare = temp >> 16;
2684 /* Note: top 16 bits of this last word are the security checksum */
2688 * LOCKS HELD: called with call->lock held.
2690 * PrepareSendPacket is the only place in the code that
2691 * can increment call->tnext. This could become an atomic
2692 * in the future. Beyond that there is nothing in this
2693 * function that requires the call being locked. This
2694 * function can only be called by the application thread.
2697 rxi_PrepareSendPacket(struct rx_call *call,
2698 struct rx_packet *p, int last)
2700 struct rx_connection *conn = call->conn;
2701 afs_uint32 seq = call->tnext++;
2703 afs_int32 len; /* len must be a signed type; it can go negative */
2706 /* No data packets on call 0. Where do these come from? */
2707 if (*call->callNumber == 0)
2708 *call->callNumber = 1;
/* call->lock is dropped while the wire header is stamped; it is
 * re-acquired below, just before the security object is invoked. */
2710 MUTEX_EXIT(&call->lock);
/* Clear per-send state; the packet is neither acked nor sent yet. */
2711 p->flags &= ~(RX_PKTFLAG_ACKED | RX_PKTFLAG_SENT);
/* Stamp the Rx wire header from connection and call state. */
2713 p->header.cid = (conn->cid | call->channel);
2714 p->header.serviceId = conn->serviceId;
2715 p->header.securityIndex = conn->securityIndex;
2717 p->header.callNumber = *call->callNumber;
2718 p->header.seq = seq;
2719 p->header.epoch = conn->epoch;
2720 p->header.type = RX_PACKET_TYPE_DATA;
2721 p->header.flags = 0;
2722 p->header.spare = 0;
2723 if (conn->type == RX_CLIENT_CONNECTION)
2724 p->header.flags |= RX_CLIENT_INITIATED;
/* 'last' marks the final data packet of this call's send stream. */
2727 p->header.flags |= RX_LAST_PACKET;
2729 clock_Zero(&p->firstSent); /* Never yet transmitted */
2730 p->header.serial = 0; /* Another way of saying never transmitted... */
2732 /* Now that we're sure this is the last data on the call, make sure
2733 * that the "length" and the sum of the iov_lens matches. */
2734 len = p->length + call->conn->securityHeaderSize;
/* Walk the data iovecs consuming 'len'; the loop exits with index i
 * one past the last buffer that carries data (len <= 0), or with
 * len > 0 if the iovecs cannot cover the packet length. */
2736 for (i = 1; i < p->niovecs && len > 0; i++) {
2737 len -= p->wirevec[i].iov_len;
/* len still positive here means the iovecs cannot hold the declared
 * packet length -- unrecoverable internal inconsistency. */
2740 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2741 } else if (i < p->niovecs) {
2742 /* Free any extra elements in the wirevec */
2743 #if defined(RX_ENABLE_TSFPQ)
2744 rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2745 #else /* !RX_ENABLE_TSFPQ */
2746 MUTEX_ENTER(&rx_freePktQ_lock);
2747 rxi_FreeDataBufsNoLock(p, i);
2748 MUTEX_EXIT(&rx_freePktQ_lock);
2749 #endif /* !RX_ENABLE_TSFPQ */
/* len is <= 0 here: shrink the final iovec so the iov_lens sum
 * exactly to the packet length plus the security header. */
2754 p->wirevec[i - 1].iov_len += len;
2755 MUTEX_ENTER(&call->lock);
/* Give the security object a chance to encrypt/checksum the packet. */
2756 code = RXS_PreparePacket(conn->securityObject, call, p);
/* On failure, error the whole connection and send an abort rather
 * than risk transmitting unprotected data. Note the lock dance:
 * call->lock is released around the conn_data_lock critical section
 * and re-taken before asserting the call error. */
2758 MUTEX_EXIT(&call->lock);
2759 rxi_ConnectionError(conn, code);
2760 MUTEX_ENTER(&conn->conn_data_lock);
2761 p = rxi_SendConnectionAbort(conn, p, 0, 0);
2762 MUTEX_EXIT(&conn->conn_data_lock);
2763 MUTEX_ENTER(&call->lock);
2764 /* setting a connection error means all calls for that conn are also
2765 * error'd. if this call does not have an error by now, something is
2766 * very wrong, and we risk sending data in the clear that is supposed
2767 * to be encrypted. */
2768 osi_Assert(call->error);
2772 /* Given an interface MTU size, calculate an adjusted MTU size that
2773 * will make efficient use of the RX buffers when the peer is sending
2774 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
2776 rxi_AdjustIfMTU(int mtu)
2781 if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
2783 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2784 if (mtu <= adjMTU) {
2791 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2792 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2795 /* Given an interface MTU size, and the peer's advertised max receive
2796 * size, calculate an adjisted maxMTU size that makes efficient use
2797 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2799 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2801 int maxMTU = mtu * rxi_nSendFrags;
2802 maxMTU = MIN(maxMTU, peerMaxMTU);
2803 return rxi_AdjustIfMTU(maxMTU);
2806 /* Given a packet size, figure out how many datagram packet will fit.
2807 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2808 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2809 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2811 rxi_AdjustDgramPackets(int frags, int mtu)
2814 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2817 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2818 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2819 /* subtract the size of the first and last packets */
2820 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2824 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2829 * This function can be used by the Windows Cache Manager
2830 * to dump the list of all rx packets so that we can determine
2831 * where the packet leakage is.
/* Debug aid: walks the global rx_mallocedP list and prints one line
 * per live packet, prefixed with the caller-supplied 'cookie' tag.
 * NOTE(review): only compiled to a working body under RXDEBUG_PACKET
 * (see #ifdef below); the return statement is not visible in this
 * excerpt -- confirm against the full source. */
2833 int rx_DumpPackets(FILE *outputFile, char *cookie)
2835 #ifdef RXDEBUG_PACKET
2836 struct rx_packet *p;
/* Two output strategies: format into a local buffer and flush with
 * WriteFile (presumably the Windows/NT build -- TODO confirm the
 * elided #ifdef), or write directly to outputFile with fprintf. */
2840 #define RXDPRINTF sprintf
2841 #define RXDPRINTOUT output
2843 #define RXDPRINTF fprintf
2844 #define RXDPRINTOUT outputFile
/* rx_freePktQ_lock protects the rx_mallocedP list during the walk. */
2848 MUTEX_ENTER(&rx_freePktQ_lock);
2849 RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
2851 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
/* One line per packet: identity, timing, flags and full wire header. */
2854 for (p = rx_mallocedP; p; p = p->allNextp) {
2855 RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, length=%u header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
2856 cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec,
2857 p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->length,
2858 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
2859 (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
2860 (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
2862 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2866 RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
2868 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2871 MUTEX_EXIT(&rx_freePktQ_lock);
2873 #endif /* RXDEBUG_PACKET */