 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include <afs/param.h>
# include "afs/sysincludes.h"
# include "afsincludes.h"
# include "rx_kcommon.h"
# else /* defined(UKERNEL) */
#  ifdef RX_KERNEL_TRACE
#   include "rx_kcommon.h"
#  ifndef AFS_LINUX20_ENV
#  if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
#   include "afs/sysincludes.h"
#  if defined(AFS_OBSD_ENV)
#  include "h/socket.h"
#  if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#   if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#    include "sys/mount.h"	/* it gets pulled in by something later anyway */
#  include "netinet/in.h"
#  include "afs/afs_osi.h"
#  include "rx_kmutex.h"
# endif /* defined(UKERNEL) */
# if defined(AFS_NT40_ENV)
#  define EWOULDBLOCK WSAEWOULDBLOCK
#  include "rx_xmit_nt.h"
#  include <sys/sysmacros.h>
#include <opr/queue.h>
#include "rx_packet.h"
#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
static struct rx_packet *rx_mallocedP = 0;
static afs_uint32 rx_packet_id = 0;
extern char cml_version_number[];
static int AllocPacketBufs(int class, int num_pkts, struct opr_queue *q);
static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_uint32 ahost, short aport,
				afs_int32 istack);
static struct rx_packet *rxi_AllocPacketNoLock(int class);
static void rxi_MorePacketsNoLock(int apackets);
#ifdef RX_ENABLE_TSFPQ
static int rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first,
				 int flush_global);
static void rxi_AdjustLocalPacketsTSFPQ(int num_keep_local,
					int allow_overcommit);
static void rxi_FreePacketNoLock(struct rx_packet *p);
static int rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first);
static int rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first,
				   struct opr_queue * q);
extern struct opr_queue rx_idleServerQueue;
/* some rules about packets:
 * 1.  When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 * all packet buffers (iov_base) are integral multiples of
 * offset is an integral multiple of the word size.
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
	l += packet->wirevec[i].iov_len;
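/* Illustrative sketch (not in the original source): callers normally go
 * through the rx_GetInt32() fast path and fall back to the slow routine only
 * when the word is not wholly inside one iovec; roughly:
 *
 *	afs_int32 v;
 *	if (offset + sizeof(afs_int32) <= packet->wirevec[1].iov_len)
 *	    v = *(afs_int32 *)((char *)packet->wirevec[1].iov_base + offset);
 *	else
 *	    v = rx_SlowGetInt32(packet, offset);
 *
 * The exact fast-path test used by the real rx_GetInt32() macro may differ;
 * this only shows the intended division of labor. */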
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	l += packet->wirevec[i].iov_len;
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
    while ((r > 0) && (i < packet->niovecs)) {
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	l += packet->wirevec[i].iov_len;
    return (r ? (resid - r) : resid);
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    unsigned int i, j, l, o, r;
    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > o) {
	l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
    while ((r > 0) && (i <= RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	l += packet->wirevec[i].iov_len;
    return (r ? (resid - r) : resid);
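/* Illustrative sketch (not in the original source): the slow read/write
 * pair round-trips data across iovec boundaries, with the write side
 * growing the packet via rxi_AllocDataBuf() on demand:
 *
 *	char buf[512], check[512];
 *	memset(buf, 0xab, sizeof(buf));
 *	if (rx_SlowWritePacket(pkt, 0, sizeof(buf), buf) == sizeof(buf))
 *	    (void)rx_SlowReadPacket(pkt, 0, sizeof(check), check);
 *
 * Both routines return the number of bytes actually transferred, which is
 * less than the request only when the packet cannot be extended further. */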
rxi_AllocPackets(int class, int num_pkts, struct opr_queue * q)
    num_pkts = AllocPacketBufs(class, num_pkts, q);
    for (opr_queue_Scan(q, c)) {
	RX_PACKET_IOV_FULLINIT(opr_queue_Entry(c, struct rx_packet, entry));
#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct opr_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    transfer = num_pkts - rx_ts_info->_FPQ.len;
	MUTEX_ENTER(&rx_freePktQ_lock);
	transfer = MAX(transfer, rx_TSFPQGlobSize);
	if (transfer > rx_nFreePackets) {
	    /* alloc enough for us, plus a few globs for other threads */
	    rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
	RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
	MUTEX_EXIT(&rx_freePktQ_lock);
    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct opr_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);
	for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
	     num_pkts--, overq++);
	    rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_atomic_inc(&rx_stats.receivePktAllocFailures);
	    case RX_PACKET_CLASS_SEND:
		rx_atomic_inc(&rx_stats.sendPktAllocFailures);
	    case RX_PACKET_CLASS_SPECIAL:
		rx_atomic_inc(&rx_stats.specialPktAllocFailures);
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
    if (rx_nFreePackets < num_pkts)
	num_pkts = rx_nFreePackets;
	rxi_NeedMorePackets = TRUE;
    if (rx_nFreePackets < num_pkts) {
	rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), 4 * rx_initSendWindow));
    for (i=0, c=opr_queue_First(&rx_freePacketQueue, struct rx_packet, entry);
	 i++, c=opr_queue_Next(&c->entry, struct rx_packet, entry)) {
    opr_queue_SplitBeforeAppend(&rx_freePacketQueue, q, &c->entry);
    rx_nFreePackets -= num_pkts;
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
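/* Illustrative sketch (not in the original source): callers that need a
 * burst of packets allocate them as a batch and hand back the leftovers,
 * along the lines of
 *
 *	struct opr_queue q;
 *	opr_queue_Init(&q);
 *	int got = rxi_AllocPackets(RX_PACKET_CLASS_SEND, 8, &q);
 *	...consume up to 'got' packets from q...
 *	rxi_FreePackets(0, &q);    (0 means "queue length unknown")
 *
 * The return value is an assumption from the surrounding code:
 * rxi_AllocPackets may return fewer packets than requested when the
 * class is over quota. */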
 * Free a packet currently used as a continuation buffer
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct opr_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    struct opr_queue *cursor, *store;
    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);
	for (opr_queue_ScanSafe(q, cursor, store)) {
	    rxi_FreeDataBufsTSFPQ(opr_queue_Entry(cursor, struct rx_packet,
	for (opr_queue_ScanSafe(q, cursor, store)) {
	    rxi_FreeDataBufsTSFPQ(opr_queue_Entry(cursor, struct rx_packet,
    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	RX_TS_FPQ_LTOG(rx_ts_info);
	/* Wakeup anyone waiting for packets */
	MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct opr_queue *q)
    struct opr_queue cbs;
    struct opr_queue *cursor, *store;
    osi_Assert(num_pkts >= 0);
    opr_queue_Init(&cbs);
	for (opr_queue_ScanSafe(q, cursor, store)) {
		= opr_queue_Entry(cursor, struct rx_packet, entry);
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	for (opr_queue_ScanSafe(q, cursor, store)) {
		= opr_queue_Entry(cursor, struct rx_packet, entry);
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
    opr_queue_SpliceAppend(q, &cbs);
    MUTEX_ENTER(&rx_freePktQ_lock);
    opr_queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;
    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
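/* Illustrative sketch (not in the original source): a security package that
 * rounds user data up to its cipher block size could reclaim the slack with
 * something like
 *
 *	unsigned int slack = len % BLOCKSIZE ? BLOCKSIZE - (len % BLOCKSIZE) : 0;
 *	rxi_RoundUpPacket(p, slack);
 *
 * BLOCKSIZE and the rounding rule are hypothetical here; they only show the
 * sort of 'nb' a caller like rxkad would pass in. */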
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    struct opr_queue q, *cursor, *store;
    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
    if ((nv + p->niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - p->niovecs;
    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);
    /* setup packet iovs */
    for (opr_queue_ScanSafe(&q, cursor, store)) {
	    = opr_queue_Entry(cursor, struct rx_packet, entry);
	opr_queue_Remove(&cb->entry);
	p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
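/* Illustrative arithmetic (not in the original source): the cbuf count is a
 * round-up division, nv = (nb + RX_CBUFFERSIZE - 1) / RX_CBUFFERSIZE,
 * clamped so the packet never exceeds RX_MAXWVECS iovecs.  For example, a
 * request for nb = 2*RX_CBUFFERSIZE + 1 bytes yields nv = 3, and the caller
 * gets back nb - 3*RX_CBUFFERSIZE, a value <= 0 meaning success with slack. */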
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);
    PIN(p, getme);		/* XXXXX */
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);
    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	RX_TS_FPQ_CHECKIN(rx_ts_info,p);
	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);
    rx_ts_info->_FPQ.delta += apackets;
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_NeedMorePackets = FALSE;
	MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);
    PIN(p, getme);		/* XXXXX */
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;
	opr_queue_Append(&rx_freePacketQueue, &p->entry);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);
    PIN(p, getme);		/* XXXXX */
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);
    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	RX_TS_FPQ_CHECKIN(rx_ts_info,p);
	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);
    rx_ts_info->_FPQ.delta += apackets;
	(num_keep_local < apackets)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
	rxi_NeedMorePackets = FALSE;
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;
    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
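    /* Illustrative arithmetic (not in the original source): each extra
     * continuation buffer holds RX_CBUFFERSIZE bytes, so a jumbo-sized
     * receive needs (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) /
     * RX_CBUFFERSIZE cbufs beyond the first buffer.  Growing 'apackets'
     * by that factor for a quarter of the packets means one packet in
     * four can be fully populated with cbufs without further allocation. */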
    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);
    apackets -= apackets / 4;
    osi_Assert(apackets > 0);
#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info,apackets);
#endif /* RX_ENABLE_TSFPQ */
    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;
	opr_queue_Append(&rx_freePacketQueue, &p->entry);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;
rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    if (num_keep_local != rx_ts_info->_FPQ.len) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	if (num_keep_local < rx_ts_info->_FPQ.len) {
	    xfer = rx_ts_info->_FPQ.len - num_keep_local;
	    RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
	    xfer = num_keep_local - rx_ts_info->_FPQ.len;
	    if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
		xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
	    if (rx_nFreePackets < xfer) {
		rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
	    RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
	MUTEX_EXIT(&rx_freePktQ_lock);
rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_maxSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
/* Actually free the packet p. */
#ifndef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %"AFS_PTR_FMT"\n", p));
    opr_queue_Append(&rx_freePacketQueue, &p->entry);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %"AFS_PTR_FMT"\n", p));
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info,p);
    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	RX_TS_FPQ_LTOG(rx_ts_info);
	/* Wakeup anyone waiting for packets */
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
 * free continuation buffers off a packet into a queue
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *   number of continuation buffers freed
#ifndef RX_ENABLE_TSFPQ
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct opr_queue * q)
    struct rx_packet * cb;
    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
	iov = &p->wirevec[first];
	    osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
	cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
	RX_FPQ_MARK_FREE(cb);
	opr_queue_Append(q, &cb->entry);
 * free packet continuation buffers into the global free packet pool
 * [IN] p      -- packet from which to free continuation buffers
 * [IN] first  -- iovec offset of first continuation buffer to free
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	    osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 * free packet continuation buffers into the thread-local free pool
 * [IN] p             -- packet from which continuation buffers will be freed
 * [IN] first         -- iovec offset of first continuation buffer to free
 *                       any value less than 2, the min number of iovecs,
 *                       is treated as if it is 2.
 * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 *                       global free pool before returning
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	    osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
	RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	RX_TS_FPQ_LTOG(rx_ts_info);
	/* Wakeup anyone waiting for packets */
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
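/* Illustrative sketch (not in the original source): a caller batches cbufs
 * into a private queue and splices them back under one lock acquisition,
 * mirroring what rxi_FreePackets() above does for whole packet lists:
 *
 *	struct opr_queue cbs;
 *	opr_queue_Init(&cbs);
 *	int n = rxi_FreeDataBufsToQueue(p, 2, &cbs);
 *	MUTEX_ENTER(&rx_freePktQ_lock);
 *	opr_queue_SpliceAppend(&rx_freePacketQueue, &cbs);
 *	rx_nFreePackets += n;
 *	MUTEX_EXIT(&rx_freePktQ_lock);
 */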
int rxi_nBadIovecs = 0;
/* rxi_RestoreDataBufs
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
rxi_RestoreDataBufs(struct rx_packet *p)
    RX_PACKET_IOV_INIT(p);
    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	iov->iov_len = RX_CBUFFERSIZE;
#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;
	osi_Panic("TrimDataBufs 1: first must be 1");
    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    /* iov now points to the first empty data buffer. */
	RX_TS_INFO_GET(rx_ts_info);
	for (; iov < end; iov++) {
		osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	    RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
	if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	    MUTEX_ENTER(&rx_freePktQ_lock);
	    RX_TS_FPQ_LTOG(rx_ts_info);
	    rxi_PacketsUnWait();
	    MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
	osi_Panic("TrimDataBufs 1: first must be 1");
    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    /* iov now points to the first empty data buffer. */
	MUTEX_ENTER(&rx_freePktQ_lock);
	for (; iov < end; iov++) {
		osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	    rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
	rxi_PacketsUnWait();
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
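/* Illustrative note (not in the original source): since p->length counts
 * only data bytes, the wire size of a packet is p->length + RX_HEADER_SIZE,
 * which is exactly the length the send paths below hand to osi_NetSend(). */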
#ifdef RX_ENABLE_TSFPQ
static struct rx_packet *
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_atomic_inc(&rx_stats.receivePktAllocFailures);
	    case RX_PACKET_CLASS_SEND:
		rx_atomic_inc(&rx_stats.sendPktAllocFailures);
	    case RX_PACKET_CLASS_SPECIAL:
		rx_atomic_inc(&rx_stats.specialPktAllocFailures);
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
	return (struct rx_packet *)0;
    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);
    if (opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {
	if (opr_queue_IsEmpty(&rx_freePacketQueue))
	    osi_Panic("rxi_AllocPacket error");
	if (opr_queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);
	RX_TS_FPQ_GTOL(rx_ts_info);
    RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
static struct rx_packet *
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_atomic_inc(&rx_stats.receivePktAllocFailures);
	    case RX_PACKET_CLASS_SEND:
		rx_atomic_inc(&rx_stats.sendPktAllocFailures);
	    case RX_PACKET_CLASS_SPECIAL:
		rx_atomic_inc(&rx_stats.specialPktAllocFailures);
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
	return (struct rx_packet *)0;
    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);
    if (opr_queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");
    if (opr_queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(rx_maxSendWindow);
    p = opr_queue_First(&rx_freePacketQueue, struct rx_packet, entry);
    opr_queue_Remove(&p->entry);
    RX_FPQ_MARK_USED(p);
    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
static struct rx_packet *
rxi_AllocPacketTSFPQ(int class, int pull_global)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);
    if (pull_global && opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	if (opr_queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);
	RX_TS_FPQ_GTOL(rx_ts_info);
	MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {
    RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    struct rx_packet *p;
    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    struct rx_packet *p;
    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
rxi_AllocSendPacket(struct rx_call *call, int want)
    struct rx_packet *p = (struct rx_packet *)0;
    mud = call->MTU - RX_HEADER_SIZE;
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
	want = MIN(want, mud);
	if ((unsigned)want > p->length)
	    (void)rxi_AllocDataBuf(p, (want - p->length),
				   RX_PACKET_CLASS_SEND_CBUF);
	if (p->length > mud)
	if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */
    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);
	    want = MIN(want, mud);
	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);
	    if (p->length > mud)
	    if (delta >= p->length) {
	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;
#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
	osi_rxSleep(&rx_waitingForPackets);
	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
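/* Illustrative arithmetic (not in the original source): 'mud' (max user
 * data) is call->MTU - RX_HEADER_SIZE, and 'delta' is the security module's
 * header size plus its maximum trailer size.  In the full routine the
 * request is grown by 'delta' and then clamped to 'mud', so with a
 * hypothetical 1444-byte call MTU and a 28-byte Rx header, a caller asking
 * for more than fits simply gets a packet sized to 1444 - 28 = 1416 bytes
 * of user data, trailer room included. */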
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
/* count the number of used FDs */
    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
#endif /* AFS_NT40_ENV */
#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)
/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    afs_uint32 tlen, savelen;
    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */
    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
	tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);
    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;
    p->length = (u_short)(nbytes - RX_HEADER_SIZE);
    if (nbytes < 0 || (nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes < 0 && errno == EWOULDBLOCK) {
	    if (rx_stats_active)
		rx_atomic_inc(&rx_stats.noPacketOnRead);
	} else if (nbytes <= 0) {
	    if (rx_stats_active) {
		rx_atomic_inc(&rx_stats.bogusPacketOnRead);
		rx_stats.bogusHost = from.sin_addr.s_addr;
	    dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
		 ntohs(from.sin_port), nbytes));
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
	     && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
	rxi_DecodePacketHeader(p);
	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
	     p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
	     p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
#ifdef RX_TRIMDATABUFS
	rxi_TrimDataBufs(p, 1);
	/* Extract packet header. */
	rxi_DecodePacketHeader(p);
	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	    && p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
	    rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
#ifdef RX_TRIMDATABUFS
	/* Free any empty packet buffers at the end of this packet */
	rxi_TrimDataBufs(p, 1);
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
    niov = p->niovecs - 2;
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);
    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);
    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
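/* Illustrative wire layout (not in the original source): a two-packet
 * jumbogram looks roughly like
 *
 *	| rx_header | RX_JUMBOBUFFERSIZE data | 4-byte jumbo hdr | data... |
 *
 * where the four-byte jumbo header packs the flags in the top byte and the
 * checksum/spare in the low 16 bits, matching the decode above:
 * flags = temp >> 24, cksum = (u_short)temp. */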
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    ret = rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
 * message receipt is done in rxk_input or rx_put.
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
cpytoc(mblk_t * mp, int off, int len, char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);
/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	    t = iovs[i].iov_len;
	memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;
    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */
    if (m->m_len <= off) {
    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;
    t = MIN(l1, MIN(l2, (unsigned int)len));
	    p1 = mtod(m, caddr_t);
	    p2 = iovs[i].iov_base;
	    l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
#if defined(AFS_NBSD_ENV)
rx_mb_to_packet(struct mbuf *amb, void (*free) (struct mbuf *), int hdr_len, int data_len, struct rx_packet *phandle)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;
#endif /* AFS_NBSD_ENV */
    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
		       afs_uint32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);
    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;
	    /* get basic stats */
	    memset(&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.nPackets = htonl(rx_nPackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
	    tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
	    tstat.idleThreads = opr_queue_Count(&rx_idleServerQueue);
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
	    ap->length = sizeof(struct rx_debugStats);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    rx_computelen(ap, ap->length);
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);
	    tl = sizeof(struct rx_debugConn) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    memset(&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV
	    MUTEX_ENTER(&rx_connHashTable_lock);
	    /* We might be slightly out of step since we are not
	     * locking each call, but this is only debugging output.
	    for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		if ((all || rxi_IsConnInteresting(tc))
		    && tin.index-- <= 0) {
		    tconn.host = tc->peer->host;
		    tconn.port = tc->peer->port;
		    tconn.cid = htonl(tc->cid);
		    tconn.epoch = htonl(tc->epoch);
		    tconn.serial = htonl(tc->serial);
		    for (j = 0; j < RX_MAXCALLS; j++) {
			tconn.callNumber[j] = htonl(tc->callNumber[j]);
			if ((tcall = tc->call[j])) {
			    tconn.callState[j] = tcall->state;
			    tconn.callMode[j] = tcall->mode;
			    tconn.callFlags[j] = tcall->flags;
			    if (!opr_queue_IsEmpty(&tcall->rq))
				tconn.callOther[j] |= RX_OTHER_IN;
			    if (!opr_queue_IsEmpty(&tcall->tq))
				tconn.callOther[j] |= RX_OTHER_OUT;
			    tconn.callState[j] = RX_STATE_NOTINIT;
		    tconn.natMTU = htonl(tc->peer->natMTU);
		    tconn.error = htonl(tc->error);
		    tconn.flags = tc->flags;
		    tconn.type = tc->type;
		    tconn.securityIndex = tc->securityIndex;
		    if (tc->securityObject) {
			RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			DOHTONL(packetsReceived);
			DOHTONL(packetsSent);
			DOHTONL(bytesReceived);
				 sizeof(tconn.secStats.spares) /
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)
		    MUTEX_EXIT(&rx_connHashTable_lock);
		    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
		    ap->length = sizeof(struct rx_debugConn);
		    rxi_SendDebugPacket(ap, asocket, ahost, aport,
	    MUTEX_EXIT(&rx_connHashTable_lock);
	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	 * Pass back all the peer structures we have available
    case RX_DEBUGI_GETPEER:{
	    struct rx_debugPeer tpeer;
	    tl = sizeof(struct rx_debugPeer) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    memset(&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of peers.
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock.
		 * also increases the risk that we will miss a new
		 * entry - but we are willing to live with this
		 * limitation since this is meant for debugging only
#ifdef AFS_PTHREAD_ENV
	    MUTEX_ENTER(&rx_peerHashTable_lock);
	    for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		if (tin.index-- <= 0) {
		    MUTEX_EXIT(&rx_peerHashTable_lock);
		    MUTEX_ENTER(&tp->peer_lock);
		    tpeer.host = tp->host;
		    tpeer.port = tp->port;
		    tpeer.ifMTU = htons(tp->ifMTU);
		    tpeer.idleWhen = htonl(tp->idleWhen);
		    tpeer.refCount = htons(tp->refCount);
		    tpeer.burstSize = 0;
		    tpeer.burstWait.sec = 0;
		    tpeer.burstWait.usec = 0;
		    tpeer.rtt = htonl(tp->rtt);
		    tpeer.rtt_dev = htonl(tp->rtt_dev);
		    tpeer.nSent = htonl(tp->nSent);
		    tpeer.reSends = htonl(tp->reSends);
		    tpeer.natMTU = htons(tp->natMTU);
		    tpeer.maxMTU = htons(tp->maxMTU);
		    tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
		    tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
		    tpeer.MTU = htons(tp->MTU);
		    tpeer.cwind = htons(tp->cwind);
		    tpeer.nDgramPackets = htons(tp->nDgramPackets);
		    tpeer.congestSeq = htons(tp->congestSeq);
		    tpeer.bytesSent.high =
			htonl(tp->bytesSent >> 32);
		    tpeer.bytesSent.low =
			htonl(tp->bytesSent & MAX_AFS_UINT32);
		    tpeer.bytesReceived.high =
			htonl(tp->bytesReceived >> 32);
		    tpeer.bytesReceived.low =
			htonl(tp->bytesReceived & MAX_AFS_UINT32);
		    MUTEX_EXIT(&tp->peer_lock);
		    MUTEX_ENTER(&rx_peerHashTable_lock);
	    MUTEX_EXIT(&rx_peerHashTable_lock);
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport,
	    MUTEX_EXIT(&rx_peerHashTable_lock);
	    /* if we make it here, there are no interesting packets */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    case RX_DEBUGI_RXSTATS:{
	    tl = sizeof(rx_stats) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    /* Since its all int32s convert to network order with a loop. */
	    if (rx_stats_active)
		MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) & rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
	    ap->length = sizeof(rx_stats);
	    if (rx_stats_active)
		MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
			 afs_uint32 ahost, short aport, int istack)
    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_uint32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;
    int waslocked = ISAFS_GLOCK();
    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	nbytes -= apacket->wirevec[i].iov_len;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");
    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
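/* Descriptive note (not in the original source): the trim/restore dance
 * above temporarily shrinks the iovec chain so the datagram carries exactly
 * apacket->length payload bytes; 'savelen' and 'saven' remember the one
 * iov_len and the niovecs count that were clobbered, and both are restored
 * after the send so the packet can be reused. */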
rxi_NetSendError(struct rx_call *call, int code)
    if (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) {
    if (code == -WSAEHOSTUNREACH) {
#elif defined(AFS_LINUX20_ENV)
    if (code == -ENETUNREACH) {
#elif defined(AFS_DARWIN_ENV)
    if (code == EHOSTUNREACH) {
	call->lastReceiveTime = 0;
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    char deliveryType = 'S';
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;
    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */
    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    if (p->length > conn->peer->maxPacketSize) {
	if ((p->header.type == RX_PACKET_TYPE_ACK) &&
	    (p->header.flags & RX_REQUEST_ACK)) {
	    conn->lastPingSize = p->length;
	    conn->lastPingSizeSer = p->header.serial;
	} else if (p->header.seq != 0) {
	    conn->lastPacketSize = p->length;
	    conn->lastPacketSizeSeq = p->header.seq;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	    deliveryType = 'D';	/* Drop the packet */
    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */
    /* Send the packet out on the same socket that related packets are being
	 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */
    /* Loop until the packet is sent.  We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
    waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");
	 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
		     p->length + RX_HEADER_SIZE, istack)) != 0) {
	/* send failed, so let's hurry up the resend, eh? */
	if (rx_stats_active)
	    rx_atomic_inc(&rx_stats.netSendFailures);
	p->flags &= ~RX_PKTFLAG_SENT;	/* resend it very soon */
	/* Some systems are nice and tell us right away that we cannot
	 * reach this recipient by returning an error code.
	 * So, when this happens let's "down" the host NOW so
	 * we don't sit around waiting for this host to timeout later.
	rxi_NetSendError(call, code);
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
	 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
	 p->header.seq, p->header.flags, p, p->length));
    if (rx_stats_active) {
	rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
	MUTEX_ENTER(&peer->peer_lock);
	peer->bytesSent += p->length;
	MUTEX_EXIT(&peer->peer_lock);
2333 /* Send a list of packets to appropriate destination for the specified
2334 * connection. The headers are first encoded and placed in the packets.
2337 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2338 struct rx_packet **list, int len, int istack)
2340 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2343 struct sockaddr_in addr;
2344 struct rx_peer *peer = conn->peer;
2346 struct rx_packet *p = NULL;
2347 struct iovec wirevec[RX_MAXIOVECS];
2348 int i, length, code;
2351 struct rx_jumboHeader *jp;
2353 char deliveryType = 'S';
2355 /* The address we're sending the packet to */
2356 addr.sin_family = AF_INET;
2357 addr.sin_port = peer->port;
2358 addr.sin_addr.s_addr = peer->host;
2360 if (len + 1 > RX_MAXIOVECS) {
2361 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2365 * Stamp the packets in this jumbogram with consecutive serial numbers
2367 MUTEX_ENTER(&conn->conn_data_lock);
2368 serial = conn->serial;
2369 conn->serial += len;
2370 for (i = 0; i < len; i++) {
2372 if (p->length > conn->peer->maxPacketSize) {
2373 /* a ping *or* a sequenced packet can count */
2374 if ((p->length > conn->peer->maxPacketSize)) {
2375 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2376 (p->header.flags & RX_REQUEST_ACK)) &&
2377 ((i == 0) || (p->length >= conn->lastPingSize))) {
2378 conn->lastPingSize = p->length;
2379 conn->lastPingSizeSer = serial + i;
2380 } else if ((p->header.seq != 0) &&
2381 ((i == 0) || (p->length >= conn->lastPacketSize))) {
2382 conn->lastPacketSize = p->length;
2383 conn->lastPacketSizeSeq = p->header.seq;
2388 MUTEX_EXIT(&conn->conn_data_lock);
2391 /* This stuff should be revamped, I think, so that most, if not
2392 * all, of the header stuff is always added here. We could
2393 * probably do away with the encode/decode routines. XXXXX */
2396 length = RX_HEADER_SIZE;
2397 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2398 wirevec[0].iov_len = RX_HEADER_SIZE;
2399 for (i = 0; i < len; i++) {
2402 /* The whole 3.5 jumbogram scheme relies on packets fitting
2403 * in a single packet buffer. */
2404 if (p->niovecs > 2) {
2405 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2408 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2411 if (p->length != RX_JUMBOBUFFERSIZE) {
2412 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2414 p->header.flags |= RX_JUMBO_PACKET;
2415 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2416 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2418 wirevec[i + 1].iov_len = p->length;
2419 length += p->length;
2421 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2423 /* Convert jumbo packet header to network byte order */
2424 temp = (afs_uint32) (p->header.flags) << 24;
2425 temp |= (afs_uint32) (p->header.spare);
2426 *(afs_uint32 *) jp = htonl(temp);
2428 jp = (struct rx_jumboHeader *)
2429 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2431 /* Stamp each packet with a unique serial number. The serial
2432 * number is maintained on a connection basis because some types
2433 * of security may be based on the serial number of the packet,
2434 * and security is handled on a per authenticated-connection
2436 /* Pre-increment, to guarantee no zero serial number; a zero
2437 * serial number means the packet was never sent. */
2438 p->header.serial = ++serial;
2439 /* This is so we can adjust retransmit time-outs better in the face of
2440 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2442 if (p->firstSerial == 0) {
2443 p->firstSerial = p->header.serial;
2446 /* If an output tracer function is defined, call it with the packet and
2447 * network address. Note this function may modify its arguments. */
2448 if (rx_almostSent) {
2449 int drop = (*rx_almostSent) (p, &addr);
2450 /* drop packet if return value is non-zero? */
2452 deliveryType = 'D'; /* Drop the packet */
2456 /* Get network byte order header */
2457 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2458 * touch ALL the fields */
    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
	(conn->type ==
	 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */
	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	waslocked = ISAFS_GLOCK();
	if (!istack && waslocked)
	    AFS_GUNLOCK();
#endif
	if ((code =
	     osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
			 istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    if (rx_stats_active)
		rx_atomic_inc(&rx_stats.netSendFailures);
	    for (i = 0; i < len; i++) {
		p = list[i];
		p->flags &= ~RX_PKTFLAG_SENT;	/* resend it very soon */
	    }
	    /* Some systems are nice and tell us right away that we cannot
	     * reach this recipient by returning an error code.
	     * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call)
		rxi_NetSendError(call, code);
	}
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	if (!istack && waslocked)
	    AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }
    osi_Assert(p != NULL);

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
	 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
	 p->header.seq, p->header.flags, p, p->length));
#endif

    if (rx_stats_active) {
	rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
	MUTEX_ENTER(&peer->peer_lock);
	peer->bytesSent += p->length;
	MUTEX_EXIT(&peer->peer_lock);
    }
}
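/*
 * Illustrative sketch, not part of the original source: the datagram
 * assembled above is the first packet's full Rx header, then each
 * non-final packet's data followed by a 4-byte jumbo header, then the
 * final packet's data.  Assuming the standard constants (RX_HEADER_SIZE
 * of 28, RX_JUMBOBUFFERSIZE of 1412, RX_JUMBOHEADERSIZE of 4), the total
 * wire length of an n-packet jumbogram works out as below.
 */
#if 0
static int
example_jumbogram_length(int npackets, int last_packet_length)
{
    /* hypothetical helper, shown for illustration only */
    return RX_HEADER_SIZE
	+ (npackets - 1) * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)
	+ last_packet_length;
    /* e.g. npackets = 2, last_packet_length = 1412:
     * 28 + 1416 + 1412 = 2856 bytes */
}
#endif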
/* Send a raw abort packet, without any call or connection structures */
void
rxi_SendRawAbort(osi_socket socket, afs_uint32 host, u_short port,
		 afs_int32 error, struct rx_packet *source, int istack)
{
    struct rx_header theader;
    struct sockaddr_in addr;
    struct iovec iov[2];

    memset(&theader, 0, sizeof(theader));
    theader.epoch = htonl(source->header.epoch);
    theader.callNumber = htonl(source->header.callNumber);
    theader.serial = htonl(1);
    theader.type = RX_PACKET_TYPE_ABORT;
    theader.serviceId = htons(source->header.serviceId);
    theader.securityIndex = source->header.securityIndex;
    theader.cid = htonl(source->header.cid);

    error = htonl(error);

    iov[0].iov_base = &theader;
    iov[0].iov_len = sizeof(struct rx_header);
    iov[1].iov_base = &error;
    iov[1].iov_len = sizeof(error);

    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = host;
    addr.sin_port = port;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    addr.sin_len = sizeof(struct sockaddr_in);
#endif

    osi_NetSend(socket, &addr, iov, 2,
		sizeof(struct rx_header) + sizeof(error), istack);
}
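/*
 * Illustrative note, not part of the original source: the abort datagram
 * built above is simply the 28-byte Rx header followed by one 4-byte
 * error code, both in network byte order.  A hypothetical call site
 * rejecting a packet 'np' received from host/port might look like:
 */
#if 0
    rxi_SendRawAbort(socket, host, port, RX_INVALID_OPERATION, np, istack);
#endif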
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(struct rx_call *call,
		struct rx_connection *conn,
		struct rx_packet *optionalPacket, int type, char *data,
		int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
	channel = call->channel;
	callNumber = *call->callNumber;
	/* BUSY packets refer to the next call on this connection */
	if (type == RX_PACKET_TYPE_BUSY) {
	    callNumber++;
	}
    } else {
	channel = 0;
	callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	if (!p)
	    osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
	p->length = nbytes;
    else
	nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
	rx_packetwrite(p, 0, nbytes, data);

    for (i = 1; i < p->niovecs; i++) {
	if (nbytes <= p->wirevec[i].iov_len) {
	    savelen = p->wirevec[i].iov_len;
	    saven = p->niovecs;
	    p->wirevec[i].iov_len = nbytes;
	    p->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
	rxi_Send(call, p, istack);
    else
	rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {		/* means we truncated the packet above.  We probably don't */
	/* really need to do this, but it seems safer this way, given that */
	/* sneaky optionalPacket... */
	p->wirevec[i - 1].iov_len = savelen;
	p->niovecs = saven;
    }
    if (!optionalPacket)
	rxi_FreePacket(p);
    return optionalPacket;
}
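/*
 * Illustrative sketch, not part of the original source: rx's
 * connection-abort path is a typical caller of this routine, pushing a
 * network-order error code as the data portion of an ABORT packet.
 */
#if 0
    error = htonl(conn->error);
    rxi_SendSpecial((struct rx_call *)0, conn, packet,
		    RX_PACKET_TYPE_ABORT, (char *)&error,
		    sizeof(error), istack);
#endif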
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */

    memset(buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
		   | (((afs_uint32) p->header.flags) << 16)
		   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
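/*
 * Layout note, not part of the original source: the seven 32-bit words
 * written above form the standard 28-byte Rx wire header:
 *
 *   word 0: epoch
 *   word 1: cid (connection id | channel)
 *   word 2: callNumber
 *   word 3: seq
 *   word 4: serial
 *   word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6: spare<<16 | serviceId (the top 16 bits double as the
 *           security checksum on the wire)
 */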
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf++);
    p->header.cid = ntohl(*buf++);
    p->header.callNumber = ntohl(*buf++);
    p->header.seq = ntohl(*buf++);
    p->header.serial = ntohl(*buf++);

    temp = ntohl(*buf++);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf++);
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
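/*
 * Illustrative sketch, not part of the original source: encode and decode
 * are inverses over the seven header words, so a round trip through the
 * wire representation reproduces the struct header fields.
 */
#if 0
    struct rx_header saved = p->header;
    rxi_EncodePacketHeader(p);
    rxi_DecodePacketHeader(p);
    osi_Assert(p->header.serial == saved.serial
	       && p->header.seq == saved.seq
	       && p->header.callNumber == saved.callNumber);
#endif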
/*
 * LOCKS HELD: called with call->lock held.
 *
 * PrepareSendPacket is the only place in the code that
 * can increment call->tnext.  This could become an atomic
 * in the future.  Beyond that there is nothing in this
 * function that requires the call being locked.  This
 * function can only be called by the application thread.
 */
void
rxi_PrepareSendPacket(struct rx_call *call,
		      struct rx_packet *p, int last)
{
    struct rx_connection *conn = call->conn;
    afs_uint32 seq = call->tnext++;
    unsigned int i;
    afs_int32 len;		/* len must be a signed type; it can go negative */

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
	*call->callNumber = 1;

    MUTEX_EXIT(&call->lock);
    p->flags &= ~(RX_PKTFLAG_ACKED | RX_PKTFLAG_SENT);

    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    p->header.callNumber = *call->callNumber;
    p->header.seq = seq;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
	p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->firstSent);	/* Never yet transmitted */
    p->header.serial = 0;	/* Another way of saying never transmitted... */

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
	len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
	osi_Panic("PrepareSendPacket 1\n");	/* MTUXXX */
    } else if (i < p->niovecs) {
	/* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
	rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
#else /* !RX_ENABLE_TSFPQ */
	MUTEX_ENTER(&rx_freePktQ_lock);
	rxi_FreeDataBufsNoLock(p, i);
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */

	p->niovecs = i;
    }
    if (len)
	p->wirevec[i - 1].iov_len += len;
    MUTEX_ENTER(&call->lock);
    RXS_PreparePacket(conn->securityObject, call, p);
}
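/*
 * Illustrative worked example, not part of the original source, with
 * hypothetical sizes: if p->length plus the security header is 1500
 * bytes and wirevec[1..2] hold 1412 + 1416 bytes, the loop above exits
 * after the second iovec with len == 1500 - 1412 - 1416 == -1328, any
 * iovecs past the second are returned to the free pool, and the final
 * iov_len is shrunk by 1328 bytes (1416 -> 88) so that the iovec sum
 * exactly matches the packet length.
 */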
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU, frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
	return mtu;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
	return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
	return adjMTU;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}

/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
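/*
 * Illustrative worked example, not part of the original source, assuming
 * the standard sizes RX_HEADER_SIZE = 28, RX_JUMBOBUFFERSIZE = 1412 and
 * RX_JUMBOHEADERSIZE = 4 (so adjMTU = 1444), with jumbograms enabled:
 *
 *   rxi_AdjustIfMTU(1500): 1500 - 1444 = 56; 56 / 1416 = 0 extra frags;
 *                          result 1444.
 *   rxi_AdjustIfMTU(9000): 9000 - 1444 = 7556; 7556 / 1416 = 5 frags;
 *                          result 1444 + 5 * 1416 = 8524.
 *
 * rxi_AdjustMaxMTU then scales the interface MTU by rxi_nSendFrags,
 * clamps it to the peer's advertised maximum, and re-rounds the result
 * with rxi_AdjustIfMTU.
 */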
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
	return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
	return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
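/*
 * Illustrative worked example, not part of the original source, assuming
 * UDP_HDR_SIZE = 28 (20-byte IPv4 header plus 8-byte UDP header) and the
 * standard jumbo constants: rxi_AdjustDgramPackets(3, 1444) computes
 * maxMTU = 3 * (1444 + 28) - 28 = 4388, subtracts the first/last packet
 * overhead 28 + 2 * 1412 + 4 = 2856 to get 1532, and returns
 * 2 + 1532 / 1416 = 3 packets per datagram.
 */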
/*
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where the packet leakage is.
 */
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
    struct rx_packet *p;
#ifdef AFS_NT40_ENV
    int zilch;
    char output[2048];
#define RXDPRINTF sprintf
#define RXDPRINTOUT output
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif
    MUTEX_ENTER(&rx_freePktQ_lock);
    RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    for (p = rx_mallocedP; p != NULL; p = p->allNextp) {
	RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, length=%u  header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
		  cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec,
		  p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->length,
		  p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
		  (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
		  (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
#ifdef AFS_NT40_ENV
	WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }

    RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RXDEBUG_PACKET */