 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License. For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html

#include <afsconfig.h>
#include <afs/param.h>
# include "afs/sysincludes.h"
# include "afsincludes.h"
# include "rx_kcommon.h"
# else /* defined(UKERNEL) */
# ifdef RX_KERNEL_TRACE
# include "rx_kcommon.h"
# ifndef AFS_LINUX20_ENV
# if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
# include "afs/sysincludes.h"
# if defined(AFS_OBSD_ENV)
# include "h/socket.h"
# if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
# if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
# include "sys/mount.h"		/* it gets pulled in by something later anyway */
# include "netinet/in.h"
# include "afs/afs_osi.h"
# include "rx_kmutex.h"
# endif /* defined(UKERNEL) */
# if defined(AFS_NT40_ENV)
# define EWOULDBLOCK WSAEWOULDBLOCK
# include "rx_xmit_nt.h"
# include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"

/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
static struct rx_packet *rx_mallocedP = 0;
static afs_uint32 rx_packet_id = 0;

extern char cml_version_number[];

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_uint32 ahost, short aport,
#ifdef RX_ENABLE_TSFPQ
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 *    a security trailer, but iov_len masks that fact. If the security
 *    package wants to add the trailer, it may do so, and then extend
 *    iov_len appropriately. For this reason, a packet's niovecs and
 *    iov_len fields should be accurate before calling PreparePacket.
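/*
 * Illustrative sketch (not part of the original file): per rule 1 above, a
 * security package appending its trailer would copy into the hidden room
 * past iov_len of the final iovec and then extend iov_len. The helper name
 * and its assumption that tlen bytes of spare room remain are hypothetical.
 */
#ifdef RX_PACKET_EXAMPLES	/* hypothetical guard, never defined */
static void
example_AppendSecurityTrailer(struct rx_packet *p, char *trailer, int tlen)
{
    struct iovec *last = &p->wirevec[p->niovecs - 1];

    /* copy into the hidden room, then expose it, per the rule above */
    memcpy((char *)last->iov_base + last->iov_len, trailer, tlen);
    last->iov_len += tlen;
}
#endif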
 * all packet buffers (iov_base) are integral multiples of
 * offset is an integral multiple of the word size.
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
        l += packet->wirevec[i].iov_len;
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                             (offset - l))) = data;
        l += packet->wirevec[i].iov_len;
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested. l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
    while ((r > 0) && (i < packet->niovecs)) {
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        l += packet->wirevec[i].iov_len;
    return (r ? (resid - r) : resid);
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    unsigned int i, j, l, o, r;
    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > o) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested. l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy into this iovec.
     * offset only applies to the first iovec.
    while ((r > 0) && (i <= RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        l += packet->wirevec[i].iov_len;
    return (r ? (resid - r) : resid);
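/*
 * Illustrative sketch (not part of the original file): a round trip through
 * the slow scatter/gather paths above. Both functions return the number of
 * bytes actually transferred, which is short when the iovec chain runs out
 * (or, for writes, when no more continuation buffers can be allocated).
 * The helper name is hypothetical.
 */
#ifdef RX_PACKET_EXAMPLES	/* hypothetical guard, never defined */
static int
example_RoundTrip(struct rx_packet *p)
{
    char in[16] = "slow path check", out[16];

    if (rx_SlowWritePacket(p, 0, sizeof(in), in) != sizeof(in))
        return -1;	/* could not grow the packet far enough */
    if (rx_SlowReadPacket(p, 0, sizeof(out), out) != sizeof(out))
        return -1;	/* packet shorter than expected */
    return memcmp(in, out, sizeof(in));	/* 0 on success */
}
#endif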
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
    struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
        RX_PACKET_IOV_FULLINIT(p);

#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
        MUTEX_ENTER(&rx_freePktQ_lock);
        transfer = MAX(transfer, rx_TSFPQGlobSize);
        if (transfer > rx_nFreePackets) {
            /* alloc enough for us, plus a few globs for other threads */
            rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);

        RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

        MUTEX_EXIT(&rx_freePktQ_lock);

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);

#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
         num_pkts--, overq++);

        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);

    if (rx_nFreePackets < num_pkts)
        num_pkts = rx_nFreePackets;

        rxi_NeedMorePackets = TRUE;

    if (rx_nFreePackets < num_pkts) {
        rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));

    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
         i++, c = queue_Next(c, rx_packet)) {

    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);

    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

 * Free a packet currently used as a continuation buffer
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

        for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        for (queue_Scan(q, c, nc, rx_packet)) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);

    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */

        MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    struct rx_packet *p, *np;

    osi_Assert(num_pkts >= 0);

        for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
        for (queue_Scan(q, p, np, rx_packet)) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);

        queue_SpliceAppend(q, &cbs);

    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending. All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet. Returns <= 0 if successful, otherwise
 * returns the number of bytes (> 0) which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time. Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
    if ((nv + p->niovecs) > RX_MAXWVECS)
        nv = RX_MAXWVECS - p->niovecs;
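    /*
     * Worked example (comment added for clarity, not in the original): the
     * divide above plus the round-up conditional that follows it compute a
     * ceiling division, nv = ceil(nb / RX_CBUFFERSIZE). With a cbuf size of
     * 1024, nb = 3000 gives nv = 2 from the truncating divide, and the
     * round-up makes it 3, i.e. the same result as
     * (nb + RX_CBUFFERSIZE - 1) / RX_CBUFFERSIZE. The count is then clamped
     * so the packet never exceeds RX_MAXWVECS iovecs.
     */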
    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
        p->wirevec[i].iov_base = (caddr_t) cb->localdata;
        p->wirevec[i].iov_len = RX_CBUFFERSIZE;

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);

        RX_TS_FPQ_CHECKIN(rx_ts_info, p);

        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_NeedMorePackets = FALSE;

        MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;

        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);

        RX_TS_FPQ_CHECKIN(rx_ts_info, p);

        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

        (num_keep_local < apackets)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
        rxi_NeedMorePackets = FALSE;

        MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
        apackets -= apackets / 4;
        osi_Assert(apackets > 0);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;

        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;

rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));

#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (num_keep_local < rx_ts_info->_FPQ.len) {
            xfer = rx_ts_info->_FPQ.len - num_keep_local;
            RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
            xfer = num_keep_local - rx_ts_info->_FPQ.len;
            if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
                xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
            if (rx_nFreePackets < xfer) {
                rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
            RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
        MUTEX_EXIT(&rx_freePktQ_lock);

rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */

/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_maxSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out. It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
/* This explanation is bogus. The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages. In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list. An array springs rapidly to mind.
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        RX_TS_FPQ_LTOG(rx_ts_info);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */

        MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

 * free continuation buffers off a packet into a queue
 * [IN] p -- packet from which continuation buffers will be freed
 * [IN] first -- iovec offset of first continuation buffer to free
 * [IN] q -- queue into which continuation buffers will be chained
 * number of continuation buffers freed
#ifndef RX_ENABLE_TSFPQ
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
    struct rx_packet * cb;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
        cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
        RX_FPQ_MARK_FREE(cb);

 * free packet continuation buffers into the global free packet pool
 * [IN] p -- packet from which to free continuation buffers
 * [IN] first -- iovec offset of first continuation buffer to free
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));

#ifdef RX_ENABLE_TSFPQ
 * free packet continuation buffers into the thread-local free pool
 * [IN] p -- packet from which continuation buffers will be freed
 * [IN] first -- iovec offset of first continuation buffer to free
 *               any value less than 2, the min number of iovecs,
 *               is treated as if it is 2.
 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
 *                      global free pool before returning
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
        RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */

        MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
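/*
 * Note (comment added for clarity, not in the original): the TSFPQ
 * ("thread-specific free packet queue") functions above implement a
 * two-level free pool. Frees go to a per-thread queue with no locking
 * (RX_TS_FPQ_CHECKIN); only when that queue grows past rx_TSFPQLocalMax is
 * rx_freePktQ_lock taken and a batch spilled to the global pool
 * (RX_TS_FPQ_LTOG). Allocation works in reverse via RX_TS_FPQ_CHECKOUT,
 * refilling from the global pool with RX_TS_FPQ_GTOL when the local queue
 * runs dry, so the common case touches no shared lock at all.
 */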
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 * Restore the correct sizes to the iovecs. Called when reusing a packet
 * for reading off the wire.
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
        iov->iov_len = RX_CBUFFERSIZE;

#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;

        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */
        RX_TS_INFO_GET(rx_ts_info);
        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));

        if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
            MUTEX_ENTER(&rx_freePktQ_lock);

            RX_TS_FPQ_LTOG(rx_ts_info);
            rxi_PacketsUnWait();

            MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;

        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */
        MUTEX_ENTER(&rx_freePktQ_lock);

        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));

        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

/* Free the packet p. P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
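/*
 * Illustrative sketch (not part of the original file): the expected pairing
 * of the allocator with the two-step free above. rxi_FreePacket releases any
 * continuation buffers (iovecs 2..niovecs-1) and then returns the packet
 * itself to the free pool, so a caller makes only the one call. The helper
 * name is hypothetical.
 */
#ifdef RX_PACKET_EXAMPLES	/* hypothetical guard, never defined */
static void
example_AllocAndFree(void)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);

    if (p != NULL) {
        /* ... fill in and use the packet ... */
        rxi_FreePacket(p);	/* p must not be on any queue here */
    }
}
#endif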
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary; besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
        return (struct rx_packet *)0;

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);

        RX_TS_FPQ_GTOL(rx_ts_info);

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;

    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
        return (struct rx_packet *)0;

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);

    if (queue_IsEmpty(&rx_freePacketQueue))
        osi_Panic("rxi_AllocPacket error");
    if (queue_IsEmpty(&rx_freePacketQueue))
        rxi_MorePacketsNoLock(rx_maxSendWindow);

    p = queue_First(&rx_freePacketQueue, rx_packet);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketTSFPQ(int class, int pull_global)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);

        RX_TS_FPQ_GTOL(rx_ts_info);

        MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call. It also sets the packet length before
 * returning. caution: this is often called at NETPRI
 * Called with call locked.
rxi_AllocSendPacket(struct rx_call *call, int want)
    struct rx_packet *p = (struct rx_packet *)0;

    mud = call->MTU - RX_HEADER_SIZE;
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
        want = MIN(want, mud);

        if ((unsigned)want > p->length)
            (void)rxi_AllocDataBuf(p, (want - p->length),
                                   RX_PACKET_CLASS_SEND_CBUF);

        if (p->length > mud)
        if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);

            want = MIN(want, mud);

            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);

            if (p->length > mud)
            if (delta >= p->length) {

        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
        call->flags |= RX_CALL_WAIT_PACKETS;
        MUTEX_ENTER(&rx_refcnt_mutex);
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&rx_refcnt_mutex);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
        osi_rxSleep(&rx_waitingForPackets);

        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        MUTEX_ENTER(&rx_refcnt_mutex);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&rx_refcnt_mutex);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
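        /*
         * Note on the wait above (comment added for clarity, not in the
         * original): the call lock is dropped before sleeping on
         * rx_waitingForPackets so that acks can be processed and packets
         * freed while this thread blocks; the CALL_HOLD/CALL_RELE pair
         * keeps the call structure alive while its lock is not held.
         */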
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
/* count the number of used FDs */
    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
#endif /* AFS_NT40_ENV */
#define CountFDs(amax) amax

#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    unsigned int nbytes;
    afs_uint32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising. Only check
                                 * it once in order to avoid races.  */
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (u_short)(nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
        if (nbytes < 0 && errno == EWOULDBLOCK) {
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.noPacketOnRead);
        } else if (nbytes <= 0) {
            if (rx_stats_active) {
                rx_atomic_inc(&rx_stats.bogusPacketOnRead);
                rx_stats.bogusHost = from.sin_addr.s_addr;
            dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
                 ntohs(from.sin_port), nbytes));
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
             && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;

        dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
             p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
             p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
#ifdef RX_TRIMDATABUFS
        rxi_TrimDataBufs(p, 1);

        /* Extract packet header. */
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
            if (rx_stats_active) {
                struct rx_peer *peer;
                rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
                 * Try to look up this peer structure. If it doesn't exist,
                 * don't create a new one -
                 * we don't keep count of the bytes sent/received if a peer
                 * structure doesn't already exist.
                 * The peer/connection cleanup code assumes that there is 1 peer
                 * per connection. If we actually created a peer structure here
                 * and this packet was an rxdebug packet, the peer structure would
                 * never be cleaned up.
                peer = rxi_FindPeer(*host, *port, 0, 0);
                /* Since this may not be associated with a connection,
                 * it may have no refCount, meaning we could race with
                if (peer && (peer->refCount > 0)) {
                    MUTEX_ENTER(&peer->peer_lock);
                    hadd32(peer->bytesReceived, p->length);
                    MUTEX_EXIT(&peer->peer_lock);

#ifdef RX_TRIMDATABUFS
        /* Free any empty packet buffers at the end of this packet */
        rxi_TrimDataBufs(p, 1);

#endif /* !KERNEL || UKERNEL */

/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header. All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length. All but the first packet are preceded by
     * an abbreviated four byte header. The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
    niov = p->niovecs - 2;
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
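/*
 * Illustrative sketch (not part of the original file): the abbreviated
 * jumbo header decoded above is a single network-order 32-bit word with the
 * flags in the top byte and the checksum in the low 16 bits. Packing it for
 * transmission (as rxi_SendPacketList does further below) is the inverse.
 * The helper name is hypothetical.
 */
#ifdef RX_PACKET_EXAMPLES	/* hypothetical guard, never defined */
static afs_uint32
example_PackJumboHeader(u_char flags, u_short cksum)
{
    afs_uint32 temp = ((afs_uint32) flags << 24) | (afs_uint32) cksum;
    return htonl(temp);		/* stored via *(afs_uint32 *) jp = ... */
}
#endif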
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);

#elif !defined(UKERNEL)
 * message receipt is done in rxk_input or rx_put.

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
cpytoc(mblk_t * mp, int off, int len, char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
            t = iovs[i].iov_len;
        memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e)  cpytoiovec(a, b, c, d, e)

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */
        if (m->m_len <= off) {
    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

        t = MIN(l1, MIN(l2, (unsigned int)len));
            p1 = mtod(m, caddr_t);
            p2 = iovs[i].iov_base;
            l2 = iovs[i].iov_len;

#endif /* AFS_SUN5_ENV */

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
#if defined(AFS_NBSD_ENV)
rx_mb_to_packet(struct mbuf *amb, void (*free) (struct mbuf *), int hdr_len, int data_len, struct rx_packet *phandle)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;
#endif /* AFS_NBSD_ENV */
    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /* KERNEL && !UKERNEL */

/* send a response to a debug packet */
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
                       afs_uint32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;

            /* get basic stats */
            memset(&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.nPackets = htonl(rx_nPackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
            tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);
            tl = sizeof(struct rx_debugStats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
            ap->length = sizeof(struct rx_debugStats);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            rx_computelen(ap, ap->length);

    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);

            tl = sizeof(struct rx_debugConn) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            memset(&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                                tconn.callState[j] = RX_STATE_NOTINIT;

                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)

                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,

            MUTEX_EXIT(&rx_connHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

     * Pass back all the peer structures we have available
    case RX_DEBUGI_GETPEER:{
            struct rx_debugPeer tpeer;

            tl = sizeof(struct rx_debugPeer) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            memset(&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 *
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        MUTEX_EXIT(&rx_peerHashTable_lock);

                        MUTEX_ENTER(&tp->peer_lock);
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = tp->burstSize;
                        tpeer.burst = tp->burst;
                        tpeer.burstWait.sec = htonl(tp->burstWait.sec);
                        tpeer.burstWait.usec = htonl(tp->burstWait.usec);
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.inPacketSkew = htonl(tp->inPacketSkew);
                        tpeer.outPacketSkew = htonl(tp->outPacketSkew);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high = htonl(tp->bytesSent.high);
                        tpeer.bytesSent.low = htonl(tp->bytesSent.low);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived.high);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived.low);
                        MUTEX_EXIT(&tp->peer_lock);

                        MUTEX_ENTER(&rx_peerHashTable_lock);
                        MUTEX_EXIT(&rx_peerHashTable_lock);

                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,

            MUTEX_EXIT(&rx_peerHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

    case RX_DEBUGI_RXSTATS:{
            tl = sizeof(rx_stats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            /* Since it's all int32s, convert to network order with a loop. */
            if (rx_stats_active)
                MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) & rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

            ap->length = sizeof(rx_stats);
            if (rx_stats_active)
                MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

        /* error response packet */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
                         afs_uint32 ahost, short aport, int istack)
    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
        memset(buf, 0, sizeof(buf));
        strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
        rx_packetwrite(ap, 0, 65, buf);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                    afs_uint32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;
    int waslocked = ISAFS_GLOCK();

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
        if (nbytes <= apacket->wirevec[i].iov_len) {
            savelen = apacket->wirevec[i].iov_len;
            saven = apacket->niovecs;
            apacket->wirevec[i].iov_len = nbytes;
            apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
        nbytes -= apacket->wirevec[i].iov_len;
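    /*
     * Note (comment added for clarity, not in the original): the loop above
     * shortens the iovec chain so osi_NetSend transmits exactly
     * apacket->length + RX_HEADER_SIZE bytes; sending the full wirevec would
     * append stale buffer contents to the datagram. savelen and saven let us
     * restore the packet after the send below.
     */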
2137 #ifdef RX_KERNEL_TRACE
2138 if (ICL_SETACTIVE(afs_iclSetp)) {
2141 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2142 "before osi_NetSend()");
2150 /* debug packets are not reliably delivered, hence the cast below. */
2151 (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2152 apacket->length + RX_HEADER_SIZE, istack);
2154 #ifdef RX_KERNEL_TRACE
2155 if (ICL_SETACTIVE(afs_iclSetp)) {
2157 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2158 "after osi_NetSend()");
2167 if (saven) { /* means we truncated the packet above. */
2168 apacket->wirevec[i - 1].iov_len = savelen;
2169 apacket->niovecs = saven;
2174 /* Send the packet to appropriate destination for the specified
2175 * call. The header is first encoded and placed in the packet.
2178 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2179 struct rx_packet *p, int istack)
2185 struct sockaddr_in addr;
2186 struct rx_peer *peer = conn->peer;
2189 char deliveryType = 'S';
2191 /* The address we're sending the packet to */
2192 memset(&addr, 0, sizeof(addr));
2193 addr.sin_family = AF_INET;
2194 addr.sin_port = peer->port;
2195 addr.sin_addr.s_addr = peer->host;
2197 /* This stuff should be revamped, I think, so that most, if not
2198 * all, of the header stuff is always added here. We could
2199 * probably do away with the encode/decode routines. XXXXX */
2201 /* Stamp each packet with a unique serial number. The serial
2202 * number is maintained on a connection basis because some types
2203 * of security may be based on the serial number of the packet,
2204 * and security is handled on a per authenticated-connection
2206 /* Pre-increment, to guarantee no zero serial number; a zero
2207 * serial number means the packet was never sent. */
2208 MUTEX_ENTER(&conn->conn_data_lock);
2209 p->header.serial = ++conn->serial;
2210 if (p->length > conn->peer->maxPacketSize) {
2211 if ((p->header.type == RX_PACKET_TYPE_ACK) &&
2212 (p->header.flags & RX_REQUEST_ACK)) {
2213 conn->lastPingSize = p->length;
2214 conn->lastPingSizeSer = p->header.serial;
2215 } else if (p->header.seq != 0) {
2216 conn->lastPacketSize = p->length;
2217 conn->lastPacketSizeSeq = p->header.seq;
2220 MUTEX_EXIT(&conn->conn_data_lock);
2221 /* This is so we can adjust retransmit time-outs better in the face of
2222 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2224 if (p->firstSerial == 0) {
2225 p->firstSerial = p->header.serial;
2228 /* If an output tracer function is defined, call it with the packet and
2229 * network address. Note this function may modify its arguments. */
2230 if (rx_almostSent) {
2231 int drop = (*rx_almostSent) (p, &addr);
2232 /* drop packet if return value is non-zero? */
2234 deliveryType = 'D'; /* Drop the packet */
2238 /* Get network byte order header */
2239 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2240 * touch ALL the fields */
2242 /* Send the packet out on the same socket that related packets are being
2246 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2249 /* Possibly drop this packet, for testing purposes */
2250 if ((deliveryType == 'D')
2251 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2252 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2253 deliveryType = 'D'; /* Drop the packet */
2255 deliveryType = 'S'; /* Send the packet */
2256 #endif /* RXDEBUG */
2258 /* Loop until the packet is sent. We'd prefer just to use a
2259 * blocking socket, but unfortunately the interface doesn't
2260 * allow us to have the socket block in send mode, and not
2261 * block in receive mode */
2263 waslocked = ISAFS_GLOCK();
2264 #ifdef RX_KERNEL_TRACE
2265 if (ICL_SETACTIVE(afs_iclSetp)) {
2268 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2269 "before osi_NetSend()");
2278 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2279 p->length + RX_HEADER_SIZE, istack)) != 0) {
2280 /* send failed, so let's hurry up the resend, eh? */
2281 if (rx_stats_active)
2282 rx_atomic_inc(&rx_stats.netSendFailures);
2283 p->flags &= ~RX_PKTFLAG_SENT; /* resend it very soon */
2285 /* Some systems are nice and tell us right away that we cannot
2286 * reach this recipient by returning an error code.
2287 * So, when this happens let's "down" the host NOW so
2288 * we don't sit around waiting for this host to timeout later.
2292 (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
2293 #elif defined(AFS_LINUX20_ENV)
2294 code == -ENETUNREACH
2295 #elif defined(AFS_DARWIN_ENV)
2296 code == EHOSTUNREACH
2301 call->lastReceiveTime = 0;
2304 #ifdef RX_KERNEL_TRACE
2305 if (ICL_SETACTIVE(afs_iclSetp)) {
2307 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2308 "after osi_NetSend()");
2319 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
2320 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2321 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2322 p->header.seq, p->header.flags, p, p->length));
2324 if (rx_stats_active) {
2325 rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
2326 MUTEX_ENTER(&peer->peer_lock);
2327 hadd32(peer->bytesSent, p->length);
2328 MUTEX_EXIT(&peer->peer_lock);
2332 /* Send a list of packets to appropriate destination for the specified
2333 * connection. The headers are first encoded and placed in the packets.
2336 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2337 struct rx_packet **list, int len, int istack)
2339 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2342 struct sockaddr_in addr;
2343 struct rx_peer *peer = conn->peer;
2345 struct rx_packet *p = NULL;
2346 struct iovec wirevec[RX_MAXIOVECS];
2347 int i, length, code;
2350 struct rx_jumboHeader *jp;
2352 char deliveryType = 'S';
2354 /* The address we're sending the packet to */
2355 addr.sin_family = AF_INET;
2356 addr.sin_port = peer->port;
2357 addr.sin_addr.s_addr = peer->host;
2359 if (len + 1 > RX_MAXIOVECS) {
2360 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2364 * Stamp the packets in this jumbogram with consecutive serial numbers
2366 MUTEX_ENTER(&conn->conn_data_lock);
2367 serial = conn->serial;
2368 conn->serial += len;
2369 for (i = 0; i < len; i++) {
2371 if (p->length > conn->peer->maxPacketSize) {
2372 /* a ping *or* a sequenced packet can count */
2373 if ((p->length > conn->peer->maxPacketSize)) {
2374 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2375 (p->header.flags & RX_REQUEST_ACK)) &&
2376 ((i == 0) || (p->length >= conn->lastPingSize))) {
2377 conn->lastPingSize = p->length;
2378 conn->lastPingSizeSer = serial + i;
2379 } else if ((p->header.seq != 0) &&
2380 ((i == 0) || (p->length >= conn->lastPacketSize))) {
2381 conn->lastPacketSize = p->length;
2382 conn->lastPacketSizeSeq = p->header.seq;
2387 MUTEX_EXIT(&conn->conn_data_lock);
2390 /* This stuff should be revamped, I think, so that most, if not
2391 * all, of the header stuff is always added here. We could
2392 * probably do away with the encode/decode routines. XXXXX */
2395 length = RX_HEADER_SIZE;
2396 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2397 wirevec[0].iov_len = RX_HEADER_SIZE;
2398 for (i = 0; i < len; i++) {
2401 /* The whole 3.5 jumbogram scheme relies on packets fitting
2402 * in a single packet buffer. */
2403 if (p->niovecs > 2) {
2404 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2407 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2410 if (p->length != RX_JUMBOBUFFERSIZE) {
2411 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2413 p->header.flags |= RX_JUMBO_PACKET;
2414 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2415 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2417 wirevec[i + 1].iov_len = p->length;
2418 length += p->length;
2420 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2422 /* Convert jumbo packet header to network byte order */
2423 temp = (afs_uint32) (p->header.flags) << 24;
2424 temp |= (afs_uint32) (p->header.spare);
2425 *(afs_uint32 *) jp = htonl(temp);
2427 jp = (struct rx_jumboHeader *)
2428 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
        /* Stamp each packet with a unique serial number.  The serial
         * number is maintained on a connection basis because some types
         * of security may be based on the serial number of the packet,
         * and security is handled on a per authenticated-connection
         * basis. */
        /* Pre-increment, to guarantee no zero serial number; a zero
         * serial number means the packet was never sent. */
        p->header.serial = ++serial;
        /* This is so we can adjust retransmit time-outs better in the face of
         * rapidly changing round-trip times.  RTO estimation is not a la Karn.
         */
        if (p->firstSerial == 0) {
            p->firstSerial = p->header.serial;
        }
#ifdef RXDEBUG
        /* If an output tracer function is defined, call it with the packet and
         * network address.  Note this function may modify its arguments. */
        if (rx_almostSent) {
            int drop = (*rx_almostSent) (p, &addr);
            /* drop packet if return value is non-zero? */
            if (drop)
                deliveryType = 'D';     /* Drop the packet */
        }
#endif

        /* Get network byte order header */
        rxi_EncodePacketHeader(p);      /* XXX in the event of rexmit, etc, don't need to
                                         * touch ALL the fields */
    }
    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
        (conn->type ==
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
    } else {
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */
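        /* Note the apparently unbalanced braces: when RXDEBUG is defined, the
         * send code below is the "else" arm of the drop test above and its
         * closing brace appears inside the later RXDEBUG block; without
         * RXDEBUG the same code compiles unconditionally. */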
        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        waslocked = ISAFS_GLOCK();
        if (!istack && waslocked)
            AFS_GUNLOCK();
#endif
        if ((code =
             osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
                         istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.netSendFailures);
            for (i = 0; i < len; i++) {
                p = list[i];
                p->flags &= ~RX_PKTFLAG_SENT;   /* resend it very soon */
            }
            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later.
             */
            if (call &&
#ifdef AFS_NT40_ENV
                ((code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH))
#elif defined(AFS_LINUX20_ENV)
                code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
                code == EHOSTUNREACH
#else
                0
#endif
                )
                call->lastReceiveTime = 0;
        }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        if (!istack && waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    osi_Assert(p != NULL);

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
         ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
         p->header.seq, p->header.flags, p, p->length));
#endif
    if (rx_stats_active) {
        rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
        MUTEX_ENTER(&peer->peer_lock);
        hadd32(peer->bytesSent, p->length);
        MUTEX_EXIT(&peer->peer_lock);
    }
}
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(struct rx_call *call,
                struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;
    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }
    if (nbytes != -1)
        p->length = nbytes;
    else
        nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);
    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1; /* so condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }
    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {        /* means we truncated the packet above.  We probably don't
                         * really need to do this, but it seems safer this way,
                         * given that sneaky optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}
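/* Usage sketch (hypothetical, mirroring the abort-sending callers in rx.c):
 * to push a connection abort carrying an error code one might write
 *
 *     afs_int32 error = htonl(conn->error);
 *     rxi_SendSpecial((struct rx_call *)0, conn, (struct rx_packet *)0,
 *                     RX_PACKET_TYPE_ABORT, (char *)&error,
 *                     sizeof(error), istack);
 *
 * Passing a NULL optionalPacket makes the routine allocate and free the
 * packet itself, so the return value is then NULL as well. */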
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);  /* MTUXXX */

    memset(buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
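/* Resulting on-the-wire header layout (a sketch; struct rx_header in rx.h
 * is authoritative).  Seven 32-bit words, all in network byte order, for a
 * total of RX_HEADER_SIZE (28) bytes:
 *
 *   word 0  epoch
 *   word 1  cid (connection id | channel)
 *   word 2  callNumber
 *   word 3  seq
 *   word 4  serial
 *   word 5  type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6  spare<<16 | serviceId  (the top 16 bits double as the header
 *           checksum slot, filled in by the security package)
 */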
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);  /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf++);
    p->header.cid = ntohl(*buf++);
    p->header.callNumber = ntohl(*buf++);
    p->header.seq = ntohl(*buf++);
    p->header.serial = ntohl(*buf++);

    temp = ntohl(*buf++);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf++);
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
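/* Encode and decode are inverses over the seven words above, so a round trip
 *   rxi_EncodePacketHeader(p); rxi_DecodePacketHeader(p);
 * should leave every p->header field unchanged (the checksum half of the
 * last word is supplied by the security layer, not by these routines). */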
/*
 * LOCKS HELD: called with call->lock held.
 *
 * PrepareSendPacket is the only place in the code that
 * can increment call->tnext.  This could become an atomic
 * in the future.  Beyond that there is nothing in this
 * function that requires the call being locked.  This
 * function can only be called by the application thread.
 */
void
rxi_PrepareSendPacket(struct rx_call *call,
                      struct rx_packet *p, int last)
{
    struct rx_connection *conn = call->conn;
    afs_uint32 seq = call->tnext++;
    unsigned int i;
    afs_int32 len;              /* len must be a signed type; it can go negative */

    /* No data packets on call 0. Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    MUTEX_EXIT(&call->lock);
    p->flags &= ~(RX_PKTFLAG_ACKED | RX_PKTFLAG_SENT);

    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    p->header.callNumber = *call->callNumber;
    p->header.seq = seq;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->firstSent);  /* Never yet transmitted */
    p->header.serial = 0;       /* Another way of saying never transmitted... */
    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
    } else if (i < p->niovecs) {
        /* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
        rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
#else /* !RX_ENABLE_TSFPQ */
        MUTEX_ENTER(&rx_freePktQ_lock);
        rxi_FreeDataBufsNoLock(p, i);
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */

        p->niovecs = i;
    }
    if (len)
        p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
    MUTEX_ENTER(&call->lock);
}
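/* Worked example of the length fixup above (numbers hypothetical): with
 * p->length == 1000, securityHeaderSize == 8, and wirevec[1].iov_len ==
 * 1412, the loop leaves len == 1008 - 1412 == -404 at i == 2; any iovecs
 * past wirevec[1] are freed, and wirevec[1].iov_len += -404 trims it to
 * 1008, so the iov_lens again sum to data plus security header. */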
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU, frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
        return mtu;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU)
        return mtu;
    mtu -= adjMTU;
    if (mtu <= 0)
        return adjMTU;
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
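/* Worked example (assuming the usual rx.h values RX_HEADER_SIZE == 28,
 * RX_JUMBOBUFFERSIZE == 1412, RX_JUMBOHEADERSIZE == 4, so adjMTU == 1444):
 * rxi_AdjustIfMTU(3000) computes 3000 - 1444 == 1556, frags == 1556/1416
 * == 1, and returns 1444 + 1416 == 2860, i.e. the largest size that packs
 * whole jumbogram buffers with no wasted tail. */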
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
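/* Example (hypothetical values): with rxi_nSendFrags == 4, mtu == 1444 and
 * peerMaxMTU == 4096, maxMTU starts at 5776, is clamped to 4096, and
 * rxi_AdjustIfMTU() rounds that down to 2860 as in the example above. */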
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
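/* Worked example (assuming UDP_HDR_SIZE == 8, the buffer sizes noted above,
 * and that RX_MAX_PACKET_SIZE does not clamp): rxi_AdjustDgramPackets(3,
 * 1444) gives maxMTU == 3*(1444+8) - 8 == 4348; subtracting the first and
 * last buffers (28 + 2*1412 + 4 == 2856) leaves 1492, and 1492/1416 == 1,
 * so the result is 2 + 1 == 3 packet buffers per datagram. */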
/*
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where the packet leakage is.
 */
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
    struct rx_packet *p;
#ifdef AFS_NT40_ENV
    int zilch;
    char output[2048];
#define RXDPRINTF sprintf
#define RXDPRINTOUT output
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);
    RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    for (p = rx_mallocedP; p; p = p->allNextp) {
        RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, length=%u  header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
                  cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec,
                  p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->length,
                  p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
                  (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
                  (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
#ifdef AFS_NT40_ENV
        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }

    RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
#endif /* RXDEBUG_PACKET */

    return 0;
}