/*
 * Copyright 2000, International Business Machines Corporation and others.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

#include <afsconfig.h>
#include <afs/param.h>
# include "afs/sysincludes.h"
# include "afsincludes.h"
# include "rx_kcommon.h"
# else /* defined(UKERNEL) */
# ifdef RX_KERNEL_TRACE
# include "rx_kcommon.h"
# ifndef AFS_LINUX20_ENV
# if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
# include "afs/sysincludes.h"
# if defined(AFS_OBSD_ENV)
# include "h/socket.h"
# if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
# if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
# include "sys/mount.h"	/* it gets pulled in by something later anyway */
# include "netinet/in.h"
# include "afs/afs_osi.h"
# include "rx_kmutex.h"
# endif /* defined(UKERNEL) */

# if defined(AFS_NT40_ENV)
# define EWOULDBLOCK WSAEWOULDBLOCK
# include "rx_xmit_nt.h"

# include <sys/sysmacros.h>

#include "rx_packet.h"
#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */

static struct rx_packet *rx_mallocedP = 0;
static afs_uint32 rx_packet_id = 0;

extern char cml_version_number[];

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_uint32 ahost, short aport,
				afs_int32 istack);

static struct rx_packet *rxi_AllocPacketNoLock(int class);
static void rxi_MorePacketsNoLock(int apackets);

#ifdef RX_ENABLE_TSFPQ
static int rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first,
				 int flush_global);
static void rxi_AdjustLocalPacketsTSFPQ(int num_keep_local,
					int allow_overcommit);

static void rxi_FreePacketNoLock(struct rx_packet *p);
static int rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first);
static int rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first,
				   struct rx_queue * q);

extern struct rx_queue rx_idleServerQueue;
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */

/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    return
		*((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
				 (offset - l)));
	}
	l += packet->wirevec[i].iov_len;
    }
    return 0;
}
/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }
    return 0;
}
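
/* Illustrative sketch (not part of the original source): round-tripping a
 * word through the slow accessors above.  Assumes a fully initialized
 * packet whose data area covers offset + sizeof(afs_int32) bytes; the
 * helper name is hypothetical and exists only for this example. */
static void
example_slow_int32_roundtrip(struct rx_packet *packet, size_t offset)
{
    afs_int32 before, after;

    before = rx_SlowGetInt32(packet, offset);		/* fetch the word at offset */
    rx_SlowPutInt32(packet, offset, before + 1);	/* overwrite it in place */
    after = rx_SlowGetInt32(packet, offset);		/* now after == before + 1 */
    (void)after;
}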
/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
afs_int32
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
		  char *out)
{
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset)
	    break;
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i < packet->niovecs)) {
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	out += j;
	r -= j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (r ? (resid - r) : resid);
}
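
/* Illustrative sketch (not part of the original source): draining a
 * packet's payload into a flat buffer with rx_SlowReadPacket.  Offset 0
 * is relative to the data area (the scan above starts at iovec 1, so the
 * header vec is skipped); the return value is the byte count actually
 * copied, which may be short for small packets. */
static int
example_copy_payload(struct rx_packet *packet, char *out, int outlen)
{
    return rx_SlowReadPacket(packet, 0, outlen, out);
}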
/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
{
    unsigned int i, j, l, o, r;
    char *b;

    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > o)
	    break;
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy into this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((r > 0) && (i <= RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
		break;

	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	memcpy(b, in, j);
	in += j;
	r -= j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (r ? (resid - r) : resid);
}
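
/* Illustrative sketch (not part of the original source): filling a packet
 * from a flat buffer.  rx_SlowWritePacket grows the packet with
 * rxi_AllocDataBuf as a side effect when it runs out of iovecs, so the
 * caller only needs to compare the returned count with what it asked for. */
static int
example_fill_payload(struct rx_packet *packet, char *in, int inlen)
{
    int written = rx_SlowWritePacket(packet, 0, inlen, in);

    if (written < inlen) {
	/* continuation buffer allocation failed part way through */
    }
    return written;
}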
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
	RX_PACKET_IOV_FULLINIT(p);
    }

#ifdef RX_ENABLE_TSFPQ
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;

    MUTEX_ENTER(&rx_freePktQ_lock);
    transfer = MAX(transfer, rx_TSFPQGlobSize);
    if (transfer > rx_nFreePackets) {
	/* alloc enough for us, plus a few globs for other threads */
	rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
    }

    RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

    MUTEX_EXIT(&rx_freePktQ_lock);

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);

#else /* RX_ENABLE_TSFPQ */
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    struct rx_packet *c;
    int i, overq = 0;

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
	 num_pkts--, overq++);

    rxi_NeedMorePackets = TRUE;
    if (rx_stats_active) {
	switch (class) {
	case RX_PACKET_CLASS_RECEIVE:
	    rx_atomic_inc(&rx_stats.receivePktAllocFailures);
	    break;
	case RX_PACKET_CLASS_SEND:
	    rx_atomic_inc(&rx_stats.sendPktAllocFailures);
	    break;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_atomic_inc(&rx_stats.specialPktAllocFailures);
	    break;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
	    break;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
	    break;
	}
    }

    if (rx_nFreePackets < num_pkts)
	num_pkts = rx_nFreePackets;

    rxi_NeedMorePackets = TRUE;

    if (rx_nFreePackets < num_pkts) {
	rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));
    }

    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
	 i < num_pkts;
	 i++, c = queue_Next(c, rx_packet)) {
    }

    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);

    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
{
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

    if (!num_pkts) {
	for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);
	}
    } else {
	for (queue_Scan(q, c, nc, rx_packet)) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);
	}
    }

    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }

#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
{
    struct rx_queue cbs;
    struct rx_packet *p, *np;
    int qlen = 0;

    osi_Assert(num_pkts >= 0);

    if (!num_pkts) {
	for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	    }
	}
    } else {
	for (queue_Scan(q, p, np, rx_packet)) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	    }
	}
    }

    queue_SpliceAppend(q, &cbs);

    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
{
    int i = p->niovecs - 1;

    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	}
    } else {
	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	}
    }
}

/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
    int i, nv;
    struct rx_queue q;
    struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
	nv++;
    if ((nv + p->niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - p->niovecs;

    /* allocate buffers */
    queue_Init(&q);
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
	p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    }

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);

    return nb;
}
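
/* Worked example (annotation, not original code): with a stand-in
 * RX_CBUFFERSIZE of 1024 and nb = 3000, nv = 3000 / 1024 = 2, and since
 * 2 * 1024 < 3000 it is bumped to 3.  Three continuation buffers cover
 * 3072 bytes, so the residual nb computed above is 3000 - 3072 = -72,
 * i.e. <= 0, which per the contract above signals success.  (1024 is
 * only illustrative; the real value comes from rx_packet.h.) */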
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */

    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_NeedMorePackets = FALSE;

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;
#endif

	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
    }

    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;

    MUTEX_EXIT(&rx_freePktQ_lock);
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
{
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (flush_global &&
	(num_keep_local < apackets)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
	rxi_NeedMorePackets = FALSE;

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
static void
rxi_MorePacketsNoLock(int apackets)
{
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;
    int getme;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);

    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);

    apackets -= apackets / 4;
    osi_Assert(apackets > 0);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;
#endif

	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
    }

    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;
}
void
rxi_FreeAllPackets(void)
{
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
}

#ifdef RX_ENABLE_TSFPQ
static void
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
{
    struct rx_ts_info_t * rx_ts_info;
    int xfer;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	if (num_keep_local < rx_ts_info->_FPQ.len) {
	    xfer = rx_ts_info->_FPQ.len - num_keep_local;
	    RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
	} else {
	    xfer = num_keep_local - rx_ts_info->_FPQ.len;
	    if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
		xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
	    if (rx_nFreePackets < xfer) {
		rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
	    }

	    RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
	}
	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}

void
rxi_FlushLocalPacketsTSFPQ(void)
{
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
}
#endif /* RX_ENABLE_TSFPQ */
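
/* Annotation (not in the original): the TSFPQ ("thread-specific free
 * packet queue") scheme above keeps a per-thread queue of free packets
 * (rx_ts_info->_FPQ) so the common alloc/free path can run without
 * taking rx_freePktQ_lock.  RX_TS_FPQ_LTOG(2) drains surplus local
 * packets to the global pool under the lock, and RX_TS_FPQ_GTOL(2)
 * pulls a glob back when the local queue runs dry, as the routines
 * above and below do. */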
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
void
rx_CheckPackets(void)
{
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_maxSendWindow);
    }
}

/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for afs_int32: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
#ifndef RX_ENABLE_TSFPQ
static void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    queue_Append(&rx_freePacketQueue, p);
}
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
{
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p     -- packet from which continuation buffers will be freed
 * [IN] first -- iovec offset of first continuation buffer to free
 * [IN] q     -- queue into which continuation buffers will be chained
 *
 * returns:
 *   number of continuation buffers freed
 */
#ifndef RX_ENABLE_TSFPQ
static int
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
{
    struct iovec *iov;
    struct rx_packet * cb;
    int count = 0;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
	cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
	RX_FPQ_MARK_FREE(cb);
	queue_Append(q, cb);
    }

    return count;
}

/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p     -- packet from which to free continuation buffers
 * [IN] first -- iovec offset of first continuation buffer to free
 */
static int
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
{
    struct iovec *iov;

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p            -- packet from which continuation buffers will be freed
 * [IN] first        -- iovec offset of first continuation buffer to free
 *                      any value less than 2, the min number of iovecs,
 *                      is treated as if it is 2.
 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
 *                      global free pool before returning
 */
static int
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
{
    struct iovec *iov;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
void
rxi_RestoreDataBufs(struct rx_packet *p)
{
    unsigned int i;
    struct iovec *iov;

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	    rxi_nBadIovecs++;
	    p->niovecs = i;
	    break;
	}
	iov->iov_len = RX_CBUFFERSIZE;
    }
}
#ifdef RX_ENABLE_TSFPQ
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */

    RX_TS_INFO_GET(rx_ts_info);
    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }

    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacket(struct rx_packet *p)
{
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_FreePacket(struct rx_packet *p)
{
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
#endif /* RX_ENABLE_TSFPQ */
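
/* Illustrative sketch (not part of the original source): the minimal
 * packet lifecycle under either build.  rxi_FreePacket returns both the
 * continuation buffers (iovecs 2 and up) and the packet itself to the
 * free pool; the prototypes are assumed to come from the rx headers. */
static void
example_packet_lifecycle(void)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);

    if (p != NULL) {
	/* ... fill in and transmit the packet here ... */
	rxi_FreePacket(p);	/* p must not be on any queue at this point */
    }
}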
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
static struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_atomic_inc(&rx_stats.receivePktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_atomic_inc(&rx_stats.sendPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_atomic_inc(&rx_stats.specialPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
		break;
	    }
	}
	return (struct rx_packet *)0;
    }

    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

	if (queue_IsEmpty(&rx_freePacketQueue))
	    osi_Panic("rxi_AllocPacket error");

	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
#else /* RX_ENABLE_TSFPQ */
static struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    struct rx_packet *p;

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_atomic_inc(&rx_stats.receivePktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_atomic_inc(&rx_stats.sendPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_atomic_inc(&rx_stats.specialPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
		break;
	    }
	}
	return (struct rx_packet *)0;
    }

    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);

    if (queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");

    if (queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(rx_maxSendWindow);

    p = queue_First(&rx_freePacketQueue, rx_packet);
    queue_Remove(p);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
static struct rx_packet *
rxi_AllocPacketTSFPQ(int class, int pull_global)
{
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

	MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
	return NULL;
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);

    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
    return p;
}
#else /* RX_ENABLE_TSFPQ */
struct rx_packet *
rxi_AllocPacket(int class)
{
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call. It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
struct rx_packet *
rxi_AllocSendPacket(struct rx_call *call, int want)
{
    struct rx_packet *p = (struct rx_packet *)0;
    int mud;
    unsigned delta;

    mud = call->MTU - RX_HEADER_SIZE;
    delta =
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
	want += delta;
	want = MIN(want, mud);

	if ((unsigned)want > p->length)
	    (void)rxi_AllocDataBuf(p, (want - p->length),
				   RX_PACKET_CLASS_SEND_CBUF);

	if (p->length > mud)
	    p->length = mud;

	if (delta >= p->length) {
	    rxi_FreePacket(p);
	    p = NULL;
	} else {
	    p->length -= delta;
	}
	return p;
    }
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);

	    want += delta;
	    want = MIN(want, mud);

	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);

	    if (p->length > mud)
		p->length = mud;

	    if (delta >= p->length) {
		rxi_FreePacket(p);
		p = NULL;
	    } else {
		p->length -= delta;
	    }
	    break;
	}

	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we wait.
	 */
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
	osi_rxSleep(&rx_waitingForPackets);
#endif
	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
#ifdef AFS_NT40_ENV
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
#else
/* count the number of used FDs */
static int
CountFDs(int amax)
{
    struct stat tstat;
    int i, code, count = 0;

    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
	if (code == 0)
	    count++;
    }
    return count;
}
#endif /* AFS_NT40_ENV */

#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
int
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
	       u_short * port)
{
    struct sockaddr_in from;
    struct msghdr msg;
    int nbytes;
    afs_int32 rlen;
    afs_uint32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */

    tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (u_short)(nbytes - RX_HEADER_SIZE);
    if (nbytes < 0 || (nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes < 0 && errno == EWOULDBLOCK) {
	    if (rx_stats_active)
		rx_atomic_inc(&rx_stats.noPacketOnRead);
	} else if (nbytes <= 0) {
	    if (rx_stats_active) {
		rx_atomic_inc(&rx_stats.bogusPacketOnRead);
		rx_stats.bogusHost = from.sin_addr.s_addr;
	    }
	    dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
		 ntohs(from.sin_port), nbytes));
	}
	return 0;
    }
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
	     && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;

	dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
	     p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
	     p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	     p->length));
#ifdef RX_TRIMDATABUFS
	rxi_TrimDataBufs(p, 1);
#endif
	return 0;
    }
    else {
	/* Extract packet header. */
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	if (rx_stats_active
	    && p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {

	    rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
	}

#ifdef RX_TRIMDATABUFS
	/* Free any empty packet buffers at the end of this packet */
	rxi_TrimDataBufs(p, 1);
#endif
	return 1;
    }
}
#endif /* !KERNEL || UKERNEL */
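
#if !defined(KERNEL) || defined(UKERNEL)
/* Illustrative sketch (not part of the original source): a userspace
 * receive loop over rxi_ReadPacket.  The u_short port type is assumed
 * from the elided parameter; a zero return means the datagram was bogus
 * or intentionally dropped, and the same buffer can be reused for the
 * next read once rxi_RestoreDataBufs resets the iovec lengths. */
static void
example_read_loop(osi_socket sock)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_RECEIVE);
    afs_uint32 host;
    u_short port;

    while (p != NULL && rxi_ReadPacket(sock, p, &host, &port)) {
	/* header is already decoded; dispatch on p->header.type here */
	rxi_RestoreDataBufs(p);
    }
}
#endif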
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
struct rx_packet *
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
		     int first)
{
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    int niov, i;
    struct iovec *iov;
    int length;
    afs_uint32 temp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
	return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
	return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;

    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;
    }

    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

    return np;
}
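
/* Annotation (not in the original): the on-the-wire jumbogram layout the
 * code above decodes, for n stacked packets:
 *
 *   | rx_header | data (RX_JUMBOBUFFERSIZE) | jumbo hdr | data | ... | data |
 *
 * Every segment but the last is exactly RX_JUMBOBUFFERSIZE bytes and is
 * followed by an RX_JUMBOHEADERSIZE-byte rx_jumboHeader carrying the next
 * packet's flags and spare/cksum; the last segment's length falls out of
 * the total datagram size, as the comment above notes. */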
/* Send a udp datagram */
int
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
{
    struct msghdr msg;
    int ret;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);

    return ret;
}
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
static int
cpytoc(mblk_t * mp, int off, int len, char *cp)
{
    int n;

    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return len;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);
	cp += n;
	len -= n;
    }

    return len;
}

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
static int
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
	   int niovs)
{
    int i, n, o, t, m;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return len;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	o = 0;
	while (n) {
	    if (!t) {
		o = 0;
		i++;
		t = iovs[i].iov_len;
	    }
	    m = MIN(n, t);
	    memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
	    mp->b_rptr += m;
	    o += m;
	    t -= m;
	    n -= m;
	    len -= m;
	}
    }

    return len;
}

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e)  cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
int
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
{
    caddr_t p1, p2;
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

    while (off && m)
	if (m->m_len <= off) {
	    off -= m->m_len;
	    m = m->m_next;
	    continue;
	} else
	    break;

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    i = 0;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    while (len) {
	t = MIN(l1, MIN(l2, (unsigned int)len));

	p1 = mtod(m, caddr_t);

	p2 = iovs[i].iov_base;
	l2 = iovs[i].iov_len;
    }

    return len;
}
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
#if defined(AFS_NBSD_ENV)
int
rx_mb_to_packet(struct mbuf *amb, void (*free) (struct mbuf *), int hdr_len, int data_len, struct rx_packet *phandle)
#else
int
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     mblk_t *amb;
#endif
     struct rx_packet *phandle;
     int hdr_len, data_len;
#endif /* AFS_NBSD_ENV */
{
    int code;

    code =
	m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
		     phandle->niovecs);

#endif /*KERNEL && !UKERNEL */
/* send a response to a debug packet */

void
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
		       afs_uint32 ahost, short aport, int istack)
{
    struct rx_debugIn tin;
    afs_int32 tl;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;

	    /* get basic stats */
	    memset(&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
#endif
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.nPackets = htonl(rx_nPackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
	    tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
	    queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
			tstat.idleThreads);
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
			   (char *)&tstat);
	    ap->length = sizeof(struct rx_debugStats);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    rx_computelen(ap, ap->length);
	    break;
	}
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    unsigned int i, j;
	    struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);

	    tl = sizeof(struct rx_debugConn) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset(&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of connections.
		 */
#ifdef AFS_PTHREAD_ENV

#endif
		MUTEX_ENTER(&rx_connHashTable_lock);
		/* We might be slightly out of step since we are not
		 * locking each call, but this is only debugging output.
		 */
		for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		    if ((all || rxi_IsConnInteresting(tc))
			&& tin.index-- <= 0) {
			tconn.host = tc->peer->host;
			tconn.port = tc->peer->port;
			tconn.cid = htonl(tc->cid);
			tconn.epoch = htonl(tc->epoch);
			tconn.serial = htonl(tc->serial);
			for (j = 0; j < RX_MAXCALLS; j++) {
			    tconn.callNumber[j] = htonl(tc->callNumber[j]);
			    if ((tcall = tc->call[j])) {
				tconn.callState[j] = tcall->state;
				tconn.callMode[j] = tcall->mode;
				tconn.callFlags[j] = tcall->flags;
				if (queue_IsNotEmpty(&tcall->rq))
				    tconn.callOther[j] |= RX_OTHER_IN;
				if (queue_IsNotEmpty(&tcall->tq))
				    tconn.callOther[j] |= RX_OTHER_OUT;
			    } else
				tconn.callState[j] = RX_STATE_NOTINIT;
			}

			tconn.natMTU = htonl(tc->peer->natMTU);
			tconn.error = htonl(tc->error);
			tconn.flags = tc->flags;
			tconn.type = tc->type;
			tconn.securityIndex = tc->securityIndex;
			if (tc->securityObject) {
			    RXS_GetStats(tc->securityObject, tc,
					 &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			    DOHTONL(packetsReceived);
			    DOHTONL(packetsSent);
			    DOHTONL(bytesReceived);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.spares) /
				 sizeof(short); i++)
				DOHTONS(spares[i]);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)
				DOHTONL(sparel[i]);
			}

			MUTEX_EXIT(&rx_connHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
				       (char *)&tconn);
			ap->length = sizeof(struct rx_debugConn);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			return;
		    }
		}
		MUTEX_EXIT(&rx_connHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
			   (char *)&tconn);
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}
	/*
	 * Pass back all the peer structures we have available
	 */
    case RX_DEBUGI_GETPEER:{
	    unsigned int i;
	    struct rx_peer *tp;
	    struct rx_debugPeer tpeer;

	    tl = sizeof(struct rx_debugPeer) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset(&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of peers.
		 *
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock.
		 * also increases the risk that we will miss a new
		 * entry - but we are willing to live with this
		 * limitation since this is meant for debugging only
		 */
#ifdef AFS_PTHREAD_ENV

#endif
		MUTEX_ENTER(&rx_peerHashTable_lock);
		for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		    if (tin.index-- <= 0) {

			MUTEX_EXIT(&rx_peerHashTable_lock);

			MUTEX_ENTER(&tp->peer_lock);
			tpeer.host = tp->host;
			tpeer.port = tp->port;
			tpeer.ifMTU = htons(tp->ifMTU);
			tpeer.idleWhen = htonl(tp->idleWhen);
			tpeer.refCount = htons(tp->refCount);
			tpeer.burstSize = 0;

			tpeer.burstWait.sec = 0;
			tpeer.burstWait.usec = 0;
			tpeer.rtt = htonl(tp->rtt);
			tpeer.rtt_dev = htonl(tp->rtt_dev);
			tpeer.nSent = htonl(tp->nSent);
			tpeer.reSends = htonl(tp->reSends);
			tpeer.natMTU = htons(tp->natMTU);
			tpeer.maxMTU = htons(tp->maxMTU);
			tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
			tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
			tpeer.MTU = htons(tp->MTU);
			tpeer.cwind = htons(tp->cwind);
			tpeer.nDgramPackets = htons(tp->nDgramPackets);
			tpeer.congestSeq = htons(tp->congestSeq);
			tpeer.bytesSent.high =
			    htonl(tp->bytesSent >> 32);
			tpeer.bytesSent.low =
			    htonl(tp->bytesSent & MAX_AFS_UINT32);
			tpeer.bytesReceived.high =
			    htonl(tp->bytesReceived >> 32);
			tpeer.bytesReceived.low =
			    htonl(tp->bytesReceived & MAX_AFS_UINT32);
			MUTEX_EXIT(&tp->peer_lock);

			MUTEX_ENTER(&rx_peerHashTable_lock);

			MUTEX_EXIT(&rx_peerHashTable_lock);

			rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
				       (char *)&tpeer);
			ap->length = sizeof(struct rx_debugPeer);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			return;
		    }
		}
		MUTEX_EXIT(&rx_peerHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
			   (char *)&tpeer);
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}
    case RX_DEBUGI_RXSTATS:{
	    unsigned int i;
	    afs_int32 *s;

	    tl = sizeof(rx_stats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    /* Since it's all int32s, convert to network order with a loop. */
	    if (rx_stats_active)
		MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) &rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

	    ap->length = sizeof(rx_stats);
	    if (rx_stats_active)
		MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}

    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);

	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	break;
    }
}
void
rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
			 afs_uint32 ahost, short aport, int istack)
{
    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	char buf[65];

	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);

	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    }
}
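
/* Annotation (not in the original): the "+ 4" in the strncpy above
 * presumably skips a leading "@(#)" what(1) tag on cml_version_number,
 * so the reply carries just the human-readable version string,
 * NUL-padded to the 65 bytes written into the packet. */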
/* send a debug packet back to the sender */
static void
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_uint32 ahost, short aport, afs_int32 istack)
{
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;
    int saven = 0;
    int waslocked = ISAFS_GLOCK();

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
#endif

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= apacket->wirevec[i].iov_len;
    }

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {

	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");
    }
#endif

    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {

	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");
    }
#endif

    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
    }
}
void
rxi_NetSendError(struct rx_call *call, int code)
{
    int down = 0;
#ifdef AFS_NT40_ENV
    if (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) {
	down = 1;
    }
    if (code == -WSAEHOSTUNREACH) {
	down = 1;
    }
#elif defined(AFS_LINUX20_ENV)
    if (code == -ENETUNREACH) {
	down = 1;
    }
#elif defined(AFS_DARWIN_ENV)
    if (code == EHOSTUNREACH) {
	down = 1;
    }
#endif
    if (down) {
	call->lastReceiveTime = 0;
    }
}
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
void
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
{
    int waslocked;
    int code;
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    osi_socket socket;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif

    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    if (p->length > conn->peer->maxPacketSize) {
	if ((p->header.type == RX_PACKET_TYPE_ACK) &&
	    (p->header.flags & RX_REQUEST_ACK)) {
	    conn->lastPingSize = p->length;
	    conn->lastPingSizeSer = p->header.serial;
	} else if (p->header.seq != 0) {
	    conn->lastPacketSize = p->length;
	    conn->lastPacketSizeSeq = p->header.seq;
	}
    }
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;
    }

    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	if (drop)
	    deliveryType = 'D';	/* Drop the packet */
    }

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * received on */
    socket = (conn->type ==
	      RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
	waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {

	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "before osi_NetSend()");
	}
#endif

	if ((code =
	     osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
			 p->length + RX_HEADER_SIZE, istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    if (rx_stats_active)
		rx_atomic_inc(&rx_stats.netSendFailures);
	    p->flags &= ~RX_PKTFLAG_SENT;	/* resend it very soon */

	    /* Some systems are nice and tell us right away that we cannot
	     * reach this recipient by returning an error code.
	     * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call) {
		rxi_NetSendError(call, code);
	    }
	}
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {

	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "after osi_NetSend()");
	}
#endif

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
	 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
	 p->header.seq, p->header.flags, p, p->length));

    if (rx_stats_active) {
	rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
	MUTEX_ENTER(&peer->peer_lock);
	peer->bytesSent += p->length;
	MUTEX_EXIT(&peer->peer_lock);
    }
}
2322 /* Send a list of packets to appropriate destination for the specified
2323 * connection. The headers are first encoded and placed in the packets.
2326 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2327 struct rx_packet **list, int len, int istack)
2329 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2332 struct sockaddr_in addr;
2333 struct rx_peer *peer = conn->peer;
2335 struct rx_packet *p = NULL;
2336 struct iovec wirevec[RX_MAXIOVECS];
2337 int i, length, code;
2340 struct rx_jumboHeader *jp;
2342 char deliveryType = 'S';
2344 /* The address we're sending the packet to */
2345 addr.sin_family = AF_INET;
2346 addr.sin_port = peer->port;
2347 addr.sin_addr.s_addr = peer->host;
2349 if (len + 1 > RX_MAXIOVECS) {
2350 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2354 * Stamp the packets in this jumbogram with consecutive serial numbers
2356 MUTEX_ENTER(&conn->conn_data_lock);
2357 serial = conn->serial;
2358 conn->serial += len;
2359 for (i = 0; i < len; i++) {
2361 if (p->length > conn->peer->maxPacketSize) {
2362 /* a ping *or* a sequenced packet can count */
2363 if ((p->length > conn->peer->maxPacketSize)) {
2364 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2365 (p->header.flags & RX_REQUEST_ACK)) &&
2366 ((i == 0) || (p->length >= conn->lastPingSize))) {
2367 conn->lastPingSize = p->length;
2368 conn->lastPingSizeSer = serial + i;
2369 } else if ((p->header.seq != 0) &&
2370 ((i == 0) || (p->length >= conn->lastPacketSize))) {
2371 conn->lastPacketSize = p->length;
2372 conn->lastPacketSizeSeq = p->header.seq;
2377 MUTEX_EXIT(&conn->conn_data_lock);
2380 /* This stuff should be revamped, I think, so that most, if not
2381 * all, of the header stuff is always added here. We could
2382 * probably do away with the encode/decode routines. XXXXX */
2385 length = RX_HEADER_SIZE;
2386 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2387 wirevec[0].iov_len = RX_HEADER_SIZE;
2388 for (i = 0; i < len; i++) {
2391 /* The whole 3.5 jumbogram scheme relies on packets fitting
2392 * in a single packet buffer. */
2393 if (p->niovecs > 2) {
2394 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2397 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2400 if (p->length != RX_JUMBOBUFFERSIZE) {
2401 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2403 p->header.flags |= RX_JUMBO_PACKET;
2404 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2405 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2407 wirevec[i + 1].iov_len = p->length;
2408 length += p->length;
2410 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2412 /* Convert jumbo packet header to network byte order */
2413 temp = (afs_uint32) (p->header.flags) << 24;
2414 temp |= (afs_uint32) (p->header.spare);
2415 *(afs_uint32 *) jp = htonl(temp);
2417 jp = (struct rx_jumboHeader *)
2418 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2420 /* Stamp each packet with a unique serial number. The serial
2421 * number is maintained on a connection basis because some types
2422 * of security may be based on the serial number of the packet,
2423 * and security is handled on a per authenticated-connection
2425 /* Pre-increment, to guarantee no zero serial number; a zero
2426 * serial number means the packet was never sent. */
2427 p->header.serial = ++serial;
2428 /* This is so we can adjust retransmit time-outs better in the face of
2429 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2431 if (p->firstSerial == 0) {
2432 p->firstSerial = p->header.serial;
2435 /* If an output tracer function is defined, call it with the packet and
2436 * network address. Note this function may modify its arguments. */
2437 if (rx_almostSent) {
2438 int drop = (*rx_almostSent) (p, &addr);
2439 /* drop packet if return value is non-zero? */
2441 deliveryType = 'D'; /* Drop the packet */
2445 /* Get network byte order header */
2446 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2447 * touch ALL the fields */
2450 /* Send the packet out on the same socket that related packets are being
2454 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2457 /* Possibly drop this packet, for testing purposes */
2458 if ((deliveryType == 'D')
2459 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2460 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2461 deliveryType = 'D'; /* Drop the packet */
2463 deliveryType = 'S'; /* Send the packet */
2464 #endif /* RXDEBUG */
2466 /* Loop until the packet is sent. We'd prefer just to use a
2467 * blocking socket, but unfortunately the interface doesn't
2468 * allow us to have the socket block in send mode, and not
2469 * block in receive mode */
2470 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2471 waslocked = ISAFS_GLOCK();
2472 if (!istack && waslocked)
2473 AFS_GUNLOCK();
2474 #endif
2475 if ((code =
2476 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2477 istack)) != 0) {
2478 /* send failed, so let's hurry up the resend, eh? */
2479 if (rx_stats_active)
2480 rx_atomic_inc(&rx_stats.netSendFailures);
2481 for (i = 0; i < len; i++) {
2482 p = list[i];
2483 p->flags &= ~RX_PKTFLAG_SENT; /* resend it very soon */
2485 /* Some systems are nice and tell us right away that we cannot
2486 * reach this recipient by returning an error code.
2487 * So, when this happens let's "down" the host NOW so
2488 * we don't sit around waiting for this host to timeout later.
2489 */
2491 rxi_NetSendError(call, code);
2494 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2495 if (!istack && waslocked)
2496 AFS_GLOCK();
2497 #endif
2501 osi_Assert(p != NULL);
2503 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
2504 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2505 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2506 p->header.seq, p->header.flags, p, p->length));
2509 if (rx_stats_active) {
2510 rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
2511 MUTEX_ENTER(&peer->peer_lock);
2512 peer->bytesSent += p->length;
2513 MUTEX_EXIT(&peer->peer_lock);
2517 /* Send a raw abort packet, without any call or connection structures */
2518 void
2519 rxi_SendRawAbort(osi_socket socket, afs_uint32 host, u_short port,
2520 afs_int32 error, struct rx_packet *source, int istack)
2522 struct rx_header theader;
2523 struct sockaddr_in addr;
2524 struct iovec iov[2];
2526 memset(&theader, 0, sizeof(theader));
2527 theader.epoch = htonl(source->header.epoch);
2528 theader.callNumber = htonl(source->header.callNumber);
2529 theader.serial = htonl(1);
2530 theader.type = RX_PACKET_TYPE_ABORT;
2531 theader.serviceId = htons(source->header.serviceId);
2532 theader.securityIndex = source->header.securityIndex;
2533 theader.cid = htonl(source->header.cid);
2535 error = htonl(error);
2537 iov[0].iov_base = &theader;
2538 iov[0].iov_len = sizeof(struct rx_header);
2539 iov[1].iov_base = &error;
2540 iov[1].iov_len = sizeof(error);
2542 addr.sin_family = AF_INET;
2543 addr.sin_addr.s_addr = host;
2544 addr.sin_port = port;
2545 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2546 addr.sin_len = sizeof(struct sockaddr_in);
2549 osi_NetSend(socket, &addr, iov, 2,
2550 sizeof(struct rx_header) + sizeof(error), istack);
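/* Note: callers pass the offending incoming packet as "source", so the
 * abort built above echoes that packet's epoch, cid and call number back
 * to the sender without needing any call or connection state. */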
2553 /* Send a "special" packet to the peer connection. If call is
2554 * specified, then the packet is directed to a specific call channel
2555 * associated with the connection, otherwise it is directed to the
2556 * connection only. Uses optionalPacket if it is supplied, rather than
2557 * allocating a new packet buffer. Nbytes is the length of the data
2558 * portion of the packet. If data is non-null, nbytes of data are
2559 * copied into the packet. Type is the type of the packet, as defined
2560 * in rx.h. Bug: there's a lot of duplication between this and other
2561 * routines. This needs to be cleaned up. */
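/* Minimal usage sketch (illustrative only; see the call sites in rx.c for
 * the exact arguments):
 *
 *     (void) rxi_SendSpecial(call, conn, NULL, RX_PACKET_TYPE_BUSY,
 *                            NULL, 0, istack);
 *
 * A NULL optionalPacket asks this routine to allocate (and free) its own
 * packet; passing one in avoids the allocation, and the caller keeps
 * ownership of it via the return value. */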
2562 struct rx_packet *
2563 rxi_SendSpecial(struct rx_call *call,
2564 struct rx_connection *conn,
2565 struct rx_packet *optionalPacket, int type, char *data,
2566 int nbytes, int istack)
2568 /* Some of the following stuff should be common code for all
2569 * packet sends (it's repeated elsewhere) */
2570 struct rx_packet *p;
2572 int savelen = 0, saven = 0;
2573 int channel, callNumber;
2574 if (call) {
2575 channel = call->channel;
2576 callNumber = *call->callNumber;
2577 /* BUSY packets refer to the next call on this connection */
2578 if (type == RX_PACKET_TYPE_BUSY) {
2587 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2588 if (!p)
2589 osi_Panic("rxi_SendSpecial failure");
2596 p->header.serviceId = conn->serviceId;
2597 p->header.securityIndex = conn->securityIndex;
2598 p->header.cid = (conn->cid | channel);
2599 p->header.callNumber = callNumber;
2601 p->header.epoch = conn->epoch;
2602 p->header.type = type;
2603 p->header.flags = 0;
2604 if (conn->type == RX_CLIENT_CONNECTION)
2605 p->header.flags |= RX_CLIENT_INITIATED;
2607 rx_packetwrite(p, 0, nbytes, data);
2609 for (i = 1; i < p->niovecs; i++) {
2610 if (nbytes <= p->wirevec[i].iov_len) {
2611 savelen = p->wirevec[i].iov_len;
2612 saven = p->niovecs;
2613 p->wirevec[i].iov_len = nbytes;
2614 p->niovecs = i + 1; /* so the loop condition fails because i == niovecs */
2615 } else
2616 nbytes -= p->wirevec[i].iov_len;
2620 rxi_Send(call, p, istack);
2622 rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2623 if (saven) { /* means we truncated the packet above. We probably don't */
2624 /* really need to do this, but it seems safer this way, given that */
2625 /* sneaky optionalPacket... */
2626 p->wirevec[i - 1].iov_len = savelen;
2627 p->niovecs = saven;
2629 if (!optionalPacket)
2630 rxi_FreePacket(p);
2631 return optionalPacket;
2635 /* Encode the packet's header (from the struct header in the packet to
2636 * the network byte order representation that is actually sent out on
2637 * the wire). */
2638 void
2639 rxi_EncodePacketHeader(struct rx_packet *p)
2641 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2643 memset(buf, 0, RX_HEADER_SIZE);
2644 *buf++ = htonl(p->header.epoch);
2645 *buf++ = htonl(p->header.cid);
2646 *buf++ = htonl(p->header.callNumber);
2647 *buf++ = htonl(p->header.seq);
2648 *buf++ = htonl(p->header.serial);
2649 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2650 | (((afs_uint32) p->header.flags) << 16)
2651 | (p->header.userStatus << 8) | p->header.securityIndex);
2652 /* Note: top 16 bits of this next word were reserved */
2653 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
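/* For reference, the header written above is seven 32-bit words, all in
 * network byte order:
 *
 *   word 0  epoch
 *   word 1  cid (connection id | channel)
 *   word 2  callNumber
 *   word 3  seq
 *   word 4  serial
 *   word 5  type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6  spare<<16 | serviceId
 */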
2656 /* Decode the packet's header (from net byte order to a struct header) */
2657 void
2658 rxi_DecodePacketHeader(struct rx_packet *p)
2660 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2663 p->header.epoch = ntohl(*buf++);
2665 p->header.cid = ntohl(*buf++);
2667 p->header.callNumber = ntohl(*buf++);
2669 p->header.seq = ntohl(*buf++);
2671 p->header.serial = ntohl(*buf++);
2673 temp = ntohl(*buf++);
2677 /* C will truncate byte fields to bytes for me */
2678 p->header.type = temp >> 24;
2679 p->header.flags = temp >> 16;
2680 p->header.userStatus = temp >> 8;
2681 p->header.securityIndex = temp >> 0;
2685 temp = ntohl(*buf++);
2686 p->header.serviceId = (temp & 0xffff);
2687 p->header.spare = temp >> 16;
2688 /* Note: top 16 bits of this last word are the security checksum */
2692 * LOCKS HELD: called with call->lock held.
2694 * PrepareSendPacket is the only place in the code that
2695 * can increment call->tnext. This could become an atomic
2696 * in the future. Beyond that there is nothing in this
2697 * function that requires the call to be locked. This
2698 * function can only be called by the application thread.
2701 rxi_PrepareSendPacket(struct rx_call *call,
2702 struct rx_packet *p, int last)
2704 struct rx_connection *conn = call->conn;
2705 afs_uint32 seq = call->tnext++;
2707 afs_int32 len; /* len must be a signed type; it can go negative */
2709 /* No data packets on call 0. Where do these come from? */
2710 if (*call->callNumber == 0)
2711 *call->callNumber = 1;
2713 MUTEX_EXIT(&call->lock);
2714 p->flags &= ~(RX_PKTFLAG_ACKED | RX_PKTFLAG_SENT);
2716 p->header.cid = (conn->cid | call->channel);
2717 p->header.serviceId = conn->serviceId;
2718 p->header.securityIndex = conn->securityIndex;
2720 p->header.callNumber = *call->callNumber;
2721 p->header.seq = seq;
2722 p->header.epoch = conn->epoch;
2723 p->header.type = RX_PACKET_TYPE_DATA;
2724 p->header.flags = 0;
2725 p->header.spare = 0;
2726 if (conn->type == RX_CLIENT_CONNECTION)
2727 p->header.flags |= RX_CLIENT_INITIATED;
2730 p->header.flags |= RX_LAST_PACKET;
2732 clock_Zero(&p->firstSent); /* Never yet transmitted */
2733 p->header.serial = 0; /* Another way of saying never transmitted... */
2735 /* Now that we're sure this is the last data on the call, make sure
2736 * that the "length" and the sum of the iov_lens match. */
2737 len = p->length + call->conn->securityHeaderSize;
2739 for (i = 1; i < p->niovecs && len > 0; i++) {
2740 len -= p->wirevec[i].iov_len;
2742 if (len > 0) {
2743 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2744 } else if (i < p->niovecs) {
2745 /* Free any extra elements in the wirevec */
2746 #if defined(RX_ENABLE_TSFPQ)
2747 rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2748 #else /* !RX_ENABLE_TSFPQ */
2749 MUTEX_ENTER(&rx_freePktQ_lock);
2750 rxi_FreeDataBufsNoLock(p, i);
2751 MUTEX_EXIT(&rx_freePktQ_lock);
2752 #endif /* !RX_ENABLE_TSFPQ */
2757 p->wirevec[i - 1].iov_len += len;
2758 MUTEX_ENTER(&call->lock);
2759 RXS_PreparePacket(conn->securityObject, call, p);
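/* RXS_PreparePacket is the security layer's hook (rxkad, for example, adds
 * its checksum and encrypts here); that is also why securityHeaderSize was
 * folded into the length check above. */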
2762 /* Given an interface MTU size, calculate an adjusted MTU size that
2763 * will make efficient use of the RX buffers when the peer is sending
2764 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
2765 int
2766 rxi_AdjustIfMTU(int mtu)
2771 if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
2772 return mtu;
2773 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2774 if (mtu <= adjMTU) {
2775 return mtu;
2776 }
2777 mtu -= adjMTU;
2778 if (mtu <= 0) {
2779 return adjMTU;
2780 }
2781 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2782 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
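/* Worked example, assuming the standard constants (RX_HEADER_SIZE = 28,
 * RX_JUMBOBUFFERSIZE = 1412, RX_JUMBOHEADERSIZE = 4): for mtu = 1500,
 * adjMTU = 28 + 1412 + 4 = 1444; the 56 leftover bytes hold no whole
 * 1416-byte jumbo fragment (frags = 0), so the MTU rounds down to 1444. */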
2785 /* Given an interface MTU size, and the peer's advertised max receive
2786 * size, calculate an adjusted maxMTU size that makes efficient use
2787 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2788 int
2789 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2791 int maxMTU = mtu * rxi_nSendFrags;
2792 maxMTU = MIN(maxMTU, peerMaxMTU);
2793 return rxi_AdjustIfMTU(maxMTU);
2796 /* Given a packet size, figure out how many datagram packets will fit.
2797 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2798 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2799 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2800 int
2801 rxi_AdjustDgramPackets(int frags, int mtu)
2804 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2805 return 1;
2806 }
2807 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2808 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2809 /* subtract the size of the first and last packets */
2810 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2811 if (maxMTU < 0) {
2812 return 1;
2813 }
2814 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
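/* Worked example, under the same constant assumptions plus UDP_HDR_SIZE =
 * 28 (IP + UDP): frags = 4 and mtu = 1444 give maxMTU = 4 * 1472 - 28 =
 * 5860; subtracting the 28 + 2 * 1412 + 4 = 2856 bytes charged to the
 * first and last packets leaves 3004, and 3004 / 1416 = 2, so such a
 * datagram carries 2 + 2 = 4 packets. */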
2819 * This function can be used by the Windows Cache Manager
2820 * to dump the list of all rx packets so that we can determine
2821 * where the packet leakage is.
2823 int rx_DumpPackets(FILE *outputFile, char *cookie)
2825 #ifdef RXDEBUG_PACKET
2826 struct rx_packet *p;
2829 #ifdef AFS_NT40_ENV
2830 #define RXDPRINTF sprintf
2831 #define RXDPRINTOUT output
2832 #else
2833 #define RXDPRINTF fprintf
2834 #define RXDPRINTOUT outputFile
2835 #endif
2838 MUTEX_ENTER(&rx_freePktQ_lock);
2839 RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
2841 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2844 for (p = rx_mallocedP; p; p = p->allNextp) {
2845 RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, length=%u header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
2846 cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec,
2847 p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->length,
2848 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
2849 (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
2850 (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
2852 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2856 RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
2858 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2861 MUTEX_EXIT(&rx_freePktQ_lock);
2863 #endif /* RXDEBUG_PACKET */