2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "afs/param.h"
14 #include <afs/param.h>
22 #include "afs/sysincludes.h"
23 #include "afsincludes.h"
24 #include "rx/rx_kcommon.h"
25 #include "rx/rx_clock.h"
26 #include "rx/rx_queue.h"
27 #include "rx/rx_packet.h"
28 #else /* defined(UKERNEL) */
29 #ifdef RX_KERNEL_TRACE
30 #include "../rx/rx_kcommon.h"
33 #ifndef AFS_LINUX20_ENV
36 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
37 #include "afs/sysincludes.h"
39 #if defined(AFS_OBSD_ENV)
43 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
44 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
45 #include "sys/mount.h" /* it gets pulled in by something later anyway */
49 #include "netinet/in.h"
50 #include "afs/afs_osi.h"
51 #include "rx_kmutex.h"
52 #include "rx/rx_clock.h"
53 #include "rx/rx_queue.h"
55 #include <sys/sysmacros.h>
57 #include "rx/rx_packet.h"
58 #endif /* defined(UKERNEL) */
59 #include "rx/rx_globals.h"
61 #include "sys/types.h"
64 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
68 #define EWOULDBLOCK WSAEWOULDBLOCK
71 #include <sys/socket.h>
72 #include <netinet/in.h>
73 #endif /* AFS_NT40_ENV */
75 #include "rx_xmit_nt.h"
78 #include <sys/socket.h>
79 #include <netinet/in.h>
85 #include <sys/sysmacros.h>
87 #include "rx_packet.h"
88 #include "rx_globals.h"
98 /* rxdb_fileID is used to identify the lock location, along with line#. */
99 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
100 #endif /* RX_LOCKS_DB */
101 struct rx_packet *rx_mallocedP = 0;
103 extern char cml_version_number[];
104 extern int (*rx_almostSent) ();
106 static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);
108 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
109 afs_int32 ahost, short aport,
112 static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
114 struct rx_queue * q);
116 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
119 /* some rules about packets:
120 * 1. When a packet is allocated, the final iov_buf contains room for
121 * a security trailer, but iov_len masks that fact. If the security
122 * package wants to add the trailer, it may do so, and then extend
123 * iov_len appropriately. For this reason, packet's niovecs and
124 * iov_len fields should be accurate before calling PreparePacket.
128 * all packet buffers (iov_base) are integral multiples of
130 * offset is an integral multiple of the word size.
133 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
137 for (l = 0, i = 1; i < packet->niovecs; i++) {
138 if (l + packet->wirevec[i].iov_len > offset) {
140 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
143 l += packet->wirevec[i].iov_len;
150 * all packet buffers (iov_base) are integral multiples of the word size.
151 * offset is an integral multiple of the word size.
154 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
158 for (l = 0, i = 1; i < packet->niovecs; i++) {
159 if (l + packet->wirevec[i].iov_len > offset) {
160 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
161 (offset - l))) = data;
164 l += packet->wirevec[i].iov_len;
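/* A minimal usage sketch (illustrative; not part of the original source):
 * the Slow accessors walk wirevec[] to find the iovec containing `offset`,
 * so they work even when the word lives in a continuation buffer. */
#if 0
static void
example_slow_accessors(struct rx_packet *p)
{
    afs_int32 v;
    /* store a word at a word-aligned offset, possibly past the first buffer... */
    rx_SlowPutInt32(p, 4 * sizeof(afs_int32), htonl(42));
    /* ...and read it back */
    v = ntohl(rx_SlowGetInt32(p, 4 * sizeof(afs_int32)));
}
#endif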
171 * all packet buffers (iov_base) are integral multiples of the
173 * offset is an integral multiple of the word size.
175 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
178 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
181 unsigned int i, j, l, r;
182 for (l = 0, i = 1; i < packet->niovecs; i++) {
183 if (l + packet->wirevec[i].iov_len > offset) {
186 l += packet->wirevec[i].iov_len;
189 /* i is the iovec which contains the first little bit of data in which we
190 * are interested. l is the total length of everything prior to this iovec.
191 * j is the number of bytes we can safely copy out of this iovec.
192 * offset only applies to the first iovec.
195 while ((resid > 0) && (i < packet->niovecs)) {
196 j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
197 memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
200 l += packet->wirevec[i].iov_len;
205 return (resid ? (r - resid) : r);
210 * all packet buffers (iov_base) are integral multiples of the
212 * offset is an integral multiple of the word size.
215 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
220 for (l = 0, i = 1; i < packet->niovecs; i++) {
221 if (l + packet->wirevec[i].iov_len > offset) {
224 l += packet->wirevec[i].iov_len;
227 /* i is the iovec which contains the first little bit of data in which we
228 * are interested. l is the total length of everything prior to this iovec.
229 * j is the number of bytes we can safely copy into this iovec.
230 * offset only applies to the first iovec.
233 while ((resid > 0) && (i < RX_MAXWVECS)) {
234 if (i >= packet->niovecs)
235 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
238 b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
239 j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
243 l += packet->wirevec[i].iov_len;
248 return (resid ? (r - resid) : r);
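/* Sketch of the copy contract (illustrative): both routines return the byte
 * count actually transferred, which may be short of `resid` if the packet
 * runs out of iovecs (read) or continuation buffers can't be had (write). */
#if 0
static int
example_roundtrip(struct rx_packet *p, char *buf, int nbytes)
{
    int wrote, got;
    wrote = rx_SlowWritePacket(p, 0, nbytes, buf);
    got = rx_SlowReadPacket(p, 0, wrote, buf);
    return (wrote == nbytes && got == nbytes);	/* 1 iff nothing truncated */
}
#endif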
252 rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
254 register struct rx_packet *p, *np;
256 num_pkts = AllocPacketBufs(class, num_pkts, q);
258 for (queue_Scan(q, p, np, rx_packet)) {
259 RX_PACKET_IOV_FULLINIT(p);
265 #ifdef RX_ENABLE_TSFPQ
267 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
269 register struct rx_ts_info_t * rx_ts_info;
273 RX_TS_INFO_GET(rx_ts_info);
275 transfer = num_pkts - rx_ts_info->_FPQ.len;
278 MUTEX_ENTER(&rx_freePktQ_lock);
280 if ((transfer + rx_TSFPQGlobSize) <= rx_nFreePackets) {
281 transfer += rx_TSFPQGlobSize;
282 } else if (transfer <= rx_nFreePackets) {
283 transfer = rx_nFreePackets;
285 /* alloc enough for us, plus a few globs for other threads */
286 alloc = transfer + (3 * rx_TSFPQGlobSize) - rx_nFreePackets;
287 rxi_MorePacketsNoLock(MAX(alloc, rx_initSendWindow));
288 transfer += rx_TSFPQGlobSize;
291 RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
293 MUTEX_EXIT(&rx_freePktQ_lock);
297 RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
301 #else /* RX_ENABLE_TSFPQ */
303 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
311 MUTEX_ENTER(&rx_freePktQ_lock);
314 for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
315 num_pkts--, overq++);
318 rxi_NeedMorePackets = TRUE;
320 case RX_PACKET_CLASS_RECEIVE:
321 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
323 case RX_PACKET_CLASS_SEND:
324 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
326 case RX_PACKET_CLASS_SPECIAL:
327 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
329 case RX_PACKET_CLASS_RECV_CBUF:
330 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
332 case RX_PACKET_CLASS_SEND_CBUF:
333 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
338 if (rx_nFreePackets < num_pkts)
339 num_pkts = rx_nFreePackets;
342 rxi_NeedMorePackets = TRUE;
346 if (rx_nFreePackets < num_pkts) {
347 rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), rx_initSendWindow));
351 for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
353 i++, c=queue_Next(c, rx_packet)) {
357 queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);
359 rx_nFreePackets -= num_pkts;
364 MUTEX_EXIT(&rx_freePktQ_lock);
369 #endif /* RX_ENABLE_TSFPQ */
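/* Worked example of the TSFPQ refill arithmetic above (numbers assumed for
 * illustration only): with num_pkts = 10 and a thread-local queue of len 4,
 * transfer = 10 - 4 = 6.  If rx_TSFPQGlobSize = 8 and the global pool holds
 * rx_nFreePackets = 20 >= 6 + 8, we move 6 + 8 = 14 packets local, leaving
 * a glob's worth of slack for other threads.  Had the pool held only 5, we
 * would first allocate 6 + 3*8 - 5 = 25 fresh packets via
 * rxi_MorePacketsNoLock() before taking transfer + one glob. */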
372 * Free a packet currently used as a continuation buffer
374 #ifdef RX_ENABLE_TSFPQ
375 /* num_pkts=0 means queue length is unknown */
377 rxi_FreePackets(int num_pkts, struct rx_queue * q)
379 register struct rx_ts_info_t * rx_ts_info;
380 register struct rx_packet *c, *nc;
383 osi_Assert(num_pkts >= 0);
384 RX_TS_INFO_GET(rx_ts_info);
387 for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
388 rxi_FreeDataBufsTSFPQ(c, 2, 0);
391 for (queue_Scan(q, c, nc, rx_packet)) {
392 rxi_FreeDataBufsTSFPQ(c, 2, 0);
397 RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
400 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
402 MUTEX_ENTER(&rx_freePktQ_lock);
404 RX_TS_FPQ_LTOG(rx_ts_info);
406 /* Wakeup anyone waiting for packets */
409 MUTEX_EXIT(&rx_freePktQ_lock);
415 #else /* RX_ENABLE_TSFPQ */
416 /* num_pkts=0 means queue length is unknown */
418 rxi_FreePackets(int num_pkts, struct rx_queue *q)
421 register struct rx_packet *p, *np;
425 osi_Assert(num_pkts >= 0);
429 for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
430 if (p->niovecs > 2) {
431 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
438 for (queue_Scan(q, p, np, rx_packet)) {
439 if (p->niovecs > 2) {
440 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
447 queue_SpliceAppend(q, &cbs);
453 MUTEX_ENTER(&rx_freePktQ_lock);
455 queue_SpliceAppend(&rx_freePacketQueue, q);
456 rx_nFreePackets += qlen;
458 /* Wakeup anyone waiting for packets */
461 MUTEX_EXIT(&rx_freePktQ_lock);
466 #endif /* RX_ENABLE_TSFPQ */
468 /* this one is kind of awful.
469 * In rxkad, the packet has been all shortened, and everything, ready for
470 * sending. All of a sudden, we discover we need some of that space back.
471 * This isn't terribly general, because it knows that the packets are only
472 * rounded up to the EBS (userdata + security header).
475 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
479 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
480 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
481 p->wirevec[i].iov_len += nb;
485 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
486 p->wirevec[i].iov_len += nb;
494 /* get sufficient space to store nb bytes of data (or more), and hook
495 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
496 * returns the number of bytes >0 which it failed to come up with.
497 * Don't need to worry about locking on packet, since only
498 * one thread can manipulate one at a time. Locking on continuation
499 * packets is handled by AllocPacketBufs */
500 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
502 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
506 register struct rx_packet *cb, *ncb;
508 /* compute the number of cbuf's we need */
509 nv = nb / RX_CBUFFERSIZE;
510 if ((nv * RX_CBUFFERSIZE) < nb)
512 if ((nv + p->niovecs) > RX_MAXWVECS)
513 nv = RX_MAXWVECS - p->niovecs;
517 /* allocate buffers */
519 nv = AllocPacketBufs(class, nv, &q);
521 /* setup packet iovs */
522 for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
524 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
525 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
528 nb -= (nv * RX_CBUFFERSIZE);
529 p->length += (nv * RX_CBUFFERSIZE);
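/* Usage sketch (illustrative): callers treat a positive return value as the
 * shortfall in bytes, as rxi_AllocSendPacket() and the debug handlers below
 * do when growing a packet before a large write. */
#if 0
static int
example_grow(struct rx_packet *p, int need)
{
    int missing = rxi_AllocDataBuf(p, need, RX_PACKET_CLASS_SEND_CBUF);
    return (missing > 0) ? -1 : 0;	/* nonzero shortfall => couldn't grow */
}
#endif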
535 /* Add more packet buffers */
536 #ifdef RX_ENABLE_TSFPQ
538 rxi_MorePackets(int apackets)
540 struct rx_packet *p, *e;
541 register struct rx_ts_info_t * rx_ts_info;
545 getme = apackets * sizeof(struct rx_packet);
546 p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
548 PIN(p, getme); /* XXXXX */
549 memset((char *)p, 0, getme);
550 RX_TS_INFO_GET(rx_ts_info);
552 for (e = p + apackets; p < e; p++) {
553 RX_PACKET_IOV_INIT(p);
556 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
558 rx_ts_info->_FPQ.delta += apackets;
560 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
562 MUTEX_ENTER(&rx_freePktQ_lock);
564 RX_TS_FPQ_LTOG(rx_ts_info);
565 rxi_NeedMorePackets = FALSE;
568 MUTEX_EXIT(&rx_freePktQ_lock);
572 #else /* RX_ENABLE_TSFPQ */
574 rxi_MorePackets(int apackets)
576 struct rx_packet *p, *e;
580 getme = apackets * sizeof(struct rx_packet);
581 p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
583 PIN(p, getme); /* XXXXX */
584 memset((char *)p, 0, getme);
586 MUTEX_ENTER(&rx_freePktQ_lock);
588 for (e = p + apackets; p < e; p++) {
589 RX_PACKET_IOV_INIT(p);
590 p->flags |= RX_PKTFLAG_FREE;
593 queue_Append(&rx_freePacketQueue, p);
595 rx_nFreePackets += apackets;
596 rxi_NeedMorePackets = FALSE;
599 MUTEX_EXIT(&rx_freePktQ_lock);
602 #endif /* RX_ENABLE_TSFPQ */
604 #ifdef RX_ENABLE_TSFPQ
606 rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
608 struct rx_packet *p, *e;
609 register struct rx_ts_info_t * rx_ts_info;
613 getme = apackets * sizeof(struct rx_packet);
614 p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
616 PIN(p, getme); /* XXXXX */
617 memset((char *)p, 0, getme);
618 RX_TS_INFO_GET(rx_ts_info);
620 for (e = p + apackets; p < e; p++) {
621 RX_PACKET_IOV_INIT(p);
624 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
626 rx_ts_info->_FPQ.delta += apackets;
629 (num_keep_local < apackets)) {
631 MUTEX_ENTER(&rx_freePktQ_lock);
633 RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
634 rxi_NeedMorePackets = FALSE;
637 MUTEX_EXIT(&rx_freePktQ_lock);
641 #endif /* RX_ENABLE_TSFPQ */
644 /* Add more packet buffers */
646 rxi_MorePacketsNoLock(int apackets)
648 struct rx_packet *p, *e;
651 /* allocate enough packets that 1/4 of the packets will be able
652 * to hold maximal amounts of data */
653 apackets += (apackets / 4)
654 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
655 getme = apackets * sizeof(struct rx_packet);
656 p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
658 memset((char *)p, 0, getme);
660 for (e = p + apackets; p < e; p++) {
661 RX_PACKET_IOV_INIT(p);
662 p->flags |= RX_PKTFLAG_FREE;
665 queue_Append(&rx_freePacketQueue, p);
668 rx_nFreePackets += apackets;
669 #ifdef RX_ENABLE_TSFPQ
670 /* TSFPQ patch also needs to keep track of total packets */
671 MUTEX_ENTER(&rx_stats_mutex);
672 rx_nPackets += apackets;
673 RX_TS_FPQ_COMPUTE_LIMITS;
674 MUTEX_EXIT(&rx_stats_mutex);
675 #endif /* RX_ENABLE_TSFPQ */
676 rxi_NeedMorePackets = FALSE;
682 rxi_FreeAllPackets(void)
684 /* must be called at proper interrupt level, etcetera */
685 /* MTUXXX need to free all Packets */
686 osi_Free(rx_mallocedP,
687 (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
688 UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
691 #ifdef RX_ENABLE_TSFPQ
693 rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
695 register struct rx_ts_info_t * rx_ts_info;
699 RX_TS_INFO_GET(rx_ts_info);
701 if (num_keep_local != rx_ts_info->_FPQ.len) {
703 MUTEX_ENTER(&rx_freePktQ_lock);
704 if (num_keep_local < rx_ts_info->_FPQ.len) {
705 xfer = rx_ts_info->_FPQ.len - num_keep_local;
706 RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
709 xfer = num_keep_local - rx_ts_info->_FPQ.len;
710 if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
711 xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
712 if (rx_nFreePackets < xfer) {
713 rxi_MorePacketsNoLock(xfer - rx_nFreePackets);
715 RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
717 MUTEX_EXIT(&rx_freePktQ_lock);
723 rxi_FlushLocalPacketsTSFPQ(void)
725 rxi_AdjustLocalPacketsTSFPQ(0, 0);
727 #endif /* RX_ENABLE_TSFPQ */
729 /* Allocate more packets iff we need more continuation buffers */
730 /* In kernel, can't page in memory with interrupts disabled, so we
731 * don't use the event mechanism. */
733 rx_CheckPackets(void)
735 if (rxi_NeedMorePackets) {
736 rxi_MorePackets(rx_initSendWindow);
740 /* In the packet freeing routine below, the assumption is that
741 we want all of the packets to be used equally frequently, so that we
742 don't get packet buffers paging out. It would be just as valid to
743 assume that we DO want them to page out if not many are being used.
744 In any event, we assume the former, and append the packets to the end
746 /* This explanation is bogus. The free list doesn't remain in any kind of
747 useful order for long: the packets in use get pretty much randomly scattered
748 across all the pages. In order to permit unused {packets,bufs} to page out, they
749 must be stored so that packets which are adjacent in memory are adjacent in the
750 free list. An array springs rapidly to mind.
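/* Sketch of the array idea floated above (purely hypothetical; rx does not
 * implement this): an index stack over the contiguous rx_mallocedP array
 * would keep free entries clustered by address, so cold pages could stay
 * paged out. */
#if 0
struct pkt_free_stack {
    struct rx_packet *base;	/* the contiguous packet array */
    int *free_idx;		/* LIFO of free slot indices */
    int top;			/* number of entries on the stack */
};
#endif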
753 /* Actually free the packet p. */
754 #ifdef RX_ENABLE_TSFPQ
756 rxi_FreePacketNoLock(struct rx_packet *p)
758 register struct rx_ts_info_t * rx_ts_info;
759 dpf(("Free %lx\n", (unsigned long)p));
761 RX_TS_INFO_GET(rx_ts_info);
762 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
763 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
764 RX_TS_FPQ_LTOG(rx_ts_info);
767 #else /* RX_ENABLE_TSFPQ */
769 rxi_FreePacketNoLock(struct rx_packet *p)
771 dpf(("Free %lx\n", (unsigned long)p));
775 queue_Append(&rx_freePacketQueue, p);
777 #endif /* RX_ENABLE_TSFPQ */
779 #ifdef RX_ENABLE_TSFPQ
781 rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
783 register struct rx_ts_info_t * rx_ts_info;
784 dpf(("Free %lx\n", (unsigned long)p));
786 RX_TS_INFO_GET(rx_ts_info);
787 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
789 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
791 MUTEX_ENTER(&rx_freePktQ_lock);
793 RX_TS_FPQ_LTOG(rx_ts_info);
795 /* Wakeup anyone waiting for packets */
798 MUTEX_EXIT(&rx_freePktQ_lock);
802 #endif /* RX_ENABLE_TSFPQ */
805 * free continuation buffers off a packet into a queue
807 * [IN] p -- packet from which continuation buffers will be freed
808 * [IN] first -- iovec offset of first continuation buffer to free
809 * [IN] q -- queue into which continuation buffers will be chained
812 * number of continuation buffers freed
815 rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
818 struct rx_packet * cb;
821 for (first = MAX(2, first); first < p->niovecs; first++, count++) {
822 iov = &p->wirevec[first];
824 osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
825 cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
826 RX_FPQ_MARK_FREE(cb);
836 * free packet continuation buffers into the global free packet pool
838 * [IN] p -- packet from which to free continuation buffers
839 * [IN] first -- iovec offset of first continuation buffer to free
845 rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
849 for (first = MAX(2, first); first < p->niovecs; first++) {
850 iov = &p->wirevec[first];
852 osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
853 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
861 #ifdef RX_ENABLE_TSFPQ
863 * free packet continuation buffers into the thread-local free pool
865 * [IN] p -- packet from which continuation buffers will be freed
866 * [IN] first -- iovec offset of first continuation buffer to free
867 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
868 * global free pool before returning
874 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
877 register struct rx_ts_info_t * rx_ts_info;
879 RX_TS_INFO_GET(rx_ts_info);
881 for (first = MAX(2, first); first < p->niovecs; first++) {
882 iov = &p->wirevec[first];
884 osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
885 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
890 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
892 MUTEX_ENTER(&rx_freePktQ_lock);
894 RX_TS_FPQ_LTOG(rx_ts_info);
896 /* Wakeup anyone waiting for packets */
899 MUTEX_EXIT(&rx_freePktQ_lock);
904 #endif /* RX_ENABLE_TSFPQ */
906 int rxi_nBadIovecs = 0;
908 /* rxi_RestoreDataBufs
910 * Restore the correct sizes to the iovecs. Called when reusing a packet
911 * for reading off the wire.
914 rxi_RestoreDataBufs(struct rx_packet *p)
917 struct iovec *iov = &p->wirevec[2];
919 RX_PACKET_IOV_INIT(p);
921 for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
922 if (!iov->iov_base) {
927 iov->iov_len = RX_CBUFFERSIZE;
931 #ifdef RX_ENABLE_TSFPQ
933 rxi_TrimDataBufs(struct rx_packet *p, int first)
936 struct iovec *iov, *end;
937 register struct rx_ts_info_t * rx_ts_info;
941 osi_Panic("TrimDataBufs 1: first must be 1");
943 /* Skip over continuation buffers containing message data */
944 iov = &p->wirevec[2];
945 end = iov + (p->niovecs - 2);
946 length = p->length - p->wirevec[1].iov_len;
947 for (; iov < end && length > 0; iov++) {
949 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
950 length -= iov->iov_len;
953 /* iov now points to the first empty data buffer. */
957 RX_TS_INFO_GET(rx_ts_info);
958 for (; iov < end; iov++) {
960 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
961 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
964 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
966 MUTEX_ENTER(&rx_freePktQ_lock);
968 RX_TS_FPQ_LTOG(rx_ts_info);
971 MUTEX_EXIT(&rx_freePktQ_lock);
977 #else /* RX_ENABLE_TSFPQ */
979 rxi_TrimDataBufs(struct rx_packet *p, int first)
982 struct iovec *iov, *end;
986 osi_Panic("TrimDataBufs 1: first must be 1");
988 /* Skip over continuation buffers containing message data */
989 iov = &p->wirevec[2];
990 end = iov + (p->niovecs - 2);
991 length = p->length - p->wirevec[1].iov_len;
992 for (; iov < end && length > 0; iov++) {
994 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
995 length -= iov->iov_len;
998 /* iov now points to the first empty data buffer. */
1003 MUTEX_ENTER(&rx_freePktQ_lock);
1005 for (; iov < end; iov++) {
1007 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1008 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
1011 rxi_PacketsUnWait();
1013 MUTEX_EXIT(&rx_freePktQ_lock);
1018 #endif /* RX_ENABLE_TSFPQ */
1020 /* Free the packet p. P is assumed not to be on any queue, i.e.
1021 * remove it yourself first if you call this routine. */
1022 #ifdef RX_ENABLE_TSFPQ
1024 rxi_FreePacket(struct rx_packet *p)
1026 rxi_FreeDataBufsTSFPQ(p, 2, 0);
1027 rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
1029 #else /* RX_ENABLE_TSFPQ */
1031 rxi_FreePacket(struct rx_packet *p)
1036 MUTEX_ENTER(&rx_freePktQ_lock);
1038 rxi_FreeDataBufsNoLock(p, 2);
1039 rxi_FreePacketNoLock(p);
1040 /* Wakeup anyone waiting for packets */
1041 rxi_PacketsUnWait();
1043 MUTEX_EXIT(&rx_freePktQ_lock);
1046 #endif /* RX_ENABLE_TSFPQ */
1048 /* rxi_AllocPacket sets up p->length so it reflects the number of
1049 * bytes in the packet at this point, **not including** the header.
1050 * The header is absolutely necessary; besides, this is the way the
1051 * length field is usually used */
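/* Concretely (illustrative note): after allocation the full datagram size is
 * p->length + RX_HEADER_SIZE, which is exactly the value the send paths
 * below hand to osi_NetSend(). */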
1052 #ifdef RX_ENABLE_TSFPQ
1054 rxi_AllocPacketNoLock(int class)
1056 register struct rx_packet *p;
1057 register struct rx_ts_info_t * rx_ts_info;
1059 RX_TS_INFO_GET(rx_ts_info);
1062 if (rxi_OverQuota(class)) {
1063 rxi_NeedMorePackets = TRUE;
1065 case RX_PACKET_CLASS_RECEIVE:
1066 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
1068 case RX_PACKET_CLASS_SEND:
1069 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
1071 case RX_PACKET_CLASS_SPECIAL:
1072 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
1074 case RX_PACKET_CLASS_RECV_CBUF:
1075 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
1077 case RX_PACKET_CLASS_SEND_CBUF:
1078 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
1081 return (struct rx_packet *)0;
1085 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1086 if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1089 if (queue_IsEmpty(&rx_freePacketQueue))
1090 osi_Panic("rxi_AllocPacket error");
1092 if (queue_IsEmpty(&rx_freePacketQueue))
1093 rxi_MorePacketsNoLock(rx_initSendWindow);
1097 RX_TS_FPQ_GTOL(rx_ts_info);
1100 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1102 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1105 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1106 * order to truncate outbound packets. In the near future, may need
1107 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1109 RX_PACKET_IOV_FULLINIT(p);
1112 #else /* RX_ENABLE_TSFPQ */
1114 rxi_AllocPacketNoLock(int class)
1116 register struct rx_packet *p;
1119 if (rxi_OverQuota(class)) {
1120 rxi_NeedMorePackets = TRUE;
1122 case RX_PACKET_CLASS_RECEIVE:
1123 rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
1125 case RX_PACKET_CLASS_SEND:
1126 rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
1128 case RX_PACKET_CLASS_SPECIAL:
1129 rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
1131 case RX_PACKET_CLASS_RECV_CBUF:
1132 rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
1134 case RX_PACKET_CLASS_SEND_CBUF:
1135 rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
1138 return (struct rx_packet *)0;
1142 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1145 if (queue_IsEmpty(&rx_freePacketQueue))
1146 osi_Panic("rxi_AllocPacket error");
1148 if (queue_IsEmpty(&rx_freePacketQueue))
1149 rxi_MorePacketsNoLock(rx_initSendWindow);
1153 p = queue_First(&rx_freePacketQueue, rx_packet);
1155 RX_FPQ_MARK_USED(p);
1157 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1160 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1161 * order to truncate outbound packets. In the near future, may need
1162 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1164 RX_PACKET_IOV_FULLINIT(p);
1167 #endif /* RX_ENABLE_TSFPQ */
1169 #ifdef RX_ENABLE_TSFPQ
1171 rxi_AllocPacketTSFPQ(int class, int pull_global)
1173 register struct rx_packet *p;
1174 register struct rx_ts_info_t * rx_ts_info;
1176 RX_TS_INFO_GET(rx_ts_info);
1178 rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
1179 if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
1180 MUTEX_ENTER(&rx_freePktQ_lock);
1182 if (queue_IsEmpty(&rx_freePacketQueue))
1183 rxi_MorePacketsNoLock(rx_initSendWindow);
1185 RX_TS_FPQ_GTOL(rx_ts_info);
1187 MUTEX_EXIT(&rx_freePktQ_lock);
1188 } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1192 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1194 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1196 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1197 * order to truncate outbound packets. In the near future, may need
1198 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1200 RX_PACKET_IOV_FULLINIT(p);
1203 #endif /* RX_ENABLE_TSFPQ */
1205 #ifdef RX_ENABLE_TSFPQ
1207 rxi_AllocPacket(int class)
1209 register struct rx_packet *p;
1211 p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
1214 #else /* RX_ENABLE_TSFPQ */
1216 rxi_AllocPacket(int class)
1218 register struct rx_packet *p;
1220 MUTEX_ENTER(&rx_freePktQ_lock);
1221 p = rxi_AllocPacketNoLock(class);
1222 MUTEX_EXIT(&rx_freePktQ_lock);
1225 #endif /* RX_ENABLE_TSFPQ */
1227 /* This guy comes up with as many buffers as it {takes,can get} given
1228 * the MTU for this call. It also sets the packet length before
1229 * returning. caution: this is often called at NETPRI
1230 * Called with call locked.
1233 rxi_AllocSendPacket(register struct rx_call *call, int want)
1235 register struct rx_packet *p = (struct rx_packet *)0;
1237 register unsigned delta;
1240 mud = call->MTU - RX_HEADER_SIZE;
1242 rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
1243 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
1245 #ifdef RX_ENABLE_TSFPQ
1246 if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
1248 want = MIN(want, mud);
1250 if ((unsigned)want > p->length)
1251 (void)rxi_AllocDataBuf(p, (want - p->length),
1252 RX_PACKET_CLASS_SEND_CBUF);
1254 if ((unsigned)p->length > mud)
1257 if (delta >= p->length) {
1265 #endif /* RX_ENABLE_TSFPQ */
1267 while (!(call->error)) {
1268 MUTEX_ENTER(&rx_freePktQ_lock);
1269 /* if an error occurred, or we get the packet we want, we're done */
1270 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
1271 MUTEX_EXIT(&rx_freePktQ_lock);
1274 want = MIN(want, mud);
1276 if ((unsigned)want > p->length)
1277 (void)rxi_AllocDataBuf(p, (want - p->length),
1278 RX_PACKET_CLASS_SEND_CBUF);
1280 if ((unsigned)p->length > mud)
1283 if (delta >= p->length) {
1292 /* no error occurred, and we didn't get a packet, so we sleep.
1293 * At this point, we assume that packets will be returned
1294 * sooner or later, as packets are acknowledged, and so we
1297 call->flags |= RX_CALL_WAIT_PACKETS;
1298 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
1299 MUTEX_EXIT(&call->lock);
1300 rx_waitingForPackets = 1;
1302 #ifdef RX_ENABLE_LOCKS
1303 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
1305 osi_rxSleep(&rx_waitingForPackets);
1307 MUTEX_EXIT(&rx_freePktQ_lock);
1308 MUTEX_ENTER(&call->lock);
1309 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
1310 call->flags &= ~RX_CALL_WAIT_PACKETS;
1319 /* Windows does not use file descriptors. */
1320 #define CountFDs(amax) 0
1322 /* count the number of used FDs */
1324 CountFDs(register int amax)
1327 register int i, code;
1331 for (i = 0; i < amax; i++) {
1332 code = fstat(i, &tstat);
1338 #endif /* AFS_NT40_ENV */
1341 #define CountFDs(amax) amax
1345 #if !defined(KERNEL) || defined(UKERNEL)
1347 /* This function reads a single packet from the interface into the
1348 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
1349 * (host,port) of the sender are stored in the supplied variables, and
1350 * the data length of the packet is stored in the packet structure.
1351 * The header is decoded. */
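/* Usage sketch (illustrative; the actual loop lives in the listener code):
 * a receiver keeps calling rxi_ReadPacket() on the same packet buffer,
 * passing good packets up and simply reusing the buffer when 0 (bogus
 * packet) is returned. */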
1353 rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host,
1356 struct sockaddr_in from;
1359 register afs_int32 tlen, savelen;
1361 rx_computelen(p, tlen);
1362 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
1364 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
1365 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
1366 * it once in order to avoid races. */
1369 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
1377 /* Extend the last iovec for padding; it's just to make sure that the
1378 * read doesn't return more data than we expect, and is done to get around
1379 * our problems caused by the lack of a length field in the rx header.
1380 * Use the extra buffer that follows the localdata in each packet
1382 savelen = p->wirevec[p->niovecs - 1].iov_len;
1383 p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
1385 memset((char *)&msg, 0, sizeof(msg));
1386 msg.msg_name = (char *)&from;
1387 msg.msg_namelen = sizeof(struct sockaddr_in);
1388 msg.msg_iov = p->wirevec;
1389 msg.msg_iovlen = p->niovecs;
1390 nbytes = rxi_Recvmsg(socket, &msg, 0);
1392 /* restore the vec to its correct state */
1393 p->wirevec[p->niovecs - 1].iov_len = savelen;
1395 p->length = (nbytes - RX_HEADER_SIZE);
1396 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
1397 if (nbytes < 0 && errno == EWOULDBLOCK) {
1398 rx_MutexIncrement(rx_stats.noPacketOnRead, rx_stats_mutex);
1399 } else if (nbytes <= 0) {
1400 MUTEX_ENTER(&rx_stats_mutex);
1401 rx_stats.bogusPacketOnRead++;
1402 rx_stats.bogusHost = from.sin_addr.s_addr;
1403 MUTEX_EXIT(&rx_stats_mutex);
1404 dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
1405 ntohs(from.sin_port), nbytes));
1410 else if ((rx_intentionallyDroppedOnReadPer100 > 0)
1411 && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
1412 rxi_DecodePacketHeader(p);
1414 *host = from.sin_addr.s_addr;
1415 *port = from.sin_port;
1417 dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
1418 p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
1419 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1421 rxi_TrimDataBufs(p, 1);
1426 /* Extract packet header. */
1427 rxi_DecodePacketHeader(p);
1429 *host = from.sin_addr.s_addr;
1430 *port = from.sin_port;
1431 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
1432 struct rx_peer *peer;
1433 rx_MutexIncrement(rx_stats.packetsRead[p->header.type - 1], rx_stats_mutex);
1435 * Try to look up this peer structure. If it doesn't exist,
1436 * don't create a new one -
1437 * we don't keep count of the bytes sent/received if a peer
1438 * structure doesn't already exist.
1440 * The peer/connection cleanup code assumes that there is 1 peer
1441 * per connection. If we actually created a peer structure here
1442 * and this packet was an rxdebug packet, the peer structure would
1443 * never be cleaned up.
1445 peer = rxi_FindPeer(*host, *port, 0, 0);
1446 /* Since this may not be associated with a connection,
1447 * it may have no refCount, meaning we could race with
1450 if (peer && (peer->refCount > 0)) {
1451 MUTEX_ENTER(&peer->peer_lock);
1452 hadd32(peer->bytesReceived, p->length);
1453 MUTEX_EXIT(&peer->peer_lock);
1457 /* Free any empty packet buffers at the end of this packet */
1458 rxi_TrimDataBufs(p, 1);
1464 #endif /* !KERNEL || UKERNEL */
1466 /* This function splits off the first packet in a jumbo packet.
1467 * As of AFS 3.5, jumbograms contain more than one fixed size
1468 * packet, and the RX_JUMBO_PACKET flag is set in all but the
1469 * last packet header. All packets (except the last) are padded to
1470 * fall on RX_CBUFFERSIZE boundaries.
1471 * HACK: We store the length of the first n-1 packets in the
1472 * last two pad bytes. */
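/* Wire layout implied above (sketch; jh = 4-byte struct rx_jumboHeader):
 *
 *   +-----------+--------------------+----+--------------------+-----
 *   | rx header | RX_JUMBOBUFFERSIZE | jh | RX_JUMBOBUFFERSIZE | ...
 *   +-----------+--------------------+----+--------------------+-----
 *
 * rxi_SplitJumboPacket() below peels off the first fixed-size packet and
 * decodes jh (flags in the top byte, cksum in the low 16 bits). */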
1475 rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
1478 struct rx_packet *np;
1479 struct rx_jumboHeader *jp;
1485 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
1486 * bytes in length. All but the first packet are preceded by
1487 * an abbreviated four byte header. The length of the last packet
1488 * is calculated from the size of the jumbogram. */
1489 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1491 if ((int)p->length < length) {
1492 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
1495 niov = p->niovecs - 2;
1497 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
1500 iov = &p->wirevec[2];
1501 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
1503 /* Get a pointer to the abbreviated packet header */
1504 jp = (struct rx_jumboHeader *)
1505 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
1507 /* Set up the iovecs for the next packet */
1508 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
1509 np->wirevec[0].iov_len = sizeof(struct rx_header);
1510 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
1511 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
1512 np->niovecs = niov + 1;
1513 for (i = 2, iov++; i <= niov; i++, iov++) {
1514 np->wirevec[i] = *iov;
1516 np->length = p->length - length;
1517 p->length = RX_JUMBOBUFFERSIZE;
1520 /* Convert the jumbo packet header to host byte order */
1521 temp = ntohl(*(afs_uint32 *) jp);
1522 jp->flags = (u_char) (temp >> 24);
1523 jp->cksum = (u_short) (temp);
1525 /* Fill in the packet header */
1526 np->header = p->header;
1527 np->header.serial = p->header.serial + 1;
1528 np->header.seq = p->header.seq + 1;
1529 np->header.flags = jp->flags;
1530 np->header.spare = jp->cksum;
1536 /* Send a UDP datagram */
1538 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
1539 int length, int istack)
1544 memset(&msg, 0, sizeof(msg));
1546 msg.msg_iovlen = nvecs;
1547 msg.msg_name = addr;
1548 msg.msg_namelen = sizeof(struct sockaddr_in);
1550 ret = rxi_Sendmsg(socket, &msg, 0);
1554 #elif !defined(UKERNEL)
1556 * message receipt is done in rxk_input or rx_put.
1559 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1561 * Copy an mblock to the contiguous area pointed to by cp.
1562 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1563 * but it doesn't really.
1564 * Returns the number of bytes not transferred.
1565 * The message is NOT changed.
1568 cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
1572 for (; mp && len > 0; mp = mp->b_cont) {
1573 if (mp->b_datap->db_type != M_DATA) {
1576 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1577 memcpy(cp, (char *)mp->b_rptr, n);
1585 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1586 * but it doesn't really.
1587 * This sucks, anyway, do it like m_cpy.... below
1590 cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
1593 register int m, n, o, t, i;
1595 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1596 if (mp->b_datap->db_type != M_DATA) {
1599 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1605 t = iovs[i].iov_len;
1608 memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1618 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1619 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1621 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1623 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1626 unsigned int l1, l2, i, t;
1628 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1629 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
1632 if (m->m_len <= off) {
1642 p1 = mtod(m, caddr_t) + off;
1643 l1 = m->m_len - off;
1645 p2 = iovs[0].iov_base;
1646 l2 = iovs[0].iov_len;
1649 t = MIN(l1, MIN(l2, (unsigned int)len));
1660 p1 = mtod(m, caddr_t);
1666 p2 = iovs[i].iov_base;
1667 l2 = iovs[i].iov_len;
1675 #endif /* AFS_SUN5_ENV */
1677 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1679 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1680 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1686 struct rx_packet *phandle;
1687 int hdr_len, data_len;
1692 m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1699 #endif /*KERNEL && !UKERNEL */
1702 /* send a response to a debug packet */
1705 rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
1706 afs_int32 ahost, short aport, int istack)
1708 struct rx_debugIn tin;
1710 struct rx_serverQueueEntry *np, *nqe;
1713 * Only respond to client-initiated Rx debug packets,
1714 * and clear the client flag in the response.
1716 if (ap->header.flags & RX_CLIENT_INITIATED) {
1717 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1718 rxi_EncodePacketHeader(ap);
1723 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1724 /* all done with packet, now set length to the truth, so we can
1725 * reuse this packet */
1726 rx_computelen(ap, ap->length);
1728 tin.type = ntohl(tin.type);
1729 tin.index = ntohl(tin.index);
1731 case RX_DEBUGI_GETSTATS:{
1732 struct rx_debugStats tstat;
1734 /* get basic stats */
1735 memset((char *)&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1736 tstat.version = RX_DEBUGI_VERSION;
1737 #ifndef RX_ENABLE_LOCKS
1738 tstat.waitingForPackets = rx_waitingForPackets;
1740 MUTEX_ENTER(&rx_serverPool_lock);
1741 tstat.nFreePackets = htonl(rx_nFreePackets);
1742 tstat.callsExecuted = htonl(rxi_nCalls);
1743 tstat.packetReclaims = htonl(rx_packetReclaims);
1744 tstat.usedFDs = CountFDs(64);
1745 tstat.nWaiting = htonl(rx_nWaiting);
1746 tstat.nWaited = htonl(rx_nWaited);
1747 queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
1749 MUTEX_EXIT(&rx_serverPool_lock);
1750 tstat.idleThreads = htonl(tstat.idleThreads);
1751 tl = sizeof(struct rx_debugStats) - ap->length;
1753 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1756 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1758 ap->length = sizeof(struct rx_debugStats);
1759 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1760 rx_computelen(ap, ap->length);
1765 case RX_DEBUGI_GETALLCONN:
1766 case RX_DEBUGI_GETCONN:{
1768 register struct rx_connection *tc;
1769 struct rx_call *tcall;
1770 struct rx_debugConn tconn;
1771 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1774 tl = sizeof(struct rx_debugConn) - ap->length;
1776 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1780 memset((char *)&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1781 /* get N'th (maybe) "interesting" connection info */
1782 for (i = 0; i < rx_hashTableSize; i++) {
1783 #if !defined(KERNEL)
1784 /* the time complexity of the algorithm used here
1785 * exponentially increases with the number of connections.
1787 #ifdef AFS_PTHREAD_ENV
1793 MUTEX_ENTER(&rx_connHashTable_lock);
1794 /* We might be slightly out of step since we are not
1795 * locking each call, but this is only debugging output.
1797 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
1798 if ((all || rxi_IsConnInteresting(tc))
1799 && tin.index-- <= 0) {
1800 tconn.host = tc->peer->host;
1801 tconn.port = tc->peer->port;
1802 tconn.cid = htonl(tc->cid);
1803 tconn.epoch = htonl(tc->epoch);
1804 tconn.serial = htonl(tc->serial);
1805 for (j = 0; j < RX_MAXCALLS; j++) {
1806 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1807 if ((tcall = tc->call[j])) {
1808 tconn.callState[j] = tcall->state;
1809 tconn.callMode[j] = tcall->mode;
1810 tconn.callFlags[j] = tcall->flags;
1811 if (queue_IsNotEmpty(&tcall->rq))
1812 tconn.callOther[j] |= RX_OTHER_IN;
1813 if (queue_IsNotEmpty(&tcall->tq))
1814 tconn.callOther[j] |= RX_OTHER_OUT;
1816 tconn.callState[j] = RX_STATE_NOTINIT;
1819 tconn.natMTU = htonl(tc->peer->natMTU);
1820 tconn.error = htonl(tc->error);
1821 tconn.flags = tc->flags;
1822 tconn.type = tc->type;
1823 tconn.securityIndex = tc->securityIndex;
1824 if (tc->securityObject) {
1825 RXS_GetStats(tc->securityObject, tc,
1827 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1828 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1831 DOHTONL(packetsReceived);
1832 DOHTONL(packetsSent);
1833 DOHTONL(bytesReceived);
1837 sizeof(tconn.secStats.spares) /
1842 sizeof(tconn.secStats.sparel) /
1843 sizeof(afs_int32); i++)
1847 MUTEX_EXIT(&rx_connHashTable_lock);
1848 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1851 ap->length = sizeof(struct rx_debugConn);
1852 rxi_SendDebugPacket(ap, asocket, ahost, aport,
1858 MUTEX_EXIT(&rx_connHashTable_lock);
1860 /* if we make it here, there are no interesting packets */
1861 tconn.cid = htonl(0xffffffff); /* means end */
1862 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1865 ap->length = sizeof(struct rx_debugConn);
1866 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1872 * Pass back all the peer structures we have available
1875 case RX_DEBUGI_GETPEER:{
1877 register struct rx_peer *tp;
1878 struct rx_debugPeer tpeer;
1881 tl = sizeof(struct rx_debugPeer) - ap->length;
1883 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1887 memset((char *)&tpeer, 0, sizeof(tpeer));
1888 for (i = 0; i < rx_hashTableSize; i++) {
1889 #if !defined(KERNEL)
1890 /* the time complexity of the algorithm used here
1891 * exponentially increases with the number of peers.
1893 * Yielding after processing each hash table entry
1894 * and dropping rx_peerHashTable_lock.
1895 * also increases the risk that we will miss a new
1896 * entry - but we are willing to live with this
1897 * limitation since this is meant for debugging only
1899 #ifdef AFS_PTHREAD_ENV
1905 MUTEX_ENTER(&rx_peerHashTable_lock);
1906 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1907 if (tin.index-- <= 0) {
1908 tpeer.host = tp->host;
1909 tpeer.port = tp->port;
1910 tpeer.ifMTU = htons(tp->ifMTU);
1911 tpeer.idleWhen = htonl(tp->idleWhen);
1912 tpeer.refCount = htons(tp->refCount);
1913 tpeer.burstSize = tp->burstSize;
1914 tpeer.burst = tp->burst;
1915 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1916 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1917 tpeer.rtt = htonl(tp->rtt);
1918 tpeer.rtt_dev = htonl(tp->rtt_dev);
1919 tpeer.timeout.sec = htonl(tp->timeout.sec);
1920 tpeer.timeout.usec = htonl(tp->timeout.usec);
1921 tpeer.nSent = htonl(tp->nSent);
1922 tpeer.reSends = htonl(tp->reSends);
1923 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1924 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1925 tpeer.rateFlag = htonl(tp->rateFlag);
1926 tpeer.natMTU = htons(tp->natMTU);
1927 tpeer.maxMTU = htons(tp->maxMTU);
1928 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1929 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1930 tpeer.MTU = htons(tp->MTU);
1931 tpeer.cwind = htons(tp->cwind);
1932 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1933 tpeer.congestSeq = htons(tp->congestSeq);
1934 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1935 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1936 tpeer.bytesReceived.high =
1937 htonl(tp->bytesReceived.high);
1938 tpeer.bytesReceived.low =
1939 htonl(tp->bytesReceived.low);
1941 MUTEX_EXIT(&rx_peerHashTable_lock);
1942 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1945 ap->length = sizeof(struct rx_debugPeer);
1946 rxi_SendDebugPacket(ap, asocket, ahost, aport,
1952 MUTEX_EXIT(&rx_peerHashTable_lock);
1954 /* if we make it here, there are no interesting packets */
1955 tpeer.host = htonl(0xffffffff); /* means end */
1956 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1959 ap->length = sizeof(struct rx_debugPeer);
1960 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1965 case RX_DEBUGI_RXSTATS:{
1969 tl = sizeof(rx_stats) - ap->length;
1971 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1975 /* Since it's all int32s, convert to network order with a loop. */
1976 MUTEX_ENTER(&rx_stats_mutex);
1977 s = (afs_int32 *) & rx_stats;
1978 for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
1979 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
1982 ap->length = sizeof(rx_stats);
1983 MUTEX_EXIT(&rx_stats_mutex);
1984 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1990 /* error response packet */
1991 tin.type = htonl(RX_DEBUGI_BADTYPE);
1992 tin.index = tin.type;
1993 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1995 ap->length = sizeof(struct rx_debugIn);
1996 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2004 rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
2005 afs_int32 ahost, short aport, int istack)
2010 * Only respond to client-initiated version requests, and
2011 * clear that flag in the response.
2013 if (ap->header.flags & RX_CLIENT_INITIATED) {
2016 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2017 rxi_EncodePacketHeader(ap);
2018 memset(buf, 0, sizeof(buf));
2019 strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2020 rx_packetwrite(ap, 0, 65, buf);
2023 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2031 /* send a debug packet back to the sender */
2033 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2034 afs_int32 ahost, short aport, afs_int32 istack)
2036 struct sockaddr_in taddr;
2042 int waslocked = ISAFS_GLOCK();
2045 taddr.sin_family = AF_INET;
2046 taddr.sin_port = aport;
2047 taddr.sin_addr.s_addr = ahost;
2048 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2049 taddr.sin_len = sizeof(struct sockaddr_in);
2052 /* We need to trim the niovecs. */
2053 nbytes = apacket->length;
2054 for (i = 1; i < apacket->niovecs; i++) {
2055 if (nbytes <= apacket->wirevec[i].iov_len) {
2056 savelen = apacket->wirevec[i].iov_len;
2057 saven = apacket->niovecs;
2058 apacket->wirevec[i].iov_len = nbytes;
2059 apacket->niovecs = i + 1; /* so condition fails because i == niovecs */
2061 nbytes -= apacket->wirevec[i].iov_len;
2064 #ifdef RX_KERNEL_TRACE
2065 if (ICL_SETACTIVE(afs_iclSetp)) {
2068 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2069 "before osi_NetSend()");
2077 /* debug packets are not reliably delivered, hence the cast below. */
2078 (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2079 apacket->length + RX_HEADER_SIZE, istack);
2081 #ifdef RX_KERNEL_TRACE
2082 if (ICL_SETACTIVE(afs_iclSetp)) {
2084 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2085 "after osi_NetSend()");
2094 if (saven) { /* means we truncated the packet above. */
2095 apacket->wirevec[i - 1].iov_len = savelen;
2096 apacket->niovecs = saven;
2101 /* Send the packet to appropriate destination for the specified
2102 * call. The header is first encoded and placed in the packet.
2105 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2106 struct rx_packet *p, int istack)
2112 struct sockaddr_in addr;
2113 register struct rx_peer *peer = conn->peer;
2116 char deliveryType = 'S';
2118 /* The address we're sending the packet to */
2119 memset(&addr, 0, sizeof(addr));
2120 addr.sin_family = AF_INET;
2121 addr.sin_port = peer->port;
2122 addr.sin_addr.s_addr = peer->host;
2124 /* This stuff should be revamped, I think, so that most, if not
2125 * all, of the header stuff is always added here. We could
2126 * probably do away with the encode/decode routines. XXXXX */
2128 /* Stamp each packet with a unique serial number. The serial
2129 * number is maintained on a connection basis because some types
2130 * of security may be based on the serial number of the packet,
2131 * and security is handled on a per authenticated-connection
2133 /* Pre-increment, to guarantee no zero serial number; a zero
2134 * serial number means the packet was never sent. */
2135 MUTEX_ENTER(&conn->conn_data_lock);
2136 p->header.serial = ++conn->serial;
2137 MUTEX_EXIT(&conn->conn_data_lock);
2138 /* This is so we can adjust retransmit time-outs better in the face of
2139 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2141 if (p->firstSerial == 0) {
2142 p->firstSerial = p->header.serial;
2145 /* If an output tracer function is defined, call it with the packet and
2146 * network address. Note this function may modify its arguments. */
2147 if (rx_almostSent) {
2148 int drop = (*rx_almostSent) (p, &addr);
2149 /* drop packet if return value is non-zero? */
2151 deliveryType = 'D'; /* Drop the packet */
2155 /* Get network byte order header */
2156 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2157 * touch ALL the fields */
2159 /* Send the packet out on the same socket that related packets are being
2163 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2166 /* Possibly drop this packet, for testing purposes */
2167 if ((deliveryType == 'D')
2168 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2169 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2170 deliveryType = 'D'; /* Drop the packet */
2172 deliveryType = 'S'; /* Send the packet */
2173 #endif /* RXDEBUG */
2175 /* Loop until the packet is sent. We'd prefer just to use a
2176 * blocking socket, but unfortunately the interface doesn't
2177 * allow us to have the socket block in send mode, and not
2178 * block in receive mode */
2180 waslocked = ISAFS_GLOCK();
2181 #ifdef RX_KERNEL_TRACE
2182 if (ICL_SETACTIVE(afs_iclSetp)) {
2185 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2186 "before osi_NetSend()");
2195 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2196 p->length + RX_HEADER_SIZE, istack)) != 0) {
2197 /* send failed, so let's hurry up the resend, eh? */
2198 rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
2199 p->retryTime = p->timeSent; /* resend it very soon */
2200 clock_Addmsec(&(p->retryTime),
2201 10 + (((afs_uint32) p->backoff) << 8));
2202 /* Some systems are nice and tell us right away that we cannot
2203 * reach this recipient by returning an error code.
2204 * So, when this happens let's "down" the host NOW so
2205 * we don't sit around waiting for this host to timeout later.
2209 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2210 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2211 code == -ENETUNREACH
2212 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2213 code == EHOSTUNREACH
2218 call->lastReceiveTime = 0;
2221 #ifdef RX_KERNEL_TRACE
2222 if (ICL_SETACTIVE(afs_iclSetp)) {
2224 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2225 "after osi_NetSend()");
2236 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2238 rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2239 MUTEX_ENTER(&peer->peer_lock);
2240 hadd32(peer->bytesSent, p->length);
2241 MUTEX_EXIT(&peer->peer_lock);
2244 /* Send a list of packets to appropriate destination for the specified
2245 * connection. The headers are first encoded and placed in the packets.
2248 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2249 struct rx_packet **list, int len, int istack)
2251 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2254 struct sockaddr_in addr;
2255 register struct rx_peer *peer = conn->peer;
2257 struct rx_packet *p = NULL;
2258 struct iovec wirevec[RX_MAXIOVECS];
2259 int i, length, code;
2262 struct rx_jumboHeader *jp;
2264 char deliveryType = 'S';
2266 /* The address we're sending the packet to */
2267 addr.sin_family = AF_INET;
2268 addr.sin_port = peer->port;
2269 addr.sin_addr.s_addr = peer->host;
2271 if (len + 1 > RX_MAXIOVECS) {
2272 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2276 * Stamp the packets in this jumbogram with consecutive serial numbers
2278 MUTEX_ENTER(&conn->conn_data_lock);
2279 serial = conn->serial;
2280 conn->serial += len;
2281 MUTEX_EXIT(&conn->conn_data_lock);
2284 /* This stuff should be revamped, I think, so that most, if not
2285 * all, of the header stuff is always added here. We could
2286 * probably do away with the encode/decode routines. XXXXX */
2289 length = RX_HEADER_SIZE;
2290 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2291 wirevec[0].iov_len = RX_HEADER_SIZE;
2292 for (i = 0; i < len; i++) {
2295 /* The whole 3.5 jumbogram scheme relies on packets fitting
2296 * in a single packet buffer. */
2297 if (p->niovecs > 2) {
2298 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2301 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2304 if (p->length != RX_JUMBOBUFFERSIZE) {
2305 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2307 p->header.flags |= RX_JUMBO_PACKET;
2308 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2309 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2311 wirevec[i + 1].iov_len = p->length;
2312 length += p->length;
2314 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2316 /* Convert jumbo packet header to network byte order */
2317 temp = (afs_uint32) (p->header.flags) << 24;
2318 temp |= (afs_uint32) (p->header.spare);
2319 *(afs_uint32 *) jp = htonl(temp);
2321 jp = (struct rx_jumboHeader *)
2322 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2324 /* Stamp each packet with a unique serial number. The serial
2325 * number is maintained on a connection basis because some types
2326 * of security may be based on the serial number of the packet,
2327 * and security is handled on a per authenticated-connection
2329 /* Pre-increment, to guarantee no zero serial number; a zero
2330 * serial number means the packet was never sent. */
2331 p->header.serial = ++serial;
2332 /* This is so we can adjust retransmit time-outs better in the face of
2333 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2335 if (p->firstSerial == 0) {
2336 p->firstSerial = p->header.serial;
2339 /* If an output tracer function is defined, call it with the packet and
2340 * network address. Note this function may modify its arguments. */
2341 if (rx_almostSent) {
2342 int drop = (*rx_almostSent) (p, &addr);
2343 /* drop packet if return value is non-zero? */
2345 deliveryType = 'D'; /* Drop the packet */
2349 /* Get network byte order header */
2350 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2351 * touch ALL the fields */
2354 /* Send the packet out on the same socket that related packets are being
2358 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
    } else {
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */

        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        waslocked = ISAFS_GLOCK();
        if (!istack && waslocked)
            AFS_GUNLOCK();
#endif
        if ((code =
             osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
                         istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
            for (i = 0; i < len; i++) {
                p = list[i];
                p->retryTime = p->timeSent;     /* resend it very soon */
                clock_Addmsec(&(p->retryTime),
                              10 + (((afs_uint32) p->backoff) << 8));
            }
            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to time out later.
             */
            if (call &&
#ifdef AFS_NT40_ENV
                code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
#elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
                code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
                code == EHOSTUNREACH
#else
                0
#endif
                )
                call->lastReceiveTime = 0;
        }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        if (!istack && waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif /* RXDEBUG */
    rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
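
/* For reference, a len-packet jumbogram assembled above occupies len + 1
 * iovecs and looks like this on the wire:
 *
 *   [ RX header (RX_HEADER_SIZE) ]
 *   [ data buffer (RX_JUMBOBUFFERSIZE) ][ jumbo header (RX_JUMBOHEADERSIZE) ]
 *     ... repeated for each packet except the last ...
 *   [ data buffer of the last packet (<= RX_JUMBOBUFFERSIZE) ]
 *
 * Each jumbo header carries the flags and spare field of the packet that
 * follows it. */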

/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(register struct rx_call *call,
                register struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;
    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
        p->length = nbytes;
    else
        nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);

    /* Trim the iovec list so that exactly nbytes of payload are sent */
    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1; /* so condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {                /* means we truncated the packet above.  We
                                 * probably don't really need to do this, but
                                 * it seems safer this way, given that sneaky
                                 * optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}
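
/* A minimal usage sketch (illustrative only, not part of the send path
 * here): sending a connection-level abort.  The error code is converted
 * to network order and becomes the packet's data portion; passing a null
 * call directs the packet at the connection itself. */
#if 0
static void
example_SendConnAbort(struct rx_connection *conn, afs_int32 error, int istack)
{
    afs_int32 err = htonl(error);
    (void)rxi_SendSpecial((struct rx_call *)0, conn, (struct rx_packet *)0,
                          RX_PACKET_TYPE_ABORT, (char *)&err,
                          sizeof(err), istack);
}
#endif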

/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);     /* MTUXXX */

    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
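
/* For reference, the resulting 28-byte wire header is seven 32-bit words,
 * each in network byte order:
 *
 *   word 0: epoch
 *   word 1: cid (connection id | channel)
 *   word 2: callNumber
 *   word 3: seq
 *   word 4: serial
 *   word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6: spare<<16 | serviceId (service id in the low 16 bits;
 *           the spare/checksum field occupies the top 16 bits)
 */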

/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);     /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf++);
    p->header.cid = ntohl(*buf++);
    p->header.callNumber = ntohl(*buf++);
    p->header.seq = ntohl(*buf++);
    p->header.serial = ntohl(*buf++);

    temp = ntohl(*buf++);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf++);
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}

void
rxi_PrepareSendPacket(register struct rx_call *call,
                      register struct rx_packet *p, register int last)
{
    register struct rx_connection *conn = call->conn;
    register int i;
    ssize_t len;                /* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);  /* Never yet transmitted */
    clock_Zero(&p->firstSent);  /* Never yet transmitted */
    p->header.serial = 0;       /* Another way of saying never transmitted... */

    /* Make sure that the "length" and the sum of the iov_lens match. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
    } else if (i < p->niovecs) {
        /* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
        rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */ );
#else /* !RX_ENABLE_TSFPQ */
        MUTEX_ENTER(&rx_freePktQ_lock);
        rxi_FreeDataBufsNoLock(p, i);
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */
        p->niovecs = i;
    }
    p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
}
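
/* Worked example of the length fixup above: with len == 1000 bytes to
 * cover and niovecs == 3 (the header iovec plus two 1412-byte data
 * buffers), the loop exits at i == 2 with len == 1000 - 1412 == -412.
 * Buffer 2 is surplus and is freed (niovecs becomes 2), and
 * wirevec[1].iov_len += -412 trims the remaining buffer to exactly 1000
 * bytes, so the iovec chain ends at the last byte of data plus security
 * header. */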

/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
        return mtu;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
        return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
        return adjMTU;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
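
/* Worked example, using the usual constants (RX_HEADER_SIZE 28,
 * RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4): for an Ethernet MTU of
 * 1500, adjMTU == 1444; the 56 bytes left over cannot hold another
 * 1416-byte jumbo buffer, so the adjusted MTU is 1444.  For a 9000-byte
 * jumbo-frame MTU, (9000 - 1444) / 1416 == 5 extra buffers fit, giving
 * 1444 + 5 * 1416 == 8524. */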

/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}

/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* Subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
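
/* Worked example (assuming UDP_HDR_SIZE == 28, i.e. a 20-byte IP header
 * plus an 8-byte UDP header, and the constants used above): for frags == 3
 * and mtu == 1444, maxMTU == 3 * (1444 + 28) - 28 == 4388, which is well
 * below RX_MAX_PACKET_SIZE so the MIN leaves it unchanged.  Subtracting
 * the first/last packet overhead of 28 + 2 * 1412 + 4 == 2856 leaves 1532,
 * and 2 + 1532 / 1416 == 3, so a three-fragment datagram carries three
 * packets. */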