 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html

#include <afsconfig.h>
#include "afs/param.h"
#include <afs/param.h>

#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"

/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */

struct rx_packet *rx_mallocedP = 0;

extern char cml_version_number[];
extern int (*rx_almostSent) ();

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_int32 ahost, short aport,

static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
                                   struct rx_queue * q);
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);

/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 * all packet buffers (iov_base) are integral multiples of
 * offset is an integral multiple of the word size.
 */
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
        l += packet->wirevec[i].iov_len;

/* all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                             (offset - l))) = data;
        l += packet->wirevec[i].iov_len;

/* all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    while ((resid > 0) && (i < packet->niovecs)) {
        j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        l += packet->wirevec[i].iov_len;

    return (resid ? (r - resid) : r);

/* all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
 */
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    while ((resid > 0) && (i < RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */

        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
        l += packet->wirevec[i].iov_len;

    return (resid ? (r - resid) : r);
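
/* Illustrative sketch (not part of the original source): how a caller might
 * exercise the slow accessors above.  The function name example_slow_rw is
 * hypothetical, and the fragment is guarded out of the build. */
#if 0				/* example only; not compiled */
static void
example_slow_rw(struct rx_packet *packet)
{
    afs_int32 v;
    char buf[16];

    /* offsets must be word-aligned, per the rules above */
    rx_SlowPutInt32(packet, 0, 1234);
    v = rx_SlowGetInt32(packet, 0);

    /* bulk copy across iovec boundaries; the return value is the number
     * of bytes actually copied, which may be less than requested */
    if (rx_SlowReadPacket(packet, 0, sizeof(buf), buf) != sizeof(buf)) {
	/* fewer bytes were available than we asked for */
    }
}
#endif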
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
    register struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
        RX_PACKET_IOV_FULLINIT(p);

#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    register struct rx_packet *c;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
        MUTEX_ENTER(&rx_freePktQ_lock);

        if ((transfer + rx_TSFPQGlobSize) <= rx_nFreePackets) {
            transfer += rx_TSFPQGlobSize;
        } else if (transfer <= rx_nFreePackets) {
            transfer = rx_nFreePackets;
            /* alloc enough for us, plus a few globs for other threads */
            alloc = transfer + (3 * rx_TSFPQGlobSize) - rx_nFreePackets;
            rxi_MorePacketsNoLock(MAX(alloc, rx_initSendWindow));
            transfer += rx_TSFPQGlobSize;

        RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

        MUTEX_EXIT(&rx_freePktQ_lock);

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
         num_pkts--, overq++);

        rxi_NeedMorePackets = TRUE;
        MUTEX_ENTER(&rx_stats_mutex);
        case RX_PACKET_CLASS_RECEIVE:
            rx_stats.receivePktAllocFailures++;
        case RX_PACKET_CLASS_SEND:
            rx_stats.sendPktAllocFailures++;
        case RX_PACKET_CLASS_SPECIAL:
            rx_stats.specialPktAllocFailures++;
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_stats.receiveCbufPktAllocFailures++;
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_stats.sendCbufPktAllocFailures++;
        MUTEX_EXIT(&rx_stats_mutex);

    if (rx_nFreePackets < num_pkts)
        num_pkts = rx_nFreePackets;

        rxi_NeedMorePackets = TRUE;

    if (rx_nFreePackets < num_pkts) {
        rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), rx_initSendWindow));

    for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
         i++, c=queue_Next(c, rx_packet)) {

    queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);
    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    register struct rx_ts_info_t * rx_ts_info;
    register struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

        for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        for (queue_Scan(q, c, nc, rx_packet)) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);

    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    register struct rx_packet *p, *np;

    osi_Assert(num_pkts >= 0);

        for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
        for (queue_Scan(q, p, np, rx_packet)) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);

    queue_SpliceAppend(q, &cbs);

    MUTEX_ENTER(&rx_freePktQ_lock);
    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
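
/* Illustrative sketch (not part of the original source): allocating a batch
 * of packets into a caller-owned queue and returning them.  example_batch is
 * a hypothetical name, and the assumption that rxi_AllocPackets returns the
 * count actually allocated is drawn from the code above. */
#if 0				/* example only; not compiled */
static void
example_batch(void)
{
    struct rx_queue q;
    int n;

    queue_Init(&q);
    /* may return fewer than requested if the pool is over quota */
    n = rxi_AllocPackets(RX_PACKET_CLASS_SEND, 8, &q);
    /* ... use the n packets chained on q ... */
    rxi_FreePackets(n, &q);	/* passing 0 also works: length unknown */
}
#endif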
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    register struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
    if ((nv + p->niovecs) > RX_MAXWVECS)
        nv = RX_MAXWVECS - p->niovecs;

    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
        p->wirevec[i].iov_base = (caddr_t) cb->localdata;
        p->wirevec[i].iov_len = RX_CBUFFERSIZE;

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
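
/* Worked example (illustrative, not from the original source): a request for
 * nb bytes rounds up to nv = ceil(nb / RX_CBUFFERSIZE) continuation buffers,
 * clipped so niovecs never exceeds RX_MAXWVECS.  On return, a value <= 0
 * means the caller got all the space it asked for; a positive value is the
 * byte count that could not be attached.  example_grow is hypothetical. */
#if 0				/* example only; not compiled */
static void
example_grow(struct rx_packet *p)
{
    int shortfall = rxi_AllocDataBuf(p, 4000, RX_PACKET_CLASS_SEND_CBUF);
    if (shortfall > 0) {
	/* packet grew, but by 'shortfall' fewer bytes than requested */
    }
}
#endif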
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        RX_TS_FPQ_CHECKIN(rx_ts_info,p);
    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_NeedMorePackets = FALSE;
        MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        RX_TS_FPQ_CHECKIN(rx_ts_info,p);
    rx_ts_info->_FPQ.delta += apackets;

        (num_keep_local < apackets)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
        rxi_NeedMorePackets = FALSE;
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
    struct rx_packet *p, *e;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    memset((char *)p, 0, getme);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);

    rx_nFreePackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_stats_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_stats_mutex);
#endif /* RX_ENABLE_TSFPQ */
    rxi_NeedMorePackets = FALSE;

rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));

#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (num_keep_local < rx_ts_info->_FPQ.len) {
            xfer = rx_ts_info->_FPQ.len - num_keep_local;
            RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
            xfer = num_keep_local - rx_ts_info->_FPQ.len;
            if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
                xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
            if (rx_nFreePackets < xfer) {
                rxi_MorePacketsNoLock(xfer - rx_nFreePackets);
            RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
        MUTEX_EXIT(&rx_freePktQ_lock);

rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */

/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_initSendWindow);

/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for afs_int32: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.

/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    register struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info,p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        RX_TS_FPQ_LTOG(rx_ts_info);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %lx\n", (unsigned long)p));

    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    register struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info,p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *
 * number of continuation buffers freed
 */
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
    struct rx_packet * cb;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
        cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
        RX_FPQ_MARK_FREE(cb);

/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p      -- packet from which to free continuation buffers
 * [IN] first  -- iovec offset of first continuation buffer to free
 */
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));

#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p             -- packet from which continuation buffers will be freed
 * [IN] first         -- iovec offset of first continuation buffer to free
 * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 *                       global free pool before returning
 */
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
        RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
        iov->iov_len = RX_CBUFFERSIZE;

#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    register struct rx_ts_info_t * rx_ts_info;

        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */
        RX_TS_INFO_GET(rx_ts_info);
        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
        if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
            MUTEX_ENTER(&rx_freePktQ_lock);
            RX_TS_FPQ_LTOG(rx_ts_info);
            MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;

        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */
        MUTEX_ENTER(&rx_freePktQ_lock);
        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
        rxi_PacketsUnWait();
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketNoLock(int class)
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        MUTEX_ENTER(&rx_stats_mutex);
        case RX_PACKET_CLASS_RECEIVE:
            rx_stats.receivePktAllocFailures++;
        case RX_PACKET_CLASS_SEND:
            rx_stats.sendPktAllocFailures++;
        case RX_PACKET_CLASS_SPECIAL:
            rx_stats.specialPktAllocFailures++;
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_stats.receiveCbufPktAllocFailures++;
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_stats.sendCbufPktAllocFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
        return (struct rx_packet *)0;

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_initSendWindow);

        RX_TS_FPQ_GTOL(rx_ts_info);

    RX_TS_FPQ_CHECKOUT(rx_ts_info,p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacketNoLock(int class)
    register struct rx_packet *p;

    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        MUTEX_ENTER(&rx_stats_mutex);
        case RX_PACKET_CLASS_RECEIVE:
            rx_stats.receivePktAllocFailures++;
        case RX_PACKET_CLASS_SEND:
            rx_stats.sendPktAllocFailures++;
        case RX_PACKET_CLASS_SPECIAL:
            rx_stats.specialPktAllocFailures++;
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_stats.receiveCbufPktAllocFailures++;
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_stats.sendCbufPktAllocFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
        return (struct rx_packet *)0;

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_initSendWindow);

    p = queue_First(&rx_freePacketQueue, rx_packet);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketTSFPQ(int class, int pull_global)
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_initSendWindow);

        RX_TS_FPQ_GTOL(rx_ts_info);

        MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

    RX_TS_FPQ_CHECKOUT(rx_ts_info,p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    register struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    register struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
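
/* Illustrative sketch (not part of the original source): a minimal
 * alloc/use/free cycle.  Note that p->length counts only the data area;
 * RX_HEADER_SIZE is not included, per the comment above rxi_AllocPacketNoLock.
 * example_alloc is a hypothetical name. */
#if 0				/* example only; not compiled */
static void
example_alloc(void)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
    if (p) {
	/* p->length bytes of payload space are available here */
	rxi_FreePacket(p);
    }
}
#endif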
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
rxi_AllocSendPacket(register struct rx_call *call, int want)
    register struct rx_packet *p = (struct rx_packet *)0;
    register unsigned delta;

    mud = call->MTU - RX_HEADER_SIZE;
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
        want = MIN(want, mud);

        if ((unsigned)want > p->length)
            (void)rxi_AllocDataBuf(p, (want - p->length),
                                   RX_PACKET_CLASS_SEND_CBUF);

        if ((unsigned)p->length > mud)

        if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);

            want = MIN(want, mud);

            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);

            if ((unsigned)p->length > mud)

            if (delta >= p->length) {

        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
         */
        call->flags |= RX_CALL_WAIT_PACKETS;
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
        osi_rxSleep(&rx_waitingForPackets);

        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
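
/* Illustrative summary (not part of the original source) of the sizing
 * arithmetic above; names match the code, and the two trailing steps are
 * assumed from context since those lines are elided here:
 *
 *     mud  = call->MTU - RX_HEADER_SIZE;       maximum user data
 *     want = MIN(want, mud);                   never ask for more than the MTU allows
 *     if (want > p->length)  grow the packet with rxi_AllocDataBuf(...)
 *     if (p->length > mud)   trim p->length back to mud      (assumed)
 *     p->length -= delta;    reserve security header+trailer (assumed)
 */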
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
/* count the number of used FDs */
CountFDs(register int amax)
    register int i, code;

    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
#endif /* AFS_NT40_ENV */
#define CountFDs(amax) amax

#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    register afs_int32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
        if (nbytes < 0 && errno == EWOULDBLOCK) {
            MUTEX_ENTER(&rx_stats_mutex);
            rx_stats.noPacketOnRead++;
            MUTEX_EXIT(&rx_stats_mutex);
        } else if (nbytes <= 0) {
            MUTEX_ENTER(&rx_stats_mutex);
            rx_stats.bogusPacketOnRead++;
            rx_stats.bogusHost = from.sin_addr.s_addr;
            MUTEX_EXIT(&rx_stats_mutex);
            dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
                 ntohs(from.sin_port), nbytes));

    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
             && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;

        dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
             p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
             p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
        rxi_TrimDataBufs(p, 1);

        /* Extract packet header. */
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
            struct rx_peer *peer;
            MUTEX_ENTER(&rx_stats_mutex);
            rx_stats.packetsRead[p->header.type - 1]++;
            MUTEX_EXIT(&rx_stats_mutex);
            /*
             * Try to look up this peer structure.  If it doesn't exist,
             * don't create a new one -
             * we don't keep count of the bytes sent/received if a peer
             * structure doesn't already exist.
             *
             * The peer/connection cleanup code assumes that there is 1 peer
             * per connection.  If we actually created a peer structure here
             * and this packet was an rxdebug packet, the peer structure would
             * never be cleaned up.
             */
            peer = rxi_FindPeer(*host, *port, 0, 0);
            /* Since this may not be associated with a connection,
             * it may have no refCount, meaning we could race with
             */
            if (peer && (peer->refCount > 0)) {
                MUTEX_ENTER(&peer->peer_lock);
                hadd32(peer->bytesReceived, p->length);
                MUTEX_EXIT(&peer->peer_lock);

        /* Free any empty packet buffers at the end of this packet */
        rxi_TrimDataBufs(p, 1);
#endif /* !KERNEL || UKERNEL */
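
/* Illustrative sketch (not part of the original source): a receive loop
 * built on rxi_ReadPacket.  example_rx_loop is hypothetical, and the u_short
 * port type is assumed from the truncated prototype above; a zero return
 * means the datagram was bogus and the packet can simply be reused. */
#if 0				/* example only; not compiled */
static void
example_rx_loop(osi_socket sock, struct rx_packet *p)
{
    afs_uint32 host;
    u_short port;

    for (;;) {
	rxi_RestoreDataBufs(p);	/* reset iovecs before reusing the packet */
	if (rxi_ReadPacket(sock, p, &host, &port)) {
	    /* header decoded, (host,port) filled in; dispatch the packet */
	}
    }
}
#endif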
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
    niov = p->niovecs - 2;
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
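
/* Worked example (illustrative): the abbreviated jumbo header is one 32-bit
 * word; after ntohl(), bits 31..24 carry the flags byte and bits 15..0 the
 * checksum, which is exactly what the decode above extracts:
 *
 *     temp      = ntohl(*(afs_uint32 *) jp);
 *     jp->flags = (u_char) (temp >> 24);
 *     jp->cksum = (u_short) (temp);
 *
 * e.g. a decoded word of 0x20001234 yields flags 0x20 and cksum 0x1234. */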
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
    register int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
            t = iovs[i].iov_len;
        memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

        if (m->m_len <= off) {

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

        t = MIN(l1, MIN(l2, (unsigned int)len));

            p1 = mtod(m, caddr_t);
            p2 = iovs[i].iov_base;
            l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;

    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /* KERNEL && !UKERNEL */

/* send a response to a debug packet */
rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
                       afs_int32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;

            /* get basic stats */
            memset((char *)&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_nWaiting);
            tstat.nWaited = htonl(rx_nWaited);
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);
            tl = sizeof(struct rx_debugStats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

                rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
                ap->length = sizeof(struct rx_debugStats);
                rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
                rx_computelen(ap, ap->length);

    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            register struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);

            tl = sizeof(struct rx_debugConn) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            memset((char *)&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                 */
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                                tconn.callState[j] = RX_STATE_NOTINIT;

                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)

                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,

                MUTEX_EXIT(&rx_connHashTable_lock);

            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

            /*
             * Pass back all the peer structures we have available
             */
    case RX_DEBUGI_GETPEER:{
            register struct rx_peer *tp;
            struct rx_debugPeer tpeer;

            tl = sizeof(struct rx_debugPeer) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            memset((char *)&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 *
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = tp->burstSize;
                        tpeer.burst = tp->burst;
                        tpeer.burstWait.sec = htonl(tp->burstWait.sec);
                        tpeer.burstWait.usec = htonl(tp->burstWait.usec);
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.timeout.sec = htonl(tp->timeout.sec);
                        tpeer.timeout.usec = htonl(tp->timeout.usec);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.inPacketSkew = htonl(tp->inPacketSkew);
                        tpeer.outPacketSkew = htonl(tp->outPacketSkew);
                        tpeer.rateFlag = htonl(tp->rateFlag);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high = htonl(tp->bytesSent.high);
                        tpeer.bytesSent.low = htonl(tp->bytesSent.low);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived.high);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived.low);

                        MUTEX_EXIT(&rx_peerHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,

                MUTEX_EXIT(&rx_peerHashTable_lock);

            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

    case RX_DEBUGI_RXSTATS:{
            tl = sizeof(rx_stats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            /* Since it's all int32s, convert to network order with a loop. */
            MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) & rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

            ap->length = sizeof(rx_stats);
            MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

        /* error response packet */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
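
/* Illustrative sketch (not part of the original source): the wire request
 * this handler expects is a struct rx_debugIn with both fields in network
 * order, e.g. for the basic statistics query above:
 *
 *     struct rx_debugIn tin;
 *     tin.type  = htonl(RX_DEBUGI_GETSTATS);
 *     tin.index = htonl(0);
 *
 * carried in an RX_PACKET_TYPE_DEBUG packet with RX_CLIENT_INITIATED set;
 * unknown types get the RX_DEBUGI_BADTYPE error response built just above. */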
2023 rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
2024 afs_int32 ahost, short aport, int istack)
2029 * Only respond to client-initiated version requests, and
2030 * clear that flag in the response.
2032 if (ap->header.flags & RX_CLIENT_INITIATED) {
2035 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2036 rxi_EncodePacketHeader(ap);
2037 memset(buf, 0, sizeof(buf));
2038 strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2039 rx_packetwrite(ap, 0, 65, buf);
2042 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2050 /* send a debug packet back to the sender */
2052 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2053 afs_int32 ahost, short aport, afs_int32 istack)
2055 struct sockaddr_in taddr;
2061 int waslocked = ISAFS_GLOCK();
2064 taddr.sin_family = AF_INET;
2065 taddr.sin_port = aport;
2066 taddr.sin_addr.s_addr = ahost;
2067 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2068 taddr.sin_len = sizeof(struct sockaddr_in);
2071 /* We need to trim the niovecs. */
2072 nbytes = apacket->length;
2073 for (i = 1; i < apacket->niovecs; i++) {
2074 if (nbytes <= apacket->wirevec[i].iov_len) {
2075 savelen = apacket->wirevec[i].iov_len;
2076 saven = apacket->niovecs;
2077 apacket->wirevec[i].iov_len = nbytes;
2078 apacket->niovecs = i + 1; /* so condition fails because i == niovecs */
2080 nbytes -= apacket->wirevec[i].iov_len;
2083 #ifdef RX_KERNEL_TRACE
2084 if (ICL_SETACTIVE(afs_iclSetp)) {
2087 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2088 "before osi_NetSend()");
2096 /* debug packets are not reliably delivered, hence the cast below. */
2097 (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2098 apacket->length + RX_HEADER_SIZE, istack);
2100 #ifdef RX_KERNEL_TRACE
2101 if (ICL_SETACTIVE(afs_iclSetp)) {
2103 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2104 "after osi_NetSend()");
2113 if (saven) { /* means we truncated the packet above. */
2114 apacket->wirevec[i - 1].iov_len = savelen;
2115 apacket->niovecs = saven;
2120 /* Send the packet to appropriate destination for the specified
2121 * call. The header is first encoded and placed in the packet.
2124 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2125 struct rx_packet *p, int istack)
2131 struct sockaddr_in addr;
2132 register struct rx_peer *peer = conn->peer;
2135 char deliveryType = 'S';
2137 /* The address we're sending the packet to */
2138 memset(&addr, 0, sizeof(addr));
2139 addr.sin_family = AF_INET;
2140 addr.sin_port = peer->port;
2141 addr.sin_addr.s_addr = peer->host;
2143 /* This stuff should be revamped, I think, so that most, if not
2144 * all, of the header stuff is always added here. We could
2145 * probably do away with the encode/decode routines. XXXXX */
2147 /* Stamp each packet with a unique serial number. The serial
2148 * number is maintained on a connection basis because some types
2149 * of security may be based on the serial number of the packet,
2150 * and security is handled on a per authenticated-connection
2152 /* Pre-increment, to guarantee no zero serial number; a zero
2153 * serial number means the packet was never sent. */
2154 MUTEX_ENTER(&conn->conn_data_lock);
2155 p->header.serial = ++conn->serial;
2156 MUTEX_EXIT(&conn->conn_data_lock);
2157 /* This is so we can adjust retransmit time-outs better in the face of
2158 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2160 if (p->firstSerial == 0) {
2161 p->firstSerial = p->header.serial;
2164 /* If an output tracer function is defined, call it with the packet and
2165 * network address. Note this function may modify its arguments. */
2166 if (rx_almostSent) {
2167 int drop = (*rx_almostSent) (p, &addr);
2168 /* drop packet if return value is non-zero? */
2170 deliveryType = 'D'; /* Drop the packet */
2174 /* Get network byte order header */
2175 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2176 * touch ALL the fields */
2178 /* Send the packet out on the same socket that related packets are being
2182 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2185 /* Possibly drop this packet, for testing purposes */
2186 if ((deliveryType == 'D')
2187 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2188 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2189 deliveryType = 'D'; /* Drop the packet */
2191 deliveryType = 'S'; /* Send the packet */
2192 #endif /* RXDEBUG */
2194 /* Loop until the packet is sent. We'd prefer just to use a
2195 * blocking socket, but unfortunately the interface doesn't
2196 * allow us to have the socket block in send mode, and not
2197 * block in receive mode */
2199 waslocked = ISAFS_GLOCK();
2200 #ifdef RX_KERNEL_TRACE
2201 if (ICL_SETACTIVE(afs_iclSetp)) {
2204 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2205 "before osi_NetSend()");
2214 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2215 p->length + RX_HEADER_SIZE, istack)) != 0) {
2216 /* send failed, so let's hurry up the resend, eh? */
2217 MUTEX_ENTER(&rx_stats_mutex);
2218 rx_stats.netSendFailures++;
2219 MUTEX_EXIT(&rx_stats_mutex);
2220 p->retryTime = p->timeSent; /* resend it very soon */
2221 clock_Addmsec(&(p->retryTime),
2222 10 + (((afs_uint32) p->backoff) << 8));
2223 /* Some systems are nice and tell us right away that we cannot
2224 * reach this recipient by returning an error code.
2225 * So, when this happens let's "down" the host NOW so
2226 * we don't sit around waiting for this host to timeout later.
2230 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2231 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2232 code == -ENETUNREACH
2233 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2234 code == EHOSTUNREACH
2239 call->lastReceiveTime = 0;
2242 #ifdef RX_KERNEL_TRACE
2243 if (ICL_SETACTIVE(afs_iclSetp)) {
2245 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2246 "after osi_NetSend()");
2257 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2259 MUTEX_ENTER(&rx_stats_mutex);
2260 rx_stats.packetsSent[p->header.type - 1]++;
2261 MUTEX_EXIT(&rx_stats_mutex);
2262 MUTEX_ENTER(&peer->peer_lock);
2263 hadd32(peer->bytesSent, p->length);
2264 MUTEX_EXIT(&peer->peer_lock);
2267 /* Send a list of packets to appropriate destination for the specified
2268 * connection. The headers are first encoded and placed in the packets.
2271 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2272 struct rx_packet **list, int len, int istack)
2274 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2277 struct sockaddr_in addr;
2278 register struct rx_peer *peer = conn->peer;
2280 struct rx_packet *p = NULL;
2281 struct iovec wirevec[RX_MAXIOVECS];
2282 int i, length, code;
2285 struct rx_jumboHeader *jp;
2287 char deliveryType = 'S';
2289 /* The address we're sending the packet to */
2290 addr.sin_family = AF_INET;
2291 addr.sin_port = peer->port;
2292 addr.sin_addr.s_addr = peer->host;
2294 if (len + 1 > RX_MAXIOVECS) {
2295 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2299 * Stamp the packets in this jumbogram with consecutive serial numbers
2301 MUTEX_ENTER(&conn->conn_data_lock);
2302 serial = conn->serial;
2303 conn->serial += len;
2304 MUTEX_EXIT(&conn->conn_data_lock);
2307 /* This stuff should be revamped, I think, so that most, if not
2308 * all, of the header stuff is always added here. We could
2309 * probably do away with the encode/decode routines. XXXXX */
2312 length = RX_HEADER_SIZE;
2313 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0; i < len; i++) {
	p = list[i];

	/* The whole 3.5 jumbogram scheme relies on packets fitting
	 * in a single packet buffer. */
	if (p->niovecs > 2) {
	    osi_Panic("rxi_SendPacketList, niovecs > 2\n");
	}

	/* Set the RX_JUMBO_PACKET flags in all but the last packets
	 * in this packet list. */
	if (i < len - 1) {
	    if (p->length != RX_JUMBOBUFFERSIZE) {
		osi_Panic("rxi_SendPacketList, length != jumbo size\n");
	    }
	    p->header.flags |= RX_JUMBO_PACKET;
	    length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	    wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	} else {
	    wirevec[i + 1].iov_len = p->length;
	    length += p->length;
	}
	wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
	if (jp != NULL) {
	    /* Convert jumbo packet header to network byte order */
	    temp = (afs_uint32) (p->header.flags) << 24;
	    temp |= (afs_uint32) (p->header.spare);
	    *(afs_uint32 *) jp = htonl(temp);
	}
	jp = (struct rx_jumboHeader *)
	    ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);

	/* Stamp each packet with a unique serial number.  The serial
	 * number is maintained on a connection basis because some types
	 * of security may be based on the serial number of the packet,
	 * and security is handled on a per authenticated-connection
	 * basis. */
	/* Pre-increment, to guarantee no zero serial number; a zero
	 * serial number means the packet was never sent. */
	p->header.serial = ++serial;
	/* This is so we can adjust retransmit time-outs better in the face of
	 * rapidly changing round-trip times.  RTO estimation is not a la Karn.
	 */
	if (p->firstSerial == 0) {
	    p->firstSerial = p->header.serial;
	}
#ifdef RXDEBUG
	/* If an output tracer function is defined, call it with the packet and
	 * network address.  Note this function may modify its arguments. */
	if (rx_almostSent) {
	    int drop = (*rx_almostSent) (p, &addr);
	    /* drop packet if return value is non-zero? */
	    if (drop)
		deliveryType = 'D';	/* Drop the packet */
	}
#endif

	/* Get network byte order header */
	rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
					 * touch ALL the fields */
    }

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket = (conn->type ==
	      RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	waslocked = ISAFS_GLOCK();
	if (!istack && waslocked)
	    AFS_GUNLOCK();
#endif
	if ((code =
	     osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
			 istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.netSendFailures++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    for (i = 0; i < len; i++) {
		p = list[i];
		p->retryTime = p->timeSent;	/* resend it very soon */
		clock_Addmsec(&(p->retryTime),
			      10 + (((afs_uint32) p->backoff) << 8));
	    }
	    /* Some systems are nice and tell us right away that we cannot
	     * reach this recipient by returning an error code.
	     * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call &&
#ifdef AFS_NT40_ENV
		code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
#elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
		code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
		code == EHOSTUNREACH
#else
		0
#endif
		)
		call->lastReceiveTime = 0;
	}
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	if (!istack && waslocked)
	    AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }	/* closes the "} else {" opened inside the RXDEBUG block above */

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1],
	 ntohl(peer->host), ntohs(peer->port), p->header.serial,
	 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq,
	 p->header.flags, (unsigned long)p, p->retryTime.sec,
	 p->retryTime.usec / 1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
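
/* Illustrative sketch added for exposition; not part of the original
 * source.  The RX_PACKET_EXAMPLES guard and the helper name are
 * hypothetical, so this never enters a real build.  It restates the
 * wire-length arithmetic of the loop above, assuming the usual
 * constants from rx_packet.h (RX_HEADER_SIZE 28, RX_JUMBOBUFFERSIZE
 * 1412, RX_JUMBOHEADERSIZE 4). */
#ifdef RX_PACKET_EXAMPLES
static int
example_JumbogramLength(int npackets, int lastlen)
{
    /* One Rx header, then (npackets - 1) full jumbo buffers each
     * followed by a 4-byte jumbo header, then the final buffer. */
    int length = RX_HEADER_SIZE;
    int i;

    for (i = 0; i < npackets - 1; i++)
	length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    length += lastlen;
    /* e.g. npackets == 3, lastlen == 1412:
     * 28 + 2 * (1412 + 4) + 1412 == 4272 bytes on the wire */
    return length;
}
#endif /* RX_PACKET_EXAMPLES */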
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(register struct rx_call *call,
		register struct rx_connection *conn,
		struct rx_packet *optionalPacket, int type, char *data,
		int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;
    if (call) {
	channel = call->channel;
	callNumber = *call->callNumber;
	/* BUSY packets refer to the next call on this connection */
	if (type == RX_PACKET_TYPE_BUSY) {
	    callNumber++;
	}
    } else {
	channel = 0;
	callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	if (!p)
	    osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
	p->length = nbytes;
    else
	nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
	rx_packetwrite(p, 0, nbytes, data);

    /* Trim the wirevec to the actual data length, remembering the old
     * values so a caller-supplied packet can be restored afterwards. */
    for (i = 1; i < p->niovecs; i++) {
	if (nbytes <= p->wirevec[i].iov_len) {
	    savelen = p->wirevec[i].iov_len;
	    saven = p->niovecs;
	    p->wirevec[i].iov_len = nbytes;
	    p->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
	rxi_Send(call, p, istack);
    else
	rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {		/* means we truncated the packet above.  We probably don't */
	/* really need to do this, but it seems safer this way, given that */
	/* sneaky optionalPacket... */
	p->wirevec[i - 1].iov_len = savelen;
	p->niovecs = saven;
    }
    if (!optionalPacket)
	rxi_FreePacket(p);
    return optionalPacket;
}
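
/* Illustrative sketch added for exposition; not part of the original
 * source (the RX_PACKET_EXAMPLES guard and helper name are
 * hypothetical).  It mirrors how callers in rx.c typically use
 * rxi_SendSpecial: a call abort carries the 4-byte error code, in
 * network byte order, as the packet's data portion. */
#ifdef RX_PACKET_EXAMPLES
static void
example_SendCallAbort(struct rx_call *call, afs_int32 error, int istack)
{
    afs_int32 nerror = htonl(error);

    /* No optionalPacket is supplied, so rxi_SendSpecial allocates and
     * frees a scratch packet and returns NULL. */
    (void)rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
			  RX_PACKET_TYPE_ABORT, (char *)&nerror,
			  sizeof(nerror), istack);
}
#endif /* RX_PACKET_EXAMPLES */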
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */

    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
		   | (((afs_uint32) p->header.flags) << 16)
		   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
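
/* Added for reference (not in the original source): the RX_HEADER_SIZE
 * (28-byte) wire header written above, one 32-bit word at a time, all
 * in network byte order:
 *
 *   word 0  epoch
 *   word 1  cid (connection id | channel)
 *   word 2  callNumber
 *   word 3  seq
 *   word 4  serial
 *   word 5  type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6  spare<<16 | serviceId  (as rxi_DecodePacketHeader notes,
 *           the top 16 bits double as the security checksum)
 */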
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf++);
    p->header.cid = ntohl(*buf++);
    p->header.callNumber = ntohl(*buf++);
    p->header.seq = ntohl(*buf++);
    p->header.serial = ntohl(*buf++);

    temp = ntohl(*buf++);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf++);
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
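
/* Illustrative sketch added for exposition; not part of the original
 * source (the RX_PACKET_EXAMPLES guard and helper names are
 * hypothetical).  It isolates the packing and unpacking of the sixth
 * header word done by the two routines above; e.g. type 1 (DATA),
 * flags 0x04 (RX_LAST_PACKET), userStatus 0, securityIndex 2 packs to
 * 0x01040002. */
#ifdef RX_PACKET_EXAMPLES
static afs_uint32
example_PackTypeWord(struct rx_header *h)
{
    return (((afs_uint32) h->type) << 24)
	| (((afs_uint32) h->flags) << 16)
	| (((afs_uint32) h->userStatus) << 8) | (afs_uint32) h->securityIndex;
}

static void
example_UnpackTypeWord(afs_uint32 temp, struct rx_header *h)
{
    /* assignment to the narrow header fields truncates, exactly as in
     * rxi_DecodePacketHeader */
    h->type = temp >> 24;
    h->flags = temp >> 16;
    h->userStatus = temp >> 8;
    h->securityIndex = temp;
}
#endif /* RX_PACKET_EXAMPLES */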
void
rxi_PrepareSendPacket(register struct rx_call *call,
		      register struct rx_packet *p, register int last)
{
    register struct rx_connection *conn = call->conn;
    int i;
    ssize_t len;		/* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
	*call->callNumber = 1;

    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
	p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);	/* Never yet transmitted */
    clock_Zero(&p->firstSent);	/* Never yet transmitted */
    p->header.serial = 0;	/* Another way of saying never transmitted... */
    p->backoff = 0;

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
	len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
	osi_Panic("PrepareSendPacket 1\n");	/* MTUXXX */
    } else if (i < p->niovecs) {
	/* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
	rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */ );
#else /* !RX_ENABLE_TSFPQ */
	MUTEX_ENTER(&rx_freePktQ_lock);
	rxi_FreeDataBufsNoLock(p, i);
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */
	p->niovecs = i;
    }
    p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
}
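
/* Illustrative sketch added for exposition; not part of the original
 * source (the RX_PACKET_EXAMPLES guard and helper name are
 * hypothetical).  It checks the invariant rxi_PrepareSendPacket
 * establishes: after trimming, the data iovecs (wirevec[1..niovecs-1])
 * sum to exactly the packet length plus the connection's security
 * header. */
#ifdef RX_PACKET_EXAMPLES
static int
example_WirevecMatchesLength(struct rx_packet *p, int securityHeaderSize)
{
    size_t sum = 0;
    unsigned int i;

    for (i = 1; i < p->niovecs; i++)
	sum += p->wirevec[i].iov_len;
    return sum == (size_t) p->length + securityHeaderSize;
}
#endif /* RX_PACKET_EXAMPLES */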
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU, frags;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU)
	return mtu;
    mtu -= adjMTU;
    if (mtu <= 0)
	return adjMTU;
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
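
/* Added worked example (not in the original source), assuming the
 * usual constants from rx_packet.h (RX_HEADER_SIZE 28,
 * RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4):
 * rxi_AdjustIfMTU(1500) computes adjMTU = 28 + 1412 + 4 = 1444; the
 * 56 bytes left over cannot hold another 1416-byte jumbo fragment, so
 * the MTU is rounded down to 1444 and each datagram exactly fills one
 * packet buffer. */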
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
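
/* Added worked example (not in the original source), assuming
 * rxi_nSendFrags is 4 and the constants noted above:
 * rxi_AdjustMaxMTU(1444, 65535) forms 4 * 1444 = 5776, which
 * rxi_AdjustIfMTU then rounds down to 1444 + 3 * (1412 + 4) = 5692. */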
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
	return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
	return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
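
/* Added worked example (not in the original source), assuming the
 * usual constants (RX_HEADER_SIZE 28, RX_JUMBOBUFFERSIZE 1412,
 * RX_JUMBOHEADERSIZE 4, UDP_HDR_SIZE 28) and that RX_MAX_PACKET_SIZE
 * does not bind: rxi_AdjustDgramPackets(4, 1444) computes
 * maxMTU = 4 * (1444 + 28) - 28 = 5860, subtracts the first and last
 * packet sizes (28 + 2 * 1412 + 4 = 2856) leaving 3004, and returns
 * 2 + 3004 / 1416 = 4 packets per jumbogram. */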