 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License. For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include "afs/param.h"
#include <afs/param.h>
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"      /* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
struct rx_packet *rx_mallocedP = 0;
extern char cml_version_number[];
static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);
static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_int32 ahost, short aport,
static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
                                   struct rx_queue * q);
#ifdef RX_ENABLE_TSFPQ
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact. If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately. For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 * all packet buffers (iov_base) are integral multiples of
 * offset is an integral multiple of the word size.
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
        l += packet->wirevec[i].iov_len;
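/*
 * Editorial note (sketch, not original text): the slow-path accessors
 * above and below all walk the wire vector the same way.  l accumulates
 * the total length of the iovecs already skipped, so once the loop stops,
 * (offset - l) is the byte position of the requested word inside
 * wirevec[i].  Concretely, if wirevec[1] holds RX_FIRSTBUFFERSIZE bytes
 * and each later iovec holds RX_CBUFFERSIZE bytes, an offset just past
 * the first buffer lands at byte (offset - RX_FIRSTBUFFERSIZE) of
 * wirevec[2].
 */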
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                             (offset - l))) = data;
        l += packet->wirevec[i].iov_len;
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested. l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
    while ((resid > 0) && (i < packet->niovecs)) {
        j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        l += packet->wirevec[i].iov_len;
    return (resid ? (r - resid) : r);
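/*
 * Illustrative usage (an assumed caller, not taken from this file): the
 * slow path is the general fallback when a read may span iovec
 * boundaries, e.g.
 *
 *     afs_uint32 words[2];
 *     if (rx_SlowReadPacket(p, 0, sizeof(words), (char *)words)
 *         != sizeof(words))
 *         return RX_PROTOCOL_ERROR;   // hypothetical error handling
 *
 * The return value is the count of bytes actually copied, which is why a
 * caller must compare it against the requested length to detect a short
 * read.
 */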
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested. l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
    while ((resid > 0) && (i < RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
        l += packet->wirevec[i].iov_len;
    return (resid ? (r - resid) : r);
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
    register struct rx_packet *p, *np;
    num_pkts = AllocPacketBufs(class, num_pkts, q);
    for (queue_Scan(q, p, np, rx_packet)) {
        RX_PACKET_IOV_FULLINIT(p);
#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    register struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    transfer = num_pkts - rx_ts_info->_FPQ.len;
        MUTEX_ENTER(&rx_freePktQ_lock);
        if ((transfer + rx_TSFPQGlobSize) <= rx_nFreePackets) {
            transfer += rx_TSFPQGlobSize;
        } else if (transfer <= rx_nFreePackets) {
            transfer = rx_nFreePackets;
            /* alloc enough for us, plus a few globs for other threads */
            alloc = transfer + (3 * rx_TSFPQGlobSize) - rx_nFreePackets;
            rxi_MorePacketsNoLock(MAX(alloc, rx_initSendWindow));
            transfer = rx_TSFPQGlobSize;
        RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
        MUTEX_EXIT(&rx_freePktQ_lock);
    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
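/*
 * Editorial note on the sizing logic above: the thread-local free queue
 * is refilled from the global pool in "glob"-sized batches.  If the
 * global pool can cover the shortfall plus one extra glob, the extra is
 * taken to amortize future acquisitions of rx_freePktQ_lock; if the pool
 * is merely short, everything available is taken; otherwise fresh
 * packets are allocated for this thread plus a few globs for other
 * threads before the requested packets are checked out locally.
 */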
#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
         num_pkts--, overq++);
        rxi_NeedMorePackets = TRUE;
    case RX_PACKET_CLASS_RECEIVE:
        rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
    case RX_PACKET_CLASS_SEND:
        rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
    case RX_PACKET_CLASS_SPECIAL:
        rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
    case RX_PACKET_CLASS_RECV_CBUF:
        rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
    case RX_PACKET_CLASS_SEND_CBUF:
        rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
    if (rx_nFreePackets < num_pkts)
        num_pkts = rx_nFreePackets;
        rxi_NeedMorePackets = TRUE;
    if (rx_nFreePackets < num_pkts) {
        rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), rx_initSendWindow));
    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
         i++, c = queue_Next(c, rx_packet)) {
    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);
    rx_nFreePackets -= num_pkts;
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
 * Free a packet currently used as a continuation buffer
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    register struct rx_ts_info_t * rx_ts_info;
    register struct rx_packet *c, *nc;
    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);
        for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        for (queue_Scan(q, c, nc, rx_packet)) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    register struct rx_packet *p, *np;
    osi_Assert(num_pkts >= 0);
        for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
        for (queue_Scan(q, p, np, rx_packet)) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
        queue_SpliceAppend(q, &cbs);
    MUTEX_ENTER(&rx_freePktQ_lock);
    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;
    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending. All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time. Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    register struct rx_packet *cb, *ncb;
    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
    if ((nv + p->niovecs) > RX_MAXWVECS)
        nv = RX_MAXWVECS - p->niovecs;
    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);
    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
        p->wirevec[i].iov_base = (caddr_t) cb->localdata;
        p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
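    /*
     * Worked example (editorial; the actual buffer sizes live in
     * rx_packet.h): if RX_CBUFFERSIZE were 1024, a request for nb == 2500
     * bytes would compute nv = 2, then bump it to 3 because 2 * 1024 <
     * 2500.  When RX_MAXWVECS clamps nv, the caller learns about the
     * shortfall from the return value: the bytes requested minus the
     * space actually hooked in.
     */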
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
    PIN(p, getme);              /* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    rx_ts_info->_FPQ.delta += apackets;
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_NeedMorePackets = FALSE;
        MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
    PIN(p, getme);              /* XXXXX */
    memset((char *)p, 0, getme);
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
    PIN(p, getme);              /* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    rx_ts_info->_FPQ.delta += apackets;
        (num_keep_local < apackets)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
        rxi_NeedMorePackets = FALSE;
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
    struct rx_packet *p, *e;
    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
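    /*
     * Editorial sketch of the arithmetic above: a maximal jumbogram needs
     * (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE
     * continuation buffers beyond its first buffer, so growing apackets
     * by a quarter of that product sets aside enough spare packet
     * structures to act as cbufs for one fully loaded packet in four.
     */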
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
    apackets -= apackets / 4;
    osi_Assert(apackets > 0);
    memset((char *)p, 0, getme);
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
    rx_nFreePackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_stats_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_stats_mutex);
#endif /* RX_ENABLE_TSFPQ */
    rxi_NeedMorePackets = FALSE;
rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    register struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    if (num_keep_local != rx_ts_info->_FPQ.len) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (num_keep_local < rx_ts_info->_FPQ.len) {
            xfer = rx_ts_info->_FPQ.len - num_keep_local;
            RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
            xfer = num_keep_local - rx_ts_info->_FPQ.len;
            if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
                xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
            if (rx_nFreePackets < xfer) {
                rxi_MorePacketsNoLock(xfer - rx_nFreePackets);
            RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
        MUTEX_EXIT(&rx_freePktQ_lock);
rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_initSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out. It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
/* This explanation is bogus. The free list doesn't remain in any kind of
   useful order for afs_int32: the packets in use get pretty much randomly scattered
   across all the pages. In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list. An array springs rapidly to mind.
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    register struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %lx\n", (unsigned long)p));
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        RX_TS_FPQ_LTOG(rx_ts_info);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %lx\n", (unsigned long)p));
    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    register struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %lx\n", (unsigned long)p));
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
 * free continuation buffers off a packet into a queue
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *   number of continuation buffers freed
#ifndef RX_ENABLE_TSFPQ
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
    struct rx_packet * cb;
    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
        cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
        RX_FPQ_MARK_FREE(cb);
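/*
 * Editorial note: RX_CBUF_TO_PACKET (defined in rx_packet.h) recovers the
 * owning struct rx_packet from a continuation-buffer address; a cbuf is
 * just the data area of an otherwise idle packet, which is why each one
 * freed here can be marked free and chained back onto a packet queue.
 */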
 * free packet continuation buffers into the global free packet pool
 * [IN] p -- packet from which to free continuation buffers
 * [IN] first -- iovec offset of first continuation buffer to free
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
#ifdef RX_ENABLE_TSFPQ
 * free packet continuation buffers into the thread-local free pool
 * [IN] p             -- packet from which continuation buffers will be freed
 * [IN] first         -- iovec offset of first continuation buffer to free
 * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 *                       global free pool before returning
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    register struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
        RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;
/* rxi_RestoreDataBufs
 * Restore the correct sizes to the iovecs. Called when reusing a packet
 * for reading off the wire.
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];
    RX_PACKET_IOV_INIT(p);
    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
        iov->iov_len = RX_CBUFFERSIZE;
#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    register struct rx_ts_info_t * rx_ts_info;
        osi_Panic("TrimDataBufs 1: first must be 1");
    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    /* iov now points to the first empty data buffer. */
    RX_TS_INFO_GET(rx_ts_info);
    for (; iov < end; iov++) {
            osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
        RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
        osi_Panic("TrimDataBufs 1: first must be 1");
    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    /* iov now points to the first empty data buffer. */
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (; iov < end; iov++) {
            osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    rxi_PacketsUnWait();
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p. P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketNoLock(int class)
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        case RX_PACKET_CLASS_RECEIVE:
            rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
        case RX_PACKET_CLASS_SEND:
            rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
        case RX_PACKET_CLASS_SPECIAL:
            rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
        return (struct rx_packet *)0;
    rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_initSendWindow);
        RX_TS_FPQ_GTOL(rx_ts_info);
    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);
    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacketNoLock(int class)
    register struct rx_packet *p;
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        case RX_PACKET_CLASS_RECEIVE:
            rx_MutexIncrement(rx_stats.receivePktAllocFailures, rx_stats_mutex);
        case RX_PACKET_CLASS_SEND:
            rx_MutexIncrement(rx_stats.sendPktAllocFailures, rx_stats_mutex);
        case RX_PACKET_CLASS_SPECIAL:
            rx_MutexIncrement(rx_stats.specialPktAllocFailures, rx_stats_mutex);
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_MutexIncrement(rx_stats.receiveCbufPktAllocFailures, rx_stats_mutex);
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_MutexIncrement(rx_stats.sendCbufPktAllocFailures, rx_stats_mutex);
        return (struct rx_packet *)0;
    rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (queue_IsEmpty(&rx_freePacketQueue))
        osi_Panic("rxi_AllocPacket error");
    if (queue_IsEmpty(&rx_freePacketQueue))
        rxi_MorePacketsNoLock(rx_initSendWindow);
    p = queue_First(&rx_freePacketQueue, rx_packet);
    RX_FPQ_MARK_USED(p);
    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketTSFPQ(int class, int pull_global)
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    rx_MutexIncrement(rx_stats.packetRequests, rx_stats_mutex);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_initSendWindow);
        RX_TS_FPQ_GTOL(rx_ts_info);
        MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);
    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    register struct rx_packet *p;
    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    register struct rx_packet *p;
    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call. It also sets the packet length before
 * returning. caution: this is often called at NETPRI
 * Called with call locked.
rxi_AllocSendPacket(register struct rx_call *call, int want)
    register struct rx_packet *p = (struct rx_packet *)0;
    register unsigned delta;
    mud = call->MTU - RX_HEADER_SIZE;
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
        want = MIN(want, mud);
        if ((unsigned)want > p->length)
            (void)rxi_AllocDataBuf(p, (want - p->length),
                                   RX_PACKET_CLASS_SEND_CBUF);
        if ((unsigned)p->length > mud)
        if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */
    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);
            want = MIN(want, mud);
            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);
            if ((unsigned)p->length > mud)
            if (delta >= p->length) {
        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
        call->flags |= RX_CALL_WAIT_PACKETS;
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;
#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
        osi_rxSleep(&rx_waitingForPackets);
        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
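        /*
         * Editorial note on the ordering above: the call lock is dropped
         * before sleeping on the free-packet queue so a thread returning
         * packets under rx_freePktQ_lock is never blocked behind this
         * call; CALL_HOLD pins the call while it is unlocked, and the
         * hold and the RX_CALL_WAIT_PACKETS flag are undone in reverse
         * order once packets become available.
         */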
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
/* count the number of used FDs */
CountFDs(register int amax)
    register int i, code;
    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
#endif /* AFS_NT40_ENV */
#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)
/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    register afs_int32 tlen, savelen;
    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);    /* this is the size of the user data area */
    tlen += RX_HEADER_SIZE;     /* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
                                 * it once in order to avoid races. */
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);
    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;
    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {      /* Bogus packet */
        if (nbytes < 0 && errno == EWOULDBLOCK) {
            rx_MutexIncrement(rx_stats.noPacketOnRead, rx_stats_mutex);
        } else if (nbytes <= 0) {
            MUTEX_ENTER(&rx_stats_mutex);
            rx_stats.bogusPacketOnRead++;
            rx_stats.bogusHost = from.sin_addr.s_addr;
            MUTEX_EXIT(&rx_stats_mutex);
            dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
                 ntohs(from.sin_port), nbytes));
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
             && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
        rxi_DecodePacketHeader(p);
        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
             p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
             p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
        rxi_TrimDataBufs(p, 1);
        /* Extract packet header. */
        rxi_DecodePacketHeader(p);
        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
            struct rx_peer *peer;
            rx_MutexIncrement(rx_stats.packetsRead[p->header.type - 1], rx_stats_mutex);
             * Try to look up this peer structure. If it doesn't exist,
             * don't create a new one -
             * we don't keep count of the bytes sent/received if a peer
             * structure doesn't already exist.
             * The peer/connection cleanup code assumes that there is 1 peer
             * per connection. If we actually created a peer structure here
             * and this packet was an rxdebug packet, the peer structure would
             * never be cleaned up.
            peer = rxi_FindPeer(*host, *port, 0, 0);
            /* Since this may not be associated with a connection,
             * it may have no refCount, meaning we could race with
            if (peer && (peer->refCount > 0)) {
                MUTEX_ENTER(&peer->peer_lock);
                hadd32(peer->bytesReceived, p->length);
                MUTEX_EXIT(&peer->peer_lock);
        /* Free any empty packet buffers at the end of this packet */
        rxi_TrimDataBufs(p, 1);
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header. All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
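/*
 * Editorial sketch of the jumbogram wire layout this function unpacks
 * (sizes shown symbolically; see rx_packet.h for the constants):
 *
 *   [rx header][RX_JUMBOBUFFERSIZE data][4-byte jumbo header][data]...
 *
 * Each call peels off the leading RX_JUMBOBUFFERSIZE bytes as packet p,
 * promotes the abbreviated header that follows into a full rx header for
 * np (serial and seq are consecutive by construction at send time), and
 * leaves np holding the remainder for a possible further split.
 */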
rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length. All but the first packet are preceded by
     * an abbreviated four byte header. The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
    niov = p->niovecs - 2;
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);
    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);
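    /*
     * Example (editorial): the abbreviated header is a single 32-bit word
     * with the flags in the top byte and the checksum in the low 16 bits,
     * so a wire word of 0x04001234 decodes to flags == 0x04 and
     * cksum == 0x1234; the middle byte is unused.
     */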
    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    ret = rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
 * message receipt is done in rxk_input or rx_put.
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);
/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
    register int m, n, o, t, i;
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
            t = iovs[i].iov_len;
        memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e)  cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;
    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");      /* MTUXXX probably don't need this check */
    if (m->m_len <= off) {
    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;
        t = MIN(l1, MIN(l2, (unsigned int)len));
        p1 = mtod(m, caddr_t);
        p2 = iovs[i].iov_base;
        l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;
    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /*KERNEL && !UKERNEL */
/* send a response to a debug packet */
rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
                       afs_int32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;
    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);
    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;
            /* get basic stats */
            memset((char *)&tstat, 0, sizeof(tstat));   /* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_nWaiting);
            tstat.nWaited = htonl(rx_nWaited);
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);
            tl = sizeof(struct rx_debugStats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
            ap->length = sizeof(struct rx_debugStats);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            rx_computelen(ap, ap->length);
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            register struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);
            tl = sizeof(struct rx_debugConn) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            memset((char *)&tconn, 0, sizeof(tconn));   /* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                                tconn.callState[j] = RX_STATE_NOTINIT;
                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)
                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                MUTEX_EXIT(&rx_connHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);      /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
        /*
         * Pass back all the peer structures we have available
         */
    case RX_DEBUGI_GETPEER:{
            register struct rx_peer *tp;
            struct rx_debugPeer tpeer;
            tl = sizeof(struct rx_debugPeer) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            memset((char *)&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock.
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = tp->burstSize;
                        tpeer.burst = tp->burst;
                        tpeer.burstWait.sec = htonl(tp->burstWait.sec);
                        tpeer.burstWait.usec = htonl(tp->burstWait.usec);
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.timeout.sec = htonl(tp->timeout.sec);
                        tpeer.timeout.usec = htonl(tp->timeout.usec);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.inPacketSkew = htonl(tp->inPacketSkew);
                        tpeer.outPacketSkew = htonl(tp->outPacketSkew);
                        tpeer.rateFlag = htonl(tp->rateFlag);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high = htonl(tp->bytesSent.high);
                        tpeer.bytesSent.low = htonl(tp->bytesSent.low);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived.high);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived.low);
                        MUTEX_EXIT(&rx_peerHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                MUTEX_EXIT(&rx_peerHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff);     /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    case RX_DEBUGI_RXSTATS:{
            tl = sizeof(rx_stats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            /* Since it's all int32s convert to network order with a loop. */
            MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) &rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
            ap->length = sizeof(rx_stats);
            MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
        /* error response packet */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
                         afs_int32 ahost, short aport, int istack)
    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
        memset(buf, 0, sizeof(buf));
        strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
        rx_packetwrite(ap, 0, 65, buf);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                    afs_int32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;
    int waslocked = ISAFS_GLOCK();
    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
        if (nbytes <= apacket->wirevec[i].iov_len) {
            savelen = apacket->wirevec[i].iov_len;
            saven = apacket->niovecs;
            apacket->wirevec[i].iov_len = nbytes;
            apacket->niovecs = i + 1;   /* so condition fails because i == niovecs */
        nbytes -= apacket->wirevec[i].iov_len;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
                      apacket->length + RX_HEADER_SIZE, istack);
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
    if (saven) {                /* means we truncated the packet above. */
        apacket->wirevec[i - 1].iov_len = savelen;
        apacket->niovecs = saven;
/* Send the packet to appropriate destination for the specified
 * call. The header is first encoded and placed in the packet.
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
               struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    char deliveryType = 'S';
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;
    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here. We could
     * probably do away with the encode/decode routines. XXXXX */
    /* Stamp each packet with a unique serial number. The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times. RTO estimation is not a la Karn.
    if (p->firstSerial == 0) {
        p->firstSerial = p->header.serial;
    /* If an output tracer function is defined, call it with the packet and
     * network address. Note this function may modify its arguments. */
    if (rx_almostSent) {
        int drop = (*rx_almostSent) (p, &addr);
        /* drop packet if return value is non-zero? */
            deliveryType = 'D'; /* Drop the packet */
    /* Get network byte order header */
    rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
                                 * touch ALL the fields */
    /* Send the packet out on the same socket that related packets are being
        RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */
    /* Loop until the packet is sent. We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
    waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
        osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
                    p->length + RX_HEADER_SIZE, istack)) != 0) {
        /* send failed, so let's hurry up the resend, eh? */
        rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
        p->retryTime = p->timeSent;     /* resend it very soon */
        clock_Addmsec(&(p->retryTime),
                      10 + (((afs_uint32) p->backoff) << 8));
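        /*
         * Editorial arithmetic note: the retry delay computed above is
         * 10 + (backoff << 8) milliseconds, roughly a quarter second per
         * backoff step; a packet with backoff == 2 is rescheduled about
         * 522 ms after the failed send.
         */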
2214 /* Some systems are nice and tell us right away that we cannot
2215 * reach this recipient by returning an error code.
2216 * So, when this happens let's "down" the host NOW so
2217 * we don't sit around waiting for this host to timeout later.
2221 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2222 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2223 code == -ENETUNREACH
2224 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2225 code == EHOSTUNREACH
2230 call->lastReceiveTime = 0;
2233 #ifdef RX_KERNEL_TRACE
2234 if (ICL_SETACTIVE(afs_iclSetp)) {
2236 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2237 "after osi_NetSend()");
2248 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2250 rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2251 MUTEX_ENTER(&peer->peer_lock);
2252 hadd32(peer->bytesSent, p->length);
2253 MUTEX_EXIT(&peer->peer_lock);
2256 /* Send a list of packets to appropriate destination for the specified
2257 * connection. The headers are first encoded and placed in the packets.
2260 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2261 struct rx_packet **list, int len, int istack)
2263 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2266 struct sockaddr_in addr;
2267 register struct rx_peer *peer = conn->peer;
2269 struct rx_packet *p = NULL;
2270 struct iovec wirevec[RX_MAXIOVECS];
2271 int i, length, code;
2274 struct rx_jumboHeader *jp;
2276 char deliveryType = 'S';
2278 /* The address we're sending the packet to */
2279 addr.sin_family = AF_INET;
2280 addr.sin_port = peer->port;
2281 addr.sin_addr.s_addr = peer->host;
2283 if (len + 1 > RX_MAXIOVECS) {
2284 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2288 * Stamp the packets in this jumbogram with consecutive serial numbers
2290 MUTEX_ENTER(&conn->conn_data_lock);
2291 serial = conn->serial;
2292 conn->serial += len;
2293 MUTEX_EXIT(&conn->conn_data_lock);
2296 /* This stuff should be revamped, I think, so that most, if not
2297 * all, of the header stuff is always added here. We could
2298 * probably do away with the encode/decode routines. XXXXX */
2301 length = RX_HEADER_SIZE;
2302 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2303 wirevec[0].iov_len = RX_HEADER_SIZE;
2304 for (i = 0; i < len; i++) {
2307 /* The whole 3.5 jumbogram scheme relies on packets fitting
2308 * in a single packet buffer. */
2309 if (p->niovecs > 2) {
2310 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2313 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2316 if (p->length != RX_JUMBOBUFFERSIZE) {
2317 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2319 p->header.flags |= RX_JUMBO_PACKET;
2320 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2321 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2323 wirevec[i + 1].iov_len = p->length;
2324 length += p->length;
2326 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2328 /* Convert jumbo packet header to network byte order */
2329 temp = (afs_uint32) (p->header.flags) << 24;
2330 temp |= (afs_uint32) (p->header.spare);
2331 *(afs_uint32 *) jp = htonl(temp);
2333 jp = (struct rx_jumboHeader *)
2334 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2336 /* Stamp each packet with a unique serial number. The serial
2337 * number is maintained on a connection basis because some types
2338 * of security may be based on the serial number of the packet,
2339 * and security is handled on a per authenticated-connection
2341 /* Pre-increment, to guarantee no zero serial number; a zero
2342 * serial number means the packet was never sent. */
2343 p->header.serial = ++serial;
2344 /* This is so we can adjust retransmit time-outs better in the face of
2345 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2347 if (p->firstSerial == 0) {
2348 p->firstSerial = p->header.serial;
2351 /* If an output tracer function is defined, call it with the packet and
2352 * network address. Note this function may modify its arguments. */
2353 if (rx_almostSent) {
2354 int drop = (*rx_almostSent) (p, &addr);
2355 /* drop packet if return value is non-zero? */
2357 deliveryType = 'D'; /* Drop the packet */
2361 /* Get network byte order header */
2362 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2363 * touch ALL the fields */
2366 /* Send the packet out on the same socket that related packets are being
2370 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2373 /* Possibly drop this packet, for testing purposes */
2374 if ((deliveryType == 'D')
2375 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2376 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2377 deliveryType = 'D'; /* Drop the packet */
2379 deliveryType = 'S'; /* Send the packet */
2380 #endif /* RXDEBUG */
2382 /* Loop until the packet is sent. We'd prefer just to use a
2383 * blocking socket, but unfortunately the interface doesn't
2384 * allow us to have the socket block in send mode, and not
2385 * block in receive mode */
2386 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2387 waslocked = ISAFS_GLOCK();
2388 if (!istack && waslocked)
2392 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2394 /* send failed, so let's hurry up the resend, eh? */
2395 rx_MutexIncrement(rx_stats.netSendFailures, rx_stats_mutex);
2396 for (i = 0; i < len; i++) {
2398 p->retryTime = p->timeSent; /* resend it very soon */
2399 clock_Addmsec(&(p->retryTime),
2400 10 + (((afs_uint32) p->backoff) << 8));
2402 /* Some systems are nice and tell us right away that we cannot
2403 * reach this recipient by returning an error code.
2404 * So, when this happens let's "down" the host NOW so
2405 * we don't sit around waiting for this host to time out later.
2406 */
2407 if (call &&
2408 #ifdef AFS_NT40_ENV
2409 code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
2410 #elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
2411 code == -ENETUNREACH
2412 #elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
2413 code == EHOSTUNREACH
2414 #else
2415 0
2416 #endif
2417 )
2418 call->lastReceiveTime = 0;
2420 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2421 if (!istack && waslocked)
2422 AFS_GLOCK();
2423 #endif
2429 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2432 rx_MutexIncrement(rx_stats.packetsSent[p->header.type - 1], rx_stats_mutex);
2433 MUTEX_ENTER(&peer->peer_lock);
2434 hadd32(peer->bytesSent, p->length);
2435 MUTEX_EXIT(&peer->peer_lock);
2439 /* Send a "special" packet to the peer connection. If call is
2440 * specified, then the packet is directed to a specific call channel
2441 * associated with the connection, otherwise it is directed to the
2442 * connection only. Uses optionalPacket if it is supplied, rather than
2443 * allocating a new packet buffer. Nbytes is the length of the data
2444 * portion of the packet. If data is non-null, nbytes of data are
2445 * copied into the packet. Type is the type of the packet, as defined
2446 * in rx.h. Bug: there's a lot of duplication between this and other
2447 * routines. This needs to be cleaned up. */
2448 struct rx_packet *
2449 rxi_SendSpecial(register struct rx_call *call,
2450 register struct rx_connection *conn,
2451 struct rx_packet *optionalPacket, int type, char *data,
2452 int nbytes, int istack)
2454 /* Some of the following stuff should be common code for all
2455 * packet sends (it's repeated elsewhere) */
2456 register struct rx_packet *p;
2457 unsigned int i = 0;
2458 int savelen = 0, saven = 0;
2459 int channel, callNumber;
2461 channel = call->channel;
2462 callNumber = *call->callNumber;
2463 /* BUSY packets refer to the next call on this connection */
2464 if (type == RX_PACKET_TYPE_BUSY) {
2473 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2474 if (!p)
2475 osi_Panic("rxi_SendSpecial failure");
2482 p->header.serviceId = conn->serviceId;
2483 p->header.securityIndex = conn->securityIndex;
2484 p->header.cid = (conn->cid | channel);
2485 p->header.callNumber = callNumber;
2487 p->header.epoch = conn->epoch;
2488 p->header.type = type;
2489 p->header.flags = 0;
2490 if (conn->type == RX_CLIENT_CONNECTION)
2491 p->header.flags |= RX_CLIENT_INITIATED;
2492 if (data) {
2493 rx_packetwrite(p, 0, nbytes, data);
2495 for (i = 1; i < p->niovecs; i++) {
2496 if (nbytes <= p->wirevec[i].iov_len) {
2497 savelen = p->wirevec[i].iov_len;
2498 saven = p->niovecs;
2499 p->wirevec[i].iov_len = nbytes;
2500 p->niovecs = i + 1; /* so the loop condition fails because i == niovecs */
2501 } else
2502 nbytes -= p->wirevec[i].iov_len;
2503 }
2504 }
2505 if (call)
2506 rxi_Send(call, p, istack);
2507 else
2508 rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2509 if (saven) { /* means we truncated the packet above; restoring the */
2510 /* original iovec is probably not strictly necessary, but it is */
2511 /* safer, given that sneaky optionalPacket... */
2512 p->wirevec[i - 1].iov_len = savelen;
2513 p->niovecs = saven;
2514 }
2515 if (!optionalPacket)
2516 rxi_FreePacket(p);
2517 return optionalPacket;
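/* Usage sketch (illustrative only): this is the pattern used elsewhere
 * in rx to send a connection-level abort; no call and no optionalPacket
 * are supplied, so the packet is freshly allocated, directed at the
 * connection itself, and freed before returning NULL:
 *
 *   afs_int32 error = htonl(conn->error);
 *   rxi_SendSpecial((struct rx_call *)0, conn, (struct rx_packet *)0,
 *                   RX_PACKET_TYPE_ABORT, (char *)&error,
 *                   sizeof(error), istack);
 */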
2521 /* Encode the packet's header (from the struct header in the packet to
2522 * the net byte order representation in the wire representation of the
2523 * packet, which is what is actually sent out on the wire) */
2524 void
2525 rxi_EncodePacketHeader(register struct rx_packet *p)
2526 {
2527 register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2529 memset((char *)buf, 0, RX_HEADER_SIZE);
2530 *buf++ = htonl(p->header.epoch);
2531 *buf++ = htonl(p->header.cid);
2532 *buf++ = htonl(p->header.callNumber);
2533 *buf++ = htonl(p->header.seq);
2534 *buf++ = htonl(p->header.serial);
2535 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2536 | (((afs_uint32) p->header.flags) << 16)
2537 | (p->header.userStatus << 8) | p->header.securityIndex);
2538 /* Note: top 16 bits of this next word were reserved */
2539 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
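/* For reference, the encoded wire header is seven big-endian 32-bit
 * words: epoch, cid, callNumber, seq, serial,
 * (type<<24 | flags<<16 | userStatus<<8 | securityIndex), and
 * (spare<<16 | serviceId). E.g. (a sketch, assuming the conventional
 * flag value RX_LAST_PACKET = 4) a DATA packet (type 1) with only
 * RX_LAST_PACKET set and securityIndex 2 packs the sixth word as
 * 0x01040002. */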
2542 /* Decode the packet's header (from net byte order to a struct header) */
2543 void
2544 rxi_DecodePacketHeader(register struct rx_packet *p)
2545 {
2546 register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2547 afs_uint32 temp;
2549 p->header.epoch = ntohl(*buf);
2550 buf++;
2551 p->header.cid = ntohl(*buf);
2552 buf++;
2553 p->header.callNumber = ntohl(*buf);
2554 buf++;
2555 p->header.seq = ntohl(*buf);
2556 buf++;
2557 p->header.serial = ntohl(*buf);
2558 buf++;
2560 temp = ntohl(*buf);
2561 buf++;
2563 /* C will truncate byte fields to bytes for me */
2564 p->header.type = temp >> 24;
2565 p->header.flags = temp >> 16;
2566 p->header.userStatus = temp >> 8;
2567 p->header.securityIndex = temp >> 0;
2568 buf++;
2570 temp = ntohl(*buf);
2571 buf++;
2572 p->header.serviceId = (temp & 0xffff);
2573 p->header.spare = temp >> 16;
2574 /* Note: top 16 bits of this last word are the security checksum */
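#if 0
/* A minimal encode/decode round-trip check (an illustrative sketch,
 * not compiled into the build): running rxi_EncodePacketHeader and
 * then rxi_DecodePacketHeader on the same packet should reproduce the
 * header fields, since the security checksum, if any, is applied
 * later by the security object. */
static void
rxi_CheckHeaderRoundTrip(struct rx_packet *p)
{
    struct rx_header saved = p->header;

    rxi_EncodePacketHeader(p);
    rxi_DecodePacketHeader(p);
    if (saved.serial != p->header.serial || saved.seq != p->header.seq)
	osi_Panic("rx header encode/decode mismatch\n");
}
#endif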
2577 void
2578 rxi_PrepareSendPacket(register struct rx_call *call,
2579 register struct rx_packet *p, register int last)
2580 {
2581 register struct rx_connection *conn = call->conn;
2582 register int i;
2583 ssize_t len; /* len must be a signed type; it can go negative */
2585 p->flags &= ~RX_PKTFLAG_ACKED;
2586 p->header.cid = (conn->cid | call->channel);
2587 p->header.serviceId = conn->serviceId;
2588 p->header.securityIndex = conn->securityIndex;
2590 /* No data packets on call 0. Where do these come from? */
2591 if (*call->callNumber == 0)
2592 *call->callNumber = 1;
2594 p->header.callNumber = *call->callNumber;
2595 p->header.seq = call->tnext++;
2596 p->header.epoch = conn->epoch;
2597 p->header.type = RX_PACKET_TYPE_DATA;
2598 p->header.flags = 0;
2599 p->header.spare = 0;
2600 if (conn->type == RX_CLIENT_CONNECTION)
2601 p->header.flags |= RX_CLIENT_INITIATED;
2603 if (last)
2604 p->header.flags |= RX_LAST_PACKET;
2606 clock_Zero(&p->retryTime); /* Never yet transmitted */
2607 clock_Zero(&p->firstSent); /* Never yet transmitted */
2608 p->header.serial = 0; /* Another way of saying never transmitted... */
2611 /* Now that we're sure this is the last data on the call, make sure
2612 * that the "length" and the sum of the iov_lens matches. */
2613 len = p->length + call->conn->securityHeaderSize;
2615 for (i = 1; i < p->niovecs && len > 0; i++) {
2616 len -= p->wirevec[i].iov_len;
2617 }
2618 if (len > 0) {
2619 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2620 } else if (i < p->niovecs) {
2621 /* Free any extra elements in the wirevec */
2622 #if defined(RX_ENABLE_TSFPQ)
2623 rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2624 #else /* !RX_ENABLE_TSFPQ */
2625 MUTEX_ENTER(&rx_freePktQ_lock);
2626 rxi_FreeDataBufsNoLock(p, i);
2627 MUTEX_EXIT(&rx_freePktQ_lock);
2628 #endif /* !RX_ENABLE_TSFPQ */
2629 p->niovecs = i;
2630 }
2632 p->wirevec[i - 1].iov_len += len;
2633 RXS_PreparePacket(conn->securityObject, call, p);
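/* Worked example of the length fixup above (a sketch, assuming a
 * 4-byte security header): if p->length is 1000 and the first data
 * iovec holds 1412 bytes, len starts at 1004 and drops to -408 after
 * one pass, any iovecs beyond the first data buffer are freed, and
 * that buffer's iov_len is adjusted by -408 so the iovec sum once
 * again equals length plus the security header. */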
2636 /* Given an interface MTU size, calculate an adjusted MTU size that
2637 * will make efficient use of the RX buffers when the peer is sending
2638 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
2639 int
2640 rxi_AdjustIfMTU(int mtu)
2641 {
2642 int adjMTU;
2643 int frags;
2645 if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
2646 return mtu;
2647 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2648 if (mtu <= adjMTU) {
2649 return mtu;
2650 }
2651 mtu -= adjMTU;
2652 if (mtu <= 0) {
2653 return adjMTU;
2654 }
2655 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2656 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
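/* Worked example (assuming RX_HEADER_SIZE = 28, RX_JUMBOBUFFERSIZE =
 * 1412 and RX_JUMBOHEADERSIZE = 4, so adjMTU = 1444): an interface
 * MTU of 4352 leaves 4352 - 1444 = 2908 bytes, enough for
 * frags = 2908 / 1416 = 2 additional jumbo buffers, so the adjusted
 * MTU is 1444 + 2 * 1416 = 4276; the leftover 76 bytes are given up
 * so that every fragment carries whole RX buffers. */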
2659 /* Given an interface MTU size, and the peer's advertised max receive
2660 * size, calculate an adjusted maxMTU size that makes efficient use
2661 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2662 int
2663 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2664 {
2665 int maxMTU = mtu * rxi_nSendFrags;
2666 maxMTU = MIN(maxMTU, peerMaxMTU);
2667 return rxi_AdjustIfMTU(maxMTU);
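/* E.g. with mtu = 1444, rxi_nSendFrags = 4 and a peer advertising a
 * 5600-byte maximum, maxMTU = MIN(4 * 1444, 5600) = 5600, which
 * rxi_AdjustIfMTU then trims to 4276, a whole number of jumbo
 * buffers. */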
2670 /* Given a packet size, figure out how many datagram packets will fit.
2671 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2672 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2673 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2674 int
2675 rxi_AdjustDgramPackets(int frags, int mtu)
2676 {
2677 int maxMTU;
2678 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2679 return 1;
2680 }
2681 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2682 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2683 /* subtract the size of the first and last packets */
2684 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2685 if (maxMTU < 0) {
2686 return 1;
2687 }
2688 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
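/* Worked example (a sketch, assuming UDP_HDR_SIZE = 28 for the IP and
 * UDP headers, plus the jumbo constants above): frags = 3, mtu = 1444
 * gives maxMTU = 3 * (1444 + 28) - 28 = 4388, which the cap at
 * RX_MAX_PACKET_SIZE leaves alone; after subtracting the first- and
 * last-packet overhead, 4388 - (28 + 2 * 1412 + 4) = 1532, so
 * 2 + 1532 / 1416 = 3 packets fit in the datagram. */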