/*
 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

#include <afsconfig.h>
#include "afs/param.h"
#include <afs/param.h>

#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#include "rx/rx_atomic.h"
#include "rx/rx_internal.h"
#include "rx/rx_stats.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx_atomic.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#include "rx_internal.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"

/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
static struct rx_packet *rx_mallocedP = 0;
#ifdef RXDEBUG_PACKET
static afs_uint32 rx_packet_id = 0;

extern char cml_version_number[];

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_uint32 ahost, short aport,
#ifdef RX_ENABLE_TSFPQ
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
				   struct rx_queue * q);

/* some rules about packets:
 * 1.  When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
/*
 * all packet buffers (iov_base) are integral multiples of
 * the word size.
 * offset is an integral multiple of the word size.
 */
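/*
 * Illustrative sketch (not from the original source): a packet's wirevec
 * is typically laid out as
 *
 *   wirevec[0]  -- the rx header (never holds user data, so the scans
 *                  below start at i = 1)
 *   wirevec[1]  -- the first data buffer (localdata)
 *   wirevec[2+] -- continuation buffers of RX_CBUFFERSIZE bytes each
 *
 * The routines that follow walk wirevec[1..niovecs-1], accumulating each
 * iov_len into l until the iovec containing `offset` is found, and then
 * address the word at iov_base + (offset - l).
 */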
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
        l += packet->wirevec[i].iov_len;

/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                             (offset - l))) = data;
        l += packet->wirevec[i].iov_len;

/*
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    while ((r > 0) && (i < packet->niovecs)) {
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        l += packet->wirevec[i].iov_len;
    return (r ? (resid - r) : resid);

/*
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    unsigned int i, j, l, o, r;

    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > o) {
        l += packet->wirevec[i].iov_len;

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    while ((r > 0) && (i <= RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        l += packet->wirevec[i].iov_len;
    return (r ? (resid - r) : resid);
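/*
 * Usage sketch (illustrative only): callers elsewhere in this file reach
 * these Slow* routines through the rx_packetread()/rx_packetwrite()
 * wrappers once a transfer spans past the first data iovec.  Writing a
 * 32-bit value followed by a payload looks, in effect, like:
 *
 *   afs_int32 netval = htonl(val);
 *   rx_SlowWritePacket(p, 0, sizeof(netval), (char *)&netval);
 *   rx_SlowWritePacket(p, sizeof(netval), datalen, data);
 *
 * Each call returns the number of bytes it actually placed, which can be
 * short of resid if rxi_AllocDataBuf() cannot extend the packet.
 */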
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
    struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
        RX_PACKET_IOV_FULLINIT(p);

#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
        MUTEX_ENTER(&rx_freePktQ_lock);
        transfer = MAX(transfer, rx_TSFPQGlobSize);
        if (transfer > rx_nFreePackets) {
            /* alloc enough for us, plus a few globs for other threads */
            rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);

        RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

        MUTEX_EXIT(&rx_freePktQ_lock);

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);

#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
         num_pkts--, overq++);

        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);

    if (rx_nFreePackets < num_pkts)
        num_pkts = rx_nFreePackets;

        rxi_NeedMorePackets = TRUE;

    if (rx_nFreePackets < num_pkts) {
        rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));

    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
         i++, c = queue_Next(c, rx_packet)) {

    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);

    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

        for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);

        for (queue_Scan(q, c, nc, rx_packet)) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);

    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */

        MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    struct rx_packet *p, *np;

    osi_Assert(num_pkts >= 0);

        for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);

        for (queue_Scan(q, p, np, rx_packet)) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);

        queue_SpliceAppend(q, &cbs);

    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */

/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;

/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
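/*
 * Worked example (illustrative; assumes RX_CBUFFERSIZE is 1024, which may
 * not match this build): a request for nb = 3000 bytes computes
 * nv = 3000 / 1024 = 2, and since 2 * 1024 < 3000 the round-up below bumps
 * nv to 3.  nv is then capped so that p->niovecs never exceeds RX_MAXWVECS.
 */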
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
    if ((nv + p->niovecs) > RX_MAXWVECS)
        nv = RX_MAXWVECS - p->niovecs;

    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
        p->wirevec[i].iov_base = (caddr_t) cb->localdata;
        p->wirevec[i].iov_len = RX_CBUFFERSIZE;

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);

/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);

        RX_TS_FPQ_CHECKIN(rx_ts_info, p);

        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_NeedMorePackets = FALSE;

        MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);

        RX_TS_FPQ_CHECKIN(rx_ts_info, p);

        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

        (num_keep_local < apackets)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
        rxi_NeedMorePackets = FALSE;

        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
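    /*
     * Worked example (illustrative; the ratio is an assumption, not this
     * build's real value): if (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) /
     * RX_CBUFFERSIZE were 6, then a request for 100 packets becomes
     * 100 + 25 * 6 = 250 allocations; the extra 150 end up as continuation
     * buffers, letting a quarter of the original 100 grow to jumbogram size.
     */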
    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
        apackets -= apackets / 4;
        osi_Assert(apackets > 0);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;

rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));

#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (num_keep_local < rx_ts_info->_FPQ.len) {
            xfer = rx_ts_info->_FPQ.len - num_keep_local;
            RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
            xfer = num_keep_local - rx_ts_info->_FPQ.len;
            if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
                xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
            if (rx_nFreePackets < xfer) {
                rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
            RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
        MUTEX_EXIT(&rx_freePktQ_lock);

rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */

/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_maxSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        RX_TS_FPQ_LTOG(rx_ts_info);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */

        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *
 *   number of continuation buffers freed
 */
#ifndef RX_ENABLE_TSFPQ
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
    struct rx_packet * cb;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
        cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
        RX_FPQ_MARK_FREE(cb);

/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p     -- packet from which to free continuation buffers
 * [IN] first -- iovec offset of first continuation buffer to free
 */
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));

#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p            -- packet from which continuation buffers will be freed
 * [IN] first        -- iovec offset of first continuation buffer to free
 *                      any value less than 2, the min number of iovecs,
 *                      is treated as if it is 2.
 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
 *                      global free pool before returning
 */
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
        RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        RX_TS_FPQ_LTOG(rx_ts_info);

        /* Wakeup anyone waiting for packets */

        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
        iov->iov_len = RX_CBUFFERSIZE;

#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;

        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */
        RX_TS_INFO_GET(rx_ts_info);
        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));

        if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
            MUTEX_ENTER(&rx_freePktQ_lock);

            RX_TS_FPQ_LTOG(rx_ts_info);
            rxi_PacketsUnWait();

            MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;

        osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */
        MUTEX_ENTER(&rx_freePktQ_lock);

        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));

        rxi_PacketsUnWait();

        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
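/*
 * Illustrative note: rx_computelen(), used later in this file, recovers
 * this same header-exclusive count by summing the data iovecs, so for a
 * consistent packet (conceptually)
 *
 *   rx_computelen(p, len);    =>  len == p->length
 *
 * while the datagram that goes on the wire totals
 * p->length + RX_HEADER_SIZE bytes, as the osi_NetSend() calls below show.
 */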
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
        return (struct rx_packet *)0;

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);

        RX_TS_FPQ_GTOL(rx_ts_info);

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;

    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
        return (struct rx_packet *)0;

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);

    if (queue_IsEmpty(&rx_freePacketQueue))
        osi_Panic("rxi_AllocPacket error");
    if (queue_IsEmpty(&rx_freePacketQueue))
        rxi_MorePacketsNoLock(rx_maxSendWindow);

    p = queue_First(&rx_freePacketQueue, rx_packet);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketTSFPQ(int class, int pull_global)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
        MUTEX_ENTER(&rx_freePktQ_lock);

        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);

        RX_TS_FPQ_GTOL(rx_ts_info);

        MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */

/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
rxi_AllocSendPacket(struct rx_call *call, int want)
    struct rx_packet *p = (struct rx_packet *)0;

    mud = call->MTU - RX_HEADER_SIZE;
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
        want = MIN(want, mud);

        if ((unsigned)want > p->length)
            (void)rxi_AllocDataBuf(p, (want - p->length),
                                   RX_PACKET_CLASS_SEND_CBUF);

        if (p->length > mud)

        if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);

            want = MIN(want, mud);

            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);

            if (p->length > mud)

            if (delta >= p->length) {

        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
         */
        call->flags |= RX_CALL_WAIT_PACKETS;
        MUTEX_ENTER(&rx_refcnt_mutex);
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&rx_refcnt_mutex);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
        osi_rxSleep(&rx_waitingForPackets);

        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        MUTEX_ENTER(&rx_refcnt_mutex);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&rx_refcnt_mutex);
        call->flags &= ~RX_CALL_WAIT_PACKETS;

/* Windows does not use file descriptors. */
#define CountFDs(amax) 0

/* count the number of used FDs */
    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
#endif /* AFS_NT40_ENV */

#define CountFDs(amax) amax

#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    unsigned int nbytes;
    afs_uint32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (u_short) (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
        if (nbytes < 0 && errno == EWOULDBLOCK) {
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.noPacketOnRead);
        } else if (nbytes <= 0) {
            if (rx_stats_active) {
                rx_atomic_inc(&rx_stats.bogusPacketOnRead);
                rx_stats.bogusHost = from.sin_addr.s_addr;
            dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
                 ntohs(from.sin_port), nbytes));
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
             && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;

        dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
             p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
             p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
#ifdef RX_TRIMDATABUFS
        rxi_TrimDataBufs(p, 1);

        /* Extract packet header. */
        rxi_DecodePacketHeader(p);

        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
            if (rx_stats_active) {
                struct rx_peer *peer;
                rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
                /*
                 * Try to look up this peer structure.  If it doesn't exist,
                 * don't create a new one -
                 * we don't keep count of the bytes sent/received if a peer
                 * structure doesn't already exist.
                 *
                 * The peer/connection cleanup code assumes that there is 1 peer
                 * per connection.  If we actually created a peer structure here
                 * and this packet was an rxdebug packet, the peer structure would
                 * never be cleaned up.
                 */
                peer = rxi_FindPeer(*host, *port, 0, 0);
                /* Since this may not be associated with a connection,
                 * it may have no refCount, meaning we could race with
                 */
                if (peer && (peer->refCount > 0)) {
                    MUTEX_ENTER(&peer->peer_lock);
                    hadd32(peer->bytesReceived, p->length);
                    MUTEX_EXIT(&peer->peer_lock);

#ifdef RX_TRIMDATABUFS
        /* Free any empty packet buffers at the end of this packet */
        rxi_TrimDataBufs(p, 1);
#endif /* !KERNEL || UKERNEL */

/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
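/*
 * Illustrative layout of a two-packet jumbogram, as inferred from the code
 * below (a sketch, not normative):
 *
 *   [ rx header | RX_JUMBOBUFFERSIZE data | 4-byte jumbo header | remaining data ]
 *     wirevec[0]  first packet's payload    flags/cksum word      second packet
 *
 * rxi_SplitJumboPacket() peels off the first packet, unpacks the
 * abbreviated header, and rebuilds a full rx header for the remainder.
 */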
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));

    niov = p->niovecs - 2;
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));

    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);
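    /* The abbreviated header is one 32-bit word: the flags byte sits in the
     * top 8 bits and the checksum in the low 16 (bits 16-23 are unused
     * here); rxi_SendPacketList() below performs the reverse packing. */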
    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);

#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
cpytoc(mblk_t * mp, int off, int len, char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
            t = iovs[i].iov_len;
            memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

        if (m->m_len <= off) {
    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;

    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

        t = MIN(l1, MIN(l2, (unsigned int)len));
            p1 = mtod(m, caddr_t);
            p2 = iovs[i].iov_base;
            l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;

        m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /*KERNEL && !UKERNEL */

/* send a response to a debug packet */
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
                       afs_uint32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;

            /* get basic stats */
            memset(&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.nPackets = htonl(rx_nPackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
            tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);

            tl = sizeof(struct rx_debugStats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
            ap->length = sizeof(struct rx_debugStats);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            rx_computelen(ap, ap->length);

    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);

            tl = sizeof(struct rx_debugConn) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            memset(&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                 */
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                                tconn.callState[j] = RX_STATE_NOTINIT;

                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)

                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                MUTEX_EXIT(&rx_connHashTable_lock);

            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

            /*
             * Pass back all the peer structures we have available
             */
    case RX_DEBUGI_GETPEER:{
            struct rx_debugPeer tpeer;

            tl = sizeof(struct rx_debugPeer) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

            memset(&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 *
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock.
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
                 */
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        MUTEX_EXIT(&rx_peerHashTable_lock);

                        MUTEX_ENTER(&tp->peer_lock);
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = tp->burstSize;
                        tpeer.burst = tp->burst;
                        tpeer.burstWait.sec = htonl(tp->burstWait.sec);
                        tpeer.burstWait.usec = htonl(tp->burstWait.usec);
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.timeout.sec = htonl(tp->timeout.sec);
                        tpeer.timeout.usec = htonl(tp->timeout.usec);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.inPacketSkew = htonl(tp->inPacketSkew);
                        tpeer.outPacketSkew = htonl(tp->outPacketSkew);
                        tpeer.rateFlag = htonl(tp->rateFlag);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high = htonl(tp->bytesSent.high);
                        tpeer.bytesSent.low = htonl(tp->bytesSent.low);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived.high);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived.low);
                        MUTEX_EXIT(&tp->peer_lock);

                        MUTEX_ENTER(&rx_peerHashTable_lock);
                        MUTEX_EXIT(&rx_peerHashTable_lock);

                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                MUTEX_EXIT(&rx_peerHashTable_lock);

            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

    case RX_DEBUGI_RXSTATS:{
            tl = sizeof(rx_stats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            /* Since it's all int32s, convert to network order with a loop. */
2074 if (rx_stats_active)
2075 MUTEX_ENTER(&rx_stats_mutex);
2076 s = (afs_int32 *) & rx_stats;
2077 for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
2078 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
2081 ap->length = sizeof(rx_stats);
2082 if (rx_stats_active)
2083 MUTEX_EXIT(&rx_stats_mutex);
2084 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2090 /* error response packet */
2091 tin.type = htonl(RX_DEBUGI_BADTYPE);
2092 tin.index = tin.type;
2093 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
2095 ap->length = sizeof(struct rx_debugIn);
2096 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2104 rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
2105 afs_uint32 ahost, short aport, int istack)
2110 * Only respond to client-initiated version requests, and
2111 * clear that flag in the response.
2113 if (ap->header.flags & RX_CLIENT_INITIATED) {
2116 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2117 rxi_EncodePacketHeader(ap);
2118 memset(buf, 0, sizeof(buf));
2119 strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2120 rx_packetwrite(ap, 0, 65, buf);
2123 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2131 /* send a debug packet back to the sender */
2133 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2134 afs_uint32 ahost, short aport, afs_int32 istack)
2136 struct sockaddr_in taddr;
2137 unsigned int i, nbytes, savelen = 0;
2140 int waslocked = ISAFS_GLOCK();
2143 taddr.sin_family = AF_INET;
2144 taddr.sin_port = aport;
2145 taddr.sin_addr.s_addr = ahost;
2146 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2147 taddr.sin_len = sizeof(struct sockaddr_in);
2150 /* We need to trim the niovecs. */
2151 nbytes = apacket->length;
2152 for (i = 1; i < apacket->niovecs; i++) {
2153 if (nbytes <= apacket->wirevec[i].iov_len) {
2154 savelen = apacket->wirevec[i].iov_len;
2155 saven = apacket->niovecs;
2156 apacket->wirevec[i].iov_len = nbytes;
2157 apacket->niovecs = i + 1; /* so condition fails because i == niovecs */
2159 nbytes -= apacket->wirevec[i].iov_len;
2162 #ifdef RX_KERNEL_TRACE
2163 if (ICL_SETACTIVE(afs_iclSetp)) {
2166 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2167 "before osi_NetSend()");
2175 /* debug packets are not reliably delivered, hence the cast below. */
2176 (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2177 apacket->length + RX_HEADER_SIZE, istack);
2179 #ifdef RX_KERNEL_TRACE
2180 if (ICL_SETACTIVE(afs_iclSetp)) {
2182 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2183 "after osi_NetSend()");
2192 if (saven) { /* means we truncated the packet above. */
2193 apacket->wirevec[i - 1].iov_len = savelen;
2194 apacket->niovecs = saven;
2199 /* Send the packet to appropriate destination for the specified
2200 * call. The header is first encoded and placed in the packet.
2203 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2204 struct rx_packet *p, int istack)
2210 struct sockaddr_in addr;
2211 struct rx_peer *peer = conn->peer;
2214 char deliveryType = 'S';
2216 /* The address we're sending the packet to */
2217 memset(&addr, 0, sizeof(addr));
2218 addr.sin_family = AF_INET;
2219 addr.sin_port = peer->port;
2220 addr.sin_addr.s_addr = peer->host;
2222 /* This stuff should be revamped, I think, so that most, if not
2223 * all, of the header stuff is always added here. We could
2224 * probably do away with the encode/decode routines. XXXXX */
2226 /* Stamp each packet with a unique serial number. The serial
2227 * number is maintained on a connection basis because some types
2228 * of security may be based on the serial number of the packet,
2229 * and security is handled on a per authenticated-connection
2231 /* Pre-increment, to guarantee no zero serial number; a zero
2232 * serial number means the packet was never sent. */
2233 MUTEX_ENTER(&conn->conn_data_lock);
2234 p->header.serial = ++conn->serial;
2235 if (p->length > conn->peer->maxPacketSize) {
2236 if ((p->header.type == RX_PACKET_TYPE_ACK) &&
2237 (p->header.flags & RX_REQUEST_ACK)) {
2238 conn->lastPingSize = p->length;
2239 conn->lastPingSizeSer = p->header.serial;
2240 } else if (p->header.seq != 0) {
2241 conn->lastPacketSize = p->length;
2242 conn->lastPacketSizeSeq = p->header.seq;
2245 MUTEX_EXIT(&conn->conn_data_lock);
2246 /* This is so we can adjust retransmit time-outs better in the face of
2247 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2249 if (p->firstSerial == 0) {
2250 p->firstSerial = p->header.serial;
2253 /* If an output tracer function is defined, call it with the packet and
2254 * network address. Note this function may modify its arguments. */
2255 if (rx_almostSent) {
2256 int drop = (*rx_almostSent) (p, &addr);
2257 /* drop packet if return value is non-zero? */
2259 deliveryType = 'D'; /* Drop the packet */
2263 /* Get network byte order header */
2264 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2265 * touch ALL the fields */
2267 /* Send the packet out on the same socket that related packets are being
2271 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2274 /* Possibly drop this packet, for testing purposes */
2275 if ((deliveryType == 'D')
2276 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2277 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2278 deliveryType = 'D'; /* Drop the packet */
2280 deliveryType = 'S'; /* Send the packet */
2281 #endif /* RXDEBUG */
2283 /* Loop until the packet is sent. We'd prefer just to use a
2284 * blocking socket, but unfortunately the interface doesn't
2285 * allow us to have the socket block in send mode, and not
2286 * block in receive mode */
2288 waslocked = ISAFS_GLOCK();
2289 #ifdef RX_KERNEL_TRACE
2290 if (ICL_SETACTIVE(afs_iclSetp)) {
2293 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2294 "before osi_NetSend()");
2303 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2304 p->length + RX_HEADER_SIZE, istack)) != 0) {
2305 /* send failed, so let's hurry up the resend, eh? */
2306 if (rx_stats_active)
2307 rx_atomic_inc(&rx_stats.netSendFailures);
2308 p->retryTime = p->timeSent; /* resend it very soon */
2309 clock_Addmsec(&(p->retryTime),
2310 10 + (((afs_uint32) p->backoff) << 8));
2311 /* Some systems are nice and tell us right away that we cannot
2312 * reach this recipient by returning an error code.
2313 * So, when this happens let's "down" the host NOW so
2314 * we don't sit around waiting for this host to timeout later.
2318 (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
2319 #elif defined(AFS_LINUX20_ENV)
2320 code == -ENETUNREACH
2321 #elif defined(AFS_DARWIN_ENV)
2322 code == EHOSTUNREACH
2327 call->lastReceiveTime = 0;
2330 #ifdef RX_KERNEL_TRACE
2331 if (ICL_SETACTIVE(afs_iclSetp)) {
2333 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2334 "after osi_NetSend()");
2345 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d\n",
2346 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2347 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2348 p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2350 if (rx_stats_active) {
2351 rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
2352 MUTEX_ENTER(&peer->peer_lock);
2353 hadd32(peer->bytesSent, p->length);
2354 MUTEX_EXIT(&peer->peer_lock);
2358 /* Send a list of packets to appropriate destination for the specified
2359 * connection. The headers are first encoded and placed in the packets.
2362 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2363 struct rx_packet **list, int len, int istack)
2365 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2368 struct sockaddr_in addr;
2369 struct rx_peer *peer = conn->peer;
2371 struct rx_packet *p = NULL;
2372 struct iovec wirevec[RX_MAXIOVECS];
2373 int i, length, code;
2376 struct rx_jumboHeader *jp;
2378 char deliveryType = 'S';
2380 /* The address we're sending the packet to */
2381 addr.sin_family = AF_INET;
2382 addr.sin_port = peer->port;
2383 addr.sin_addr.s_addr = peer->host;
2385 if (len + 1 > RX_MAXIOVECS) {
2386 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2390 * Stamp the packets in this jumbogram with consecutive serial numbers
2392 MUTEX_ENTER(&conn->conn_data_lock);
2393 serial = conn->serial;
2394 conn->serial += len;
2395 for (i = 0; i < len; i++) {
2397 if (p->length > conn->peer->maxPacketSize) {
2398 /* a ping *or* a sequenced packet can count */
2399 if ((p->length > conn->peer->maxPacketSize)) {
2400 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2401 (p->header.flags & RX_REQUEST_ACK)) &&
2402 ((i == 0) || (p->length >= conn->lastPingSize))) {
2403 conn->lastPingSize = p->length;
2404 conn->lastPingSizeSer = serial + i;
2405 } else if ((p->header.seq != 0) &&
2406 ((i == 0) || (p->length >= conn->lastPacketSize))) {
2407 conn->lastPacketSize = p->length;
2408 conn->lastPacketSizeSeq = p->header.seq;
2413 MUTEX_EXIT(&conn->conn_data_lock);
2416 /* This stuff should be revamped, I think, so that most, if not
2417 * all, of the header stuff is always added here. We could
2418 * probably do away with the encode/decode routines. XXXXX */
2421 length = RX_HEADER_SIZE;
2422 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2423 wirevec[0].iov_len = RX_HEADER_SIZE;
2424 for (i = 0; i < len; i++) {
2427 /* The whole 3.5 jumbogram scheme relies on packets fitting
2428 * in a single packet buffer. */
2429 if (p->niovecs > 2) {
2430 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2433 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2436 if (p->length != RX_JUMBOBUFFERSIZE) {
2437 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2439 p->header.flags |= RX_JUMBO_PACKET;
2440 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2441 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2443 wirevec[i + 1].iov_len = p->length;
2444 length += p->length;
2446 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2448 /* Convert jumbo packet header to network byte order */
2449 temp = (afs_uint32) (p->header.flags) << 24;
2450 temp |= (afs_uint32) (p->header.spare);
2451 *(afs_uint32 *) jp = htonl(temp);
2453 jp = (struct rx_jumboHeader *)
2454 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2456 /* Stamp each packet with a unique serial number. The serial
2457 * number is maintained on a connection basis because some types
2458 * of security may be based on the serial number of the packet,
2459 * and security is handled on a per authenticated-connection
2461 /* Pre-increment, to guarantee no zero serial number; a zero
2462 * serial number means the packet was never sent. */
2463 p->header.serial = ++serial;
2464 /* This is so we can adjust retransmit time-outs better in the face of
2465 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2467 if (p->firstSerial == 0) {
2468 p->firstSerial = p->header.serial;
2471 /* If an output tracer function is defined, call it with the packet and
2472 * network address. Note this function may modify its arguments. */
2473 if (rx_almostSent) {
2474 int drop = (*rx_almostSent) (p, &addr);
2475 /* drop the packet if the tracer returned non-zero */
2477 deliveryType = 'D'; /* Drop the packet */
2481 /* Get network byte order header */
2482 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2483 * touch ALL the fields */
2486 /* Send the packet out on the same socket that related packets are being
2490 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2493 /* Possibly drop this packet, for testing purposes */
2494 if ((deliveryType == 'D')
2495 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2496 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2497 deliveryType = 'D'; /* Drop the packet */
2499 deliveryType = 'S'; /* Send the packet */
2500 #endif /* RXDEBUG */
2502 /* Loop until the packet is sent. We'd prefer just to use a
2503 * blocking socket, but unfortunately the interface doesn't
2504 * allow us to have the socket block in send mode, and not
2505 * block in receive mode */
2506 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2507 waslocked = ISAFS_GLOCK();
2508 if (!istack && waslocked)
2512 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2514 /* send failed, so let's hurry up the resend, eh? */
2515 if (rx_stats_active)
2516 rx_atomic_inc(&rx_stats.netSendFailures);
2517 for (i = 0; i < len; i++) {
2519 p->retryTime = p->timeSent; /* resend it very soon */
2520 clock_Addmsec(&(p->retryTime),
2521 10 + (((afs_uint32) p->backoff) << 8));
2523 /* Some systems are nice and tell us right away that we cannot
2524 * reach this recipient by returning an error code.
2525 * So, when this happens let's "down" the host NOW so
2526 * we don't sit around waiting for this host to time out later.
2530 (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
2531 #elif defined(AFS_LINUX20_ENV)
2532 code == -ENETUNREACH
2533 #elif defined(AFS_DARWIN_ENV)
2534 code == EHOSTUNREACH
2539 call->lastReceiveTime = 0;
2541 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2542 if (!istack && waslocked)
2550 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d\n",
2551 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2552 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2553 p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2556 if (rx_stats_active) {
2557 rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
2558 MUTEX_ENTER(&peer->peer_lock);
2559 hadd32(peer->bytesSent, p->length);
2560 MUTEX_EXIT(&peer->peer_lock);
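/*
 * A small illustrative sketch (not compiled; the helper name is
 * hypothetical) of the wire-length arithmetic performed by the loop
 * above: every packet but the last contributes one full jumbo buffer
 * plus a jumbo header, and only the first contributes the RX header.
 */
#if 0
static int
example_JumbogramWireLength(int npackets, int lastLength)
{
    int length = RX_HEADER_SIZE;
    int i;

    for (i = 0; i < npackets - 1; i++)
	length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    return length + lastLength;	/* the last packet travels at its real size */
}
#endif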
2565 /* Send a "special" packet to the peer connection. If call is
2566 * specified, then the packet is directed to a specific call channel
2567 * associated with the connection, otherwise it is directed to the
2568 * connection only. Uses optionalPacket if it is supplied, rather than
2569 * allocating a new packet buffer. Nbytes is the length of the data
2570 * portion of the packet. If data is non-null, nbytes of data are
2571 * copied into the packet. Type is the type of the packet, as defined
2572 * in rx.h. Bug: there's a lot of duplication between this and other
2573 * routines. This needs to be cleaned up. */
2575 rxi_SendSpecial(struct rx_call *call,
2576 struct rx_connection *conn,
2577 struct rx_packet *optionalPacket, int type, char *data,
2578 int nbytes, int istack)
2580 /* Some of the following stuff should be common code for all
2581 * packet sends (it's repeated elsewhere) */
2582 struct rx_packet *p;
2584 int savelen = 0, saven = 0;
2585 int channel, callNumber;
2587 channel = call->channel;
2588 callNumber = *call->callNumber;
2589 /* BUSY packets refer to the next call on this connection */
2590 if (type == RX_PACKET_TYPE_BUSY) {
2599 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2601 osi_Panic("rxi_SendSpecial failure");
2608 p->header.serviceId = conn->serviceId;
2609 p->header.securityIndex = conn->securityIndex;
2610 p->header.cid = (conn->cid | channel);
2611 p->header.callNumber = callNumber;
2613 p->header.epoch = conn->epoch;
2614 p->header.type = type;
2615 p->header.flags = 0;
2616 if (conn->type == RX_CLIENT_CONNECTION)
2617 p->header.flags |= RX_CLIENT_INITIATED;
2619 rx_packetwrite(p, 0, nbytes, data);
2621 for (i = 1; i < p->niovecs; i++) {
2622 if (nbytes <= p->wirevec[i].iov_len) {
2623 savelen = p->wirevec[i].iov_len;
2625 p->wirevec[i].iov_len = nbytes;
2626 p->niovecs = i + 1; /* so condition fails because i == niovecs */
2628 nbytes -= p->wirevec[i].iov_len;
2632 rxi_Send(call, p, istack);
2634 rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2635 if (saven) { /* means we truncated the packet above. We probably don't */
2636 /* really need to do this, but it seems safer this way, given that */
2637 /* sneaky optionalPacket... */
2638 p->wirevec[i - 1].iov_len = savelen;
2641 if (!optionalPacket)
2643 return optionalPacket;
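/*
 * A minimal usage sketch (hypothetical helper, modeled on the abort
 * senders in rx.c): the data portion of a special packet travels in
 * network byte order, so an abort ships its error code through htonl().
 */
#if 0
static void
example_SendCallAbort(struct rx_call *call, int istack)
{
    afs_int32 error = htonl(call->error);

    (void)rxi_SendSpecial(call, call->conn, NULL, RX_PACKET_TYPE_ABORT,
			  (char *)&error, sizeof(error), istack);
}
#endif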
2647 /* Encode the packet's header (from the struct header in the packet to
2648 * the net byte order representation in the wire representation of the
2649 * packet, which is what is actually sent out on the wire) */
2651 rxi_EncodePacketHeader(struct rx_packet *p)
2653 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2655 memset(buf, 0, RX_HEADER_SIZE);
2656 *buf++ = htonl(p->header.epoch);
2657 *buf++ = htonl(p->header.cid);
2658 *buf++ = htonl(p->header.callNumber);
2659 *buf++ = htonl(p->header.seq);
2660 *buf++ = htonl(p->header.serial);
2661 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2662 | (((afs_uint32) p->header.flags) << 16)
2663 | (p->header.userStatus << 8) | p->header.securityIndex);
2664 /* Note: top 16 bits of this next word were reserved */
2665 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
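/*
 * For reference, the seven 32-bit words written above (RX_HEADER_SIZE
 * bytes total, all in network byte order):
 *
 *	word 0	epoch
 *	word 1	cid (connection id | channel)
 *	word 2	callNumber
 *	word 3	seq
 *	word 4	serial
 *	word 5	type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *	word 6	spare<<16 | serviceId (on the wire, the top half doubles
 *		as the security checksum)
 */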
2668 /* Decode the packet's header (from net byte order to a struct header) */
2670 rxi_DecodePacketHeader(struct rx_packet *p)
2672 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2675 p->header.epoch = ntohl(*buf);
2677 p->header.cid = ntohl(*buf);
2679 p->header.callNumber = ntohl(*buf);
2681 p->header.seq = ntohl(*buf);
2683 p->header.serial = ntohl(*buf);
2689 /* C will truncate byte fields to bytes for me */
2690 p->header.type = temp >> 24;
2691 p->header.flags = temp >> 16;
2692 p->header.userStatus = temp >> 8;
2693 p->header.securityIndex = temp >> 0;
2698 p->header.serviceId = (temp & 0xffff);
2699 p->header.spare = temp >> 16;
2700 /* Note: top 16 bits of this last word are the security checksum */
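/*
 * A quick consistency sketch (not compiled; the helper is hypothetical):
 * because the encode and decode routines above are exact mirrors, a
 * round trip must reproduce every header field.
 */
#if 0
static void
example_HeaderRoundTrip(struct rx_packet *p)
{
    struct rx_header saved = p->header;

    rxi_EncodePacketHeader(p);
    rxi_DecodePacketHeader(p);
    if (p->header.serial != saved.serial || p->header.seq != saved.seq
	|| p->header.cid != saved.cid
	|| p->header.serviceId != saved.serviceId)
	osi_Panic("example_HeaderRoundTrip: mismatch\n");
}
#endif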
2704 * LOCKS HELD: called with call->lock held.
2706 * PrepareSendPacket is the only place in the code that
2707 * can increment call->tnext. This could become an atomic
2708 * in the future. Beyond that there is nothing in this
2709 * function that requires the call to be locked. This
2710 * function can only be called by the application thread.
2713 rxi_PrepareSendPacket(struct rx_call *call,
2714 struct rx_packet *p, int last)
2716 struct rx_connection *conn = call->conn;
2717 afs_uint32 seq = call->tnext++;
2719 afs_int32 len; /* len must be a signed type; it can go negative */
2721 /* No data packets on call 0. Where do these come from? */
2722 if (*call->callNumber == 0)
2723 *call->callNumber = 1;
2725 MUTEX_EXIT(&call->lock);
2726 p->flags &= ~RX_PKTFLAG_ACKED;
2727 p->header.cid = (conn->cid | call->channel);
2728 p->header.serviceId = conn->serviceId;
2729 p->header.securityIndex = conn->securityIndex;
2731 p->header.callNumber = *call->callNumber;
2732 p->header.seq = seq;
2733 p->header.epoch = conn->epoch;
2734 p->header.type = RX_PACKET_TYPE_DATA;
2735 p->header.flags = 0;
2736 p->header.spare = 0;
2737 if (conn->type == RX_CLIENT_CONNECTION)
2738 p->header.flags |= RX_CLIENT_INITIATED;
2741 p->header.flags |= RX_LAST_PACKET;
2743 clock_Zero(&p->retryTime); /* Never yet transmitted */
2744 clock_Zero(&p->firstSent); /* Never yet transmitted */
2745 p->header.serial = 0; /* Another way of saying never transmitted... */
2748 /* Now that we're sure this is the last data on the call, make sure
2749 * that the "length" and the sum of the iov_lens match. */
2750 len = p->length + call->conn->securityHeaderSize;
2752 for (i = 1; i < p->niovecs && len > 0; i++) {
2753 len -= p->wirevec[i].iov_len;
2756 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2757 } else if (i < p->niovecs) {
2758 /* Free any extra elements in the wirevec */
2759 #if defined(RX_ENABLE_TSFPQ)
2760 rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2761 #else /* !RX_ENABLE_TSFPQ */
2762 MUTEX_ENTER(&rx_freePktQ_lock);
2763 rxi_FreeDataBufsNoLock(p, i);
2764 MUTEX_EXIT(&rx_freePktQ_lock);
2765 #endif /* !RX_ENABLE_TSFPQ */
2770 p->wirevec[i - 1].iov_len += len;
2771 RXS_PreparePacket(conn->securityObject, call, p);
2772 MUTEX_ENTER(&call->lock);
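/*
 * A worked example of the trimming above, with illustrative numbers:
 * suppose p->length + securityHeaderSize = 2000 and wirevec[1] and
 * wirevec[2] each hold 1412 bytes. The loop computes
 *	len = 2000 - 1412 = 588, then len = 588 - 1412 = -824,
 * and exits with i == 3, so there is no panic and nothing to free;
 * "wirevec[i - 1].iov_len += len" then shrinks wirevec[2] to
 * 1412 - 824 = 588 bytes, making the iovecs sum to the packet length.
 */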
2775 /* Given an interface MTU size, calculate an adjusted MTU size that
2776 * will make efficient use of the RX buffers when the peer is sending
2777 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
2779 rxi_AdjustIfMTU(int mtu)
2784 if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
2786 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2787 if (mtu <= adjMTU) {
2794 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2795 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
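/*
 * Worked example, assuming the usual constants (RX_HEADER_SIZE = 28,
 * RX_JUMBOBUFFERSIZE = 1412, RX_JUMBOHEADERSIZE = 4, so adjMTU = 1444):
 * an interface MTU of 1500 leaves 56 spare bytes, 56 / 1416 = 0 extra
 * fragments, and the MTU is rounded down to 1444 so that jumbogram
 * fragments pack exact packet buffers.
 */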
2798 /* Given an interface MTU size, and the peer's advertised max receive
2799 * size, calculate an adjusted maxMTU size that makes efficient use
2800 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2802 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2804 int maxMTU = mtu * rxi_nSendFrags;
2805 maxMTU = MIN(maxMTU, peerMaxMTU);
2806 return rxi_AdjustIfMTU(maxMTU);
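/*
 * Continuing the example above: with mtu = 1444, rxi_nSendFrags = 4 and
 * a peer advertising peerMaxMTU = 5692, maxMTU = 4 * 1444 = 5776 is
 * clamped to 5692, and rxi_AdjustIfMTU returns 1444 + 3 * 1416 = 5692,
 * an exact fit of three extra jumbo buffers.
 */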
2809 /* Given a packet size, figure out how many datagram packets will fit.
2810 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2811 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2812 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2814 rxi_AdjustDgramPackets(int frags, int mtu)
2817 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2820 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2821 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2822 /* subtract the size of the first and last packets */
2823 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2827 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
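/*
 * Example, further assuming UDP_HDR_SIZE = 28 (20 IP + 8 UDP):
 * rxi_AdjustDgramPackets(3, 1444) computes
 *	maxMTU = 3 * (1444 + 28) - 28 = 4388,
 * subtracts the first/last overhead 28 + 2 * 1412 + 4 = 2856, and
 * returns 2 + (1532 / 1416) = 3 packet buffers per datagram.
 */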
2832 * This function can be used by the Windows Cache Manager
2833 * to dump the list of all rx packets so that we can determine
2834 * where packets are being leaked.
2836 int rx_DumpPackets(FILE *outputFile, char *cookie)
2838 #ifdef RXDEBUG_PACKET
2839 struct rx_packet *p;
2843 #define RXDPRINTF sprintf
2844 #define RXDPRINTOUT output
2846 #define RXDPRINTF fprintf
2847 #define RXDPRINTOUT outputFile
2851 MUTEX_ENTER(&rx_freePktQ_lock);
2852 RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
2854 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2857 for (p = rx_mallocedP; p; p = p->allNextp) {
2858 RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, retryTime=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, backoff=%u, length=%u header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
2859 cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec, p->retryTime.sec, p->retryTime.usec,
2860 p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->backoff, (afs_uint32)p->length,
2861 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
2862 (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
2863 (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
2865 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2869 RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
2871 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2874 MUTEX_EXIT(&rx_freePktQ_lock);
2876 #endif /* RXDEBUG_PACKET */
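/*
 * A minimal usage sketch for unix builds (hypothetical caller and log
 * path; on Windows the handle is written with WriteFile instead):
 */
#if 0
void
example_DumpLeakCheck(void)
{
    FILE *fp = fopen("/tmp/rx_packets.log", "w");	/* hypothetical path */

    if (fp != NULL) {
	rx_DumpPackets(fp, "leak-check");	/* cookie tags every line */
	fclose(fp);
    }
}
#endif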