 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License. For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include <afs/param.h>

# include "afs/sysincludes.h"
# include "afsincludes.h"
# include "rx_kcommon.h"
# else /* defined(UKERNEL) */
# ifdef RX_KERNEL_TRACE
# include "rx_kcommon.h"
# ifndef AFS_LINUX20_ENV
# if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
# include "afs/sysincludes.h"
# if defined(AFS_OBSD_ENV)
# include "h/socket.h"
# if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
# if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
# include "sys/mount.h"		/* it gets pulled in by something later anyway */
# include "netinet/in.h"
# include "afs/afs_osi.h"
# include "rx_kmutex.h"
# endif /* defined(UKERNEL) */
# if defined(AFS_NT40_ENV)
# define EWOULDBLOCK WSAEWOULDBLOCK
# include "rx_xmit_nt.h"
# include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"

/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
static struct rx_packet *rx_mallocedP = 0;
static afs_uint32 rx_packet_id = 0;

extern char cml_version_number[];

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_uint32 ahost, short aport,
				afs_int32 istack);

#ifdef RX_ENABLE_TSFPQ
static void
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
				   afs_uint32 first,
				   struct rx_queue * q);
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact. If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately. For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */

/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    return
		*((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
				 (offset - l)));
	l += packet->wirevec[i].iov_len;
/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	l += packet->wirevec[i].iov_len;
/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
		  char *out)
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	l += packet->wirevec[i].iov_len;

    /* i is the iovec which contains the first little bit of data in which we
     * are interested. l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    while ((r > 0) && (i < packet->niovecs)) {
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	l += packet->wirevec[i].iov_len;

    return (r ? (resid - r) : resid);
/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    unsigned int i, j, l, o, r;

    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > o) {
	l += packet->wirevec[i].iov_len;

    /* i is the iovec which contains the first little bit of data in which we
     * are interested. l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    while ((r > 0) && (i <= RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
	l += packet->wirevec[i].iov_len;

    return (r ? (resid - r) : resid);
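
/*
 * Illustrative sketch (editorial, not part of the original source):
 * reading and writing word-aligned data through the slow paths above.
 * Offsets are byte offsets into the data area (wirevec[1..]) and, per the
 * preconditions, must be multiples of the word size:
 *
 *     afs_int32 v;
 *     rx_SlowPutInt32(p, 4, htonl(42));     // store into the 2nd data word
 *     v = ntohl(rx_SlowGetInt32(p, 4));     // read it back (v == 42)
 *
 * In practice callers go through the rx_packetread/rx_packetwrite style
 * wrappers, which fall back to these routines only when the requested
 * region is not contiguous in a single iovec.
 */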
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
    struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
	RX_PACKET_IOV_FULLINIT(p);

#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
	MUTEX_ENTER(&rx_freePktQ_lock);
	transfer = MAX(transfer, rx_TSFPQGlobSize);
	if (transfer > rx_nFreePackets) {
	    /* alloc enough for us, plus a few globs for other threads */
	    rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
	RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
	MUTEX_EXIT(&rx_freePktQ_lock);

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
	 num_pkts--, overq++);

	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_atomic_inc(&rx_stats.receivePktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_atomic_inc(&rx_stats.sendPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_atomic_inc(&rx_stats.specialPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
		break;

    if (rx_nFreePackets < num_pkts)
	num_pkts = rx_nFreePackets;

	rxi_NeedMorePackets = TRUE;

    if (rx_nFreePackets < num_pkts) {
	rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));

    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
	 i < num_pkts;
	 i++, c = queue_Next(c, rx_packet)) {

    queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);
    rx_nFreePackets -= num_pkts;

    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
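
/*
 * Editorial sketch (not in the original source): the two allocator
 * variants above implement the same contract. With RX_ENABLE_TSFPQ each
 * thread keeps a private free queue (rx_ts_info->_FPQ) and only touches
 * the global rx_freePacketQueue, under rx_freePktQ_lock, to move whole
 * "globs" of rx_TSFPQGlobSize packets at a time (RX_TS_FPQ_GTOL2 /
 * RX_TS_FPQ_LTOG). Roughly:
 *
 *     if (local queue already holds num_pkts)   // common, lock-free path
 *         check packets out of _FPQ;
 *     else {
 *         lock global queue;
 *         move MAX(deficit, glob size) packets global -> local;
 *         unlock;
 *     }
 *
 * Without TSFPQ every allocation takes rx_freePktQ_lock, enforces the
 * per-class quota, and splits packets directly off the global queue.
 */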
/* Free a packet currently used as a continuation buffer */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

	for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);
	for (queue_Scan(q, c, nc, rx_packet)) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);

    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	RX_TS_FPQ_LTOG(rx_ts_info);
	/* Wakeup anyone waiting for packets */
	MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    struct rx_packet *p, *np;

    osi_Assert(num_pkts >= 0);

	for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	for (queue_Scan(q, p, np, rx_packet)) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);

    queue_SpliceAppend(q, &cbs);

    MUTEX_ENTER(&rx_freePktQ_lock);
    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);

#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending. All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time. Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
	nv++;
    if ((nv + p->niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - p->niovecs;

    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
	p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	p->wirevec[i].iov_len = RX_CBUFFERSIZE;

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
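
/*
 * Worked example (editorial, not in the original source): the cbuf count
 * is a round-up division. Assuming for illustration that RX_CBUFFERSIZE
 * is 1024, a request for nb = 3000 bytes gives
 *
 *     nv = 3000 / 1024 = 2;   2 * 1024 < 3000, so nv++  ->  nv = 3
 *
 * i.e. three continuation buffers (3072 bytes), after which nb goes
 * negative, matching the "Return nbytes<=0 if successful" contract above.
 */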
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_NeedMorePackets = FALSE;
	MUTEX_EXIT(&rx_freePktQ_lock);

#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;

	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */

    RX_TS_INFO_GET(rx_ts_info);

    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);

	RX_TS_FPQ_CHECKIN(rx_ts_info, p);

	MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
	MUTEX_EXIT(&rx_freePktQ_lock);

    rx_ts_info->_FPQ.delta += apackets;

    if (flush_global &&
	(num_keep_local < apackets)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
	rxi_NeedMorePackets = FALSE;
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);

    getme = apackets * sizeof(struct rx_packet);
    p = (struct rx_packet *)osi_Alloc(getme);
	apackets -= apackets / 4;
	osi_Assert(apackets > 0);

#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
	p->flags |= RX_PKTFLAG_FREE;

	queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
	p->packetId = rx_packet_id++;
	p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */

    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;
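
/*
 * Worked example (editorial, not in the original source): the sizing math
 * above over-allocates so that a quarter of the new packets can be built
 * out to full jumbogram size. If, for illustration,
 * (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE == 3
 * (three continuation buffers per jumbo-capable packet), then a request
 * for 16 packets becomes
 *
 *     apackets = 16 + (16 / 4) * 3 = 28
 *
 * 28 rx_packet structures: 16 usable as packets, 12 destined to serve as
 * continuation buffers for 4 of them. The apackets -= apackets / 4 line
 * is the fallback that shrinks the request when osi_Alloc fails.
 */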
rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	if (num_keep_local < rx_ts_info->_FPQ.len) {
	    xfer = rx_ts_info->_FPQ.len - num_keep_local;
	    RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
	    xfer = num_keep_local - rx_ts_info->_FPQ.len;
	    if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
		xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
	    if (rx_nFreePackets < xfer) {
		rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
	    RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
	MUTEX_EXIT(&rx_freePktQ_lock);

rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_maxSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out. It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list. */
/* This explanation is bogus. The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages. In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list. An array springs rapidly to mind.
 */
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	RX_TS_FPQ_LTOG(rx_ts_info);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %"AFS_PTR_FMT"\n", p));

    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    struct rx_ts_info_t * rx_ts_info;

    dpf(("Free %"AFS_PTR_FMT"\n", p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	RX_TS_FPQ_LTOG(rx_ts_info);
	/* Wakeup anyone waiting for packets */
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p     -- packet from which continuation buffers will be freed
 * [IN] first -- iovec offset of first continuation buffer to free
 * [IN] q     -- queue into which continuation buffers will be chained
 *
 * returns:
 *   number of continuation buffers freed
 */
#ifndef RX_ENABLE_TSFPQ
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
    struct rx_packet * cb;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
	cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
	RX_FPQ_MARK_FREE(cb);

/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p     -- packet from which to free continuation buffers
 * [IN] first -- iovec offset of first continuation buffer to free
 */
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p            -- packet from which continuation buffers will be freed
 * [IN] first        -- iovec offset of first continuation buffer to free
 *                      any value less than 2, the min number of iovecs,
 *                      is treated as if it is 2.
 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
 *                      global free pool before returning
 */
static void
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	RX_TS_FPQ_LTOG(rx_ts_info);
	/* Wakeup anyone waiting for packets */
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
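
/*
 * Editorial note (not in the original source): RX_CBUF_TO_PACKET works
 * because a continuation buffer is nothing but the localdata area of some
 * other struct rx_packet. Given iov_base pointing into that area, the
 * owning packet is recovered by subtracting the fixed offset of localdata
 * within the structure. A sketch of the idea (not the exact macro):
 *
 *     struct rx_packet *owner =
 *         (struct rx_packet *)((char *)iov->iov_base -
 *                              ((char *)&p->localdata[0] - (char *)p));
 *
 * which is why the free routines above can hand iov_base straight back to
 * the packet free pool.
 */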
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs. Called when reusing a packet
 * for reading off the wire.
 */
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	iov->iov_len = RX_CBUFFERSIZE;
#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */

    RX_TS_INFO_GET(rx_ts_info);
    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_PacketsUnWait();
	MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;

    /* iov now points to the first empty data buffer. */

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));

    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
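
/*
 * Usage sketch (editorial, not in the original source): after a short
 * datagram lands in a packet that was provisioned with jumbo-sized
 * buffers, the trailing cbufs carry no data and can be returned. E.g. a
 * packet with niovecs == 5 whose entire payload fits in wirevec[1]:
 *
 *     rxi_TrimDataBufs(p, 1);   // vecs 2..4 go back to the free pool
 *
 * rxi_ReadPacket does exactly this (under RX_TRIMDATABUFS) once the
 * packet has been decoded.
 */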
/* Free the packet p. P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_atomic_inc(&rx_stats.receivePktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_atomic_inc(&rx_stats.sendPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_atomic_inc(&rx_stats.specialPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
		break;

	return (struct rx_packet *)0;
    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

	if (queue_IsEmpty(&rx_freePacketQueue))
	    osi_Panic("rxi_AllocPacket error");
	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;

    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	if (rx_stats_active) {
	    switch (class) {
	    case RX_PACKET_CLASS_RECEIVE:
		rx_atomic_inc(&rx_stats.receivePktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND:
		rx_atomic_inc(&rx_stats.sendPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SPECIAL:
		rx_atomic_inc(&rx_stats.specialPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_RECV_CBUF:
		rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
		break;
	    case RX_PACKET_CLASS_SEND_CBUF:
		rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
		break;

	return (struct rx_packet *)0;

    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);

    if (queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");
    if (queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(rx_maxSendWindow);

    p = queue_First(&rx_freePacketQueue, rx_packet);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacketTSFPQ(int class, int pull_global)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.packetRequests);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_maxSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

	MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {

    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);

    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call. It also sets the packet length before
 * returning. caution: this is often called at NETPRI
 * Called with call locked.
 */
rxi_AllocSendPacket(struct rx_call *call, int want)
    struct rx_packet *p = (struct rx_packet *)0;

    mud = call->MTU - RX_HEADER_SIZE;
    delta =
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
	want = MIN(want, mud);

	if ((unsigned)want > p->length)
	    (void)rxi_AllocDataBuf(p, (want - p->length),
				   RX_PACKET_CLASS_SEND_CBUF);

	if (p->length > mud)

	if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */
    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);

	    want = MIN(want, mud);

	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);

	    if (p->length > mud)

	    if (delta >= p->length) {

	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we
	 * sleep.
	 */
	call->flags |= RX_CALL_WAIT_PACKETS;
	MUTEX_ENTER(&rx_refcnt_mutex);
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&rx_refcnt_mutex);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
	osi_rxSleep(&rx_waitingForPackets);

	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	MUTEX_ENTER(&rx_refcnt_mutex);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&rx_refcnt_mutex);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
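
/*
 * Worked example (editorial, not in the original source): with the
 * 28-byte Rx header and an effective call MTU of, say, 1444, the maximum
 * user data per packet is
 *
 *     mud = 1444 - 28 = 1416 bytes
 *
 * A caller that wants more than the packet currently holds gets extra
 * continuation buffers via rxi_AllocDataBuf, but want is first clamped to
 * mud so the packet never outgrows what the path can carry; the security
 * header/trailer sizes (delta) are also reserved out of the usable space.
 */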
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0

/* count the number of used FDs */
    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
#endif /* AFS_NT40_ENV */

#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
	       short * port)
    struct sockaddr_in from;
    unsigned int nbytes;
    afs_uint32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising. Only check
				 * it once in order to avoid races. */

	tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;
    p->length = (u_short) (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes < 0 && errno == EWOULDBLOCK) {
	    if (rx_stats_active)
		rx_atomic_inc(&rx_stats.noPacketOnRead);
	} else if (nbytes <= 0) {
	    if (rx_stats_active) {
		rx_atomic_inc(&rx_stats.bogusPacketOnRead);
		rx_stats.bogusHost = from.sin_addr.s_addr;
	    dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
		 ntohs(from.sin_port), nbytes));

    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
	     && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;

	dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
	     p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
	     p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,

#ifdef RX_TRIMDATABUFS
	rxi_TrimDataBufs(p, 1);

	/* Extract packet header. */
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
	    if (rx_stats_active) {
		struct rx_peer *peer;
		rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
		/*
		 * Try to look up this peer structure. If it doesn't exist,
		 * don't create a new one -
		 * we don't keep count of the bytes sent/received if a peer
		 * structure doesn't already exist.
		 *
		 * The peer/connection cleanup code assumes that there is 1 peer
		 * per connection. If we actually created a peer structure here
		 * and this packet was an rxdebug packet, the peer structure would
		 * never be cleaned up.
		 */
		peer = rxi_FindPeer(*host, *port, 0, 0);
		/* Since this may not be associated with a connection,
		 * it may have no refCount, meaning we could race with
		 * ReapConnections
		 */
		if (peer && (peer->refCount > 0)) {
		    MUTEX_ENTER(&peer->peer_lock);
		    hadd32(peer->bytesReceived, p->length);
		    MUTEX_EXIT(&peer->peer_lock);

#ifdef RX_TRIMDATABUFS
	/* Free any empty packet buffers at the end of this packet */
	rxi_TrimDataBufs(p, 1);

#endif /* !KERNEL || UKERNEL */
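
/*
 * Editorial note (not in the original source): the savelen dance above is
 * the whole overflow check. Rx headers carry no length field, so the last
 * iovec is temporarily widened by RX_EXTRABUFFERSIZE before recvmsg; if
 * the kernel then hands back nbytes > tlen, the datagram was bigger than
 * the advertised receive size and is rejected as bogus rather than
 * silently truncated. The (p->length & 0x8000) test catches the
 * nbytes < RX_HEADER_SIZE case, where the u_short subtraction wraps.
 */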
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header. All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length. All but the first packet are preceded by
     * an abbreviated four byte header. The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));

    niov = p->niovecs - 2;
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
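
/*
 * Worked example (editorial, not in the original source): the abbreviated
 * jumbo header is one 32-bit word, flags in the top byte and checksum in
 * the low 16 bits. If the wire bytes are AB 00 CD EF, then after ntohl
 *
 *     temp      = 0xAB00CDEF
 *     jp->flags = 0xAB       (temp >> 24)
 *     jp->cksum = 0xCDEF     ((u_short)temp)
 *
 * and the split packet's serial/seq are simply the parent's plus one,
 * matching how the sender stamped consecutive packets of the jumbogram.
 */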
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
cpytoc(mblk_t * mp, int off, int len, char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	t = iovs[i].iov_len;
	memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e)  cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

    if (m->m_len <= off) {

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    t = MIN(l1, MIN(l2, (unsigned int)len));

    p1 = mtod(m, caddr_t);

    p2 = iovs[i].iov_base;
    l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */

#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;

    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /*KERNEL && !UKERNEL */
/* send a response to a debug packet */
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
		       afs_uint32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;

	    /* get basic stats */
	    memset(&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.nPackets = htonl(rx_nPackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
	    tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
	    queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
	    ap->length = sizeof(struct rx_debugStats);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    rx_computelen(ap, ap->length);
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);

	    tl = sizeof(struct rx_debugConn) - ap->length;
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset(&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of connections.
		 */
#ifdef AFS_PTHREAD_ENV
	    MUTEX_ENTER(&rx_connHashTable_lock);
	    /* We might be slightly out of step since we are not
	     * locking each call, but this is only debugging output.
	     */
	    for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		if ((all || rxi_IsConnInteresting(tc))
		    && tin.index-- <= 0) {
		    tconn.host = tc->peer->host;
		    tconn.port = tc->peer->port;
		    tconn.cid = htonl(tc->cid);
		    tconn.epoch = htonl(tc->epoch);
		    tconn.serial = htonl(tc->serial);
		    for (j = 0; j < RX_MAXCALLS; j++) {
			tconn.callNumber[j] = htonl(tc->callNumber[j]);
			if ((tcall = tc->call[j])) {
			    tconn.callState[j] = tcall->state;
			    tconn.callMode[j] = tcall->mode;
			    tconn.callFlags[j] = tcall->flags;
			    if (queue_IsNotEmpty(&tcall->rq))
				tconn.callOther[j] |= RX_OTHER_IN;
			    if (queue_IsNotEmpty(&tcall->tq))
				tconn.callOther[j] |= RX_OTHER_OUT;
			    tconn.callState[j] = RX_STATE_NOTINIT;

		    tconn.natMTU = htonl(tc->peer->natMTU);
		    tconn.error = htonl(tc->error);
		    tconn.flags = tc->flags;
		    tconn.type = tc->type;
		    tconn.securityIndex = tc->securityIndex;
		    if (tc->securityObject) {
			RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			DOHTONL(packetsReceived);
			DOHTONL(packetsSent);
			DOHTONL(bytesReceived);
				 sizeof(tconn.secStats.spares) /
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)

		    MUTEX_EXIT(&rx_connHashTable_lock);
		    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
		    ap->length = sizeof(struct rx_debugConn);
		    rxi_SendDebugPacket(ap, asocket, ahost, aport,
	    MUTEX_EXIT(&rx_connHashTable_lock);

	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    /*
	     * Pass back all the peer structures we have available
	     */
    case RX_DEBUGI_GETPEER:{
	    struct rx_debugPeer tpeer;

	    tl = sizeof(struct rx_debugPeer) - ap->length;
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    memset(&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of peers.
		 *
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock
		 * also increases the risk that we will miss a new
		 * entry - but we are willing to live with this
		 * limitation since this is meant for debugging only
		 */
#ifdef AFS_PTHREAD_ENV
		MUTEX_ENTER(&rx_peerHashTable_lock);
		for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		    if (tin.index-- <= 0) {
			MUTEX_EXIT(&rx_peerHashTable_lock);

			MUTEX_ENTER(&tp->peer_lock);
			tpeer.host = tp->host;
			tpeer.port = tp->port;
			tpeer.ifMTU = htons(tp->ifMTU);
			tpeer.idleWhen = htonl(tp->idleWhen);
			tpeer.refCount = htons(tp->refCount);
			tpeer.burstSize = tp->burstSize;
			tpeer.burst = tp->burst;
			tpeer.burstWait.sec = htonl(tp->burstWait.sec);
			tpeer.burstWait.usec = htonl(tp->burstWait.usec);
			tpeer.rtt = htonl(tp->rtt);
			tpeer.rtt_dev = htonl(tp->rtt_dev);
			tpeer.timeout.sec = htonl(tp->timeout.sec);
			tpeer.timeout.usec = htonl(tp->timeout.usec);
			tpeer.nSent = htonl(tp->nSent);
			tpeer.reSends = htonl(tp->reSends);
			tpeer.inPacketSkew = htonl(tp->inPacketSkew);
			tpeer.outPacketSkew = htonl(tp->outPacketSkew);
			tpeer.rateFlag = htonl(tp->rateFlag);
			tpeer.natMTU = htons(tp->natMTU);
			tpeer.maxMTU = htons(tp->maxMTU);
			tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
			tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
			tpeer.MTU = htons(tp->MTU);
			tpeer.cwind = htons(tp->cwind);
			tpeer.nDgramPackets = htons(tp->nDgramPackets);
			tpeer.congestSeq = htons(tp->congestSeq);
			tpeer.bytesSent.high = htonl(tp->bytesSent.high);
			tpeer.bytesSent.low = htonl(tp->bytesSent.low);
			tpeer.bytesReceived.high =
			    htonl(tp->bytesReceived.high);
			tpeer.bytesReceived.low =
			    htonl(tp->bytesReceived.low);
			MUTEX_EXIT(&tp->peer_lock);

			MUTEX_ENTER(&rx_peerHashTable_lock);
		MUTEX_EXIT(&rx_peerHashTable_lock);

		rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
		ap->length = sizeof(struct rx_debugPeer);
		rxi_SendDebugPacket(ap, asocket, ahost, aport,
	    MUTEX_EXIT(&rx_peerHashTable_lock);

	    /* if we make it here, there are no interesting packets */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    case RX_DEBUGI_RXSTATS:{
	    tl = sizeof(rx_stats) - ap->length;
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    /* Since it's all int32s, convert to network order with a loop. */
	    if (rx_stats_active)
		MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) & rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

	    ap->length = sizeof(rx_stats);
	    if (rx_stats_active)
		MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
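
/*
 * Editorial note (not in the original source): these handlers are what
 * the rxdebug utility talks to. Each request is a struct rx_debugIn
 * (type, index) in network order; GETCONN/GETPEER use index as a cursor,
 * and the all-ones host/cid sentinel marks the end of iteration. For
 * example, "rxdebug <host> <port> -rxstats" issues an RX_DEBUGI_RXSTATS
 * request, and "-allconnections" walks RX_DEBUGI_GETALLCONN.
 */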
rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
			 afs_uint32 ahost, short aport, int istack)
    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_uint32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;

    int waslocked = ISAFS_GLOCK();

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	nbytes -= apacket->wirevec[i].iov_len;

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");

    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");

    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
/* Send the packet to appropriate destination for the specified
 * call. The header is first encoded and placed in the packet.
 */
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;

    char deliveryType = 'S';

    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here. We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number. The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    if (p->length > conn->peer->maxPacketSize) {
	if ((p->header.type == RX_PACKET_TYPE_ACK) &&
	    (p->header.flags & RX_REQUEST_ACK)) {
	    conn->lastPingSize = p->length;
	    conn->lastPingSizeSer = p->header.serial;
	} else if (p->header.seq != 0) {
	    conn->lastPacketSize = p->length;
	    conn->lastPacketSizeSeq = p->header.seq;
    MUTEX_EXIT(&conn->conn_data_lock);

    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times. RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;

    /* If an output tracer function is defined, call it with the packet and
     * network address. Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	    deliveryType = 'D';	/* Drop the packet */

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * received on */
    socket = (conn->type ==
	      RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

    /* Loop until the packet is sent. We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
    waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");

	 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
		     p->length + RX_HEADER_SIZE, istack)) != 0) {
	/* send failed, so let's hurry up the resend, eh? */
	if (rx_stats_active)
	    rx_atomic_inc(&rx_stats.netSendFailures);
	p->retryTime = p->timeSent;	/* resend it very soon */
	clock_Addmsec(&(p->retryTime),
		      10 + (((afs_uint32) p->backoff) << 8));
	/* Some systems are nice and tell us right away that we cannot
	 * reach this recipient by returning an error code.
	 * So, when this happens let's "down" the host NOW so
	 * we don't sit around waiting for this host to timeout later.
	 */
	    (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
#elif defined(AFS_LINUX20_ENV)
	    code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
	    code == EHOSTUNREACH
	    call->lastReceiveTime = 0;

#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d\n",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
	 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
	 p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));

    if (rx_stats_active) {
	rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
	MUTEX_ENTER(&peer->peer_lock);
	hadd32(peer->bytesSent, p->length);
	MUTEX_EXIT(&peer->peer_lock);
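
/*
 * Worked example (editorial, not in the original source): the failure
 * path above reschedules the packet with a delay of 10 + (backoff << 8)
 * milliseconds, so consecutive send failures back off roughly linearly
 * in steps of about a quarter second:
 *
 *     backoff 0 -> 10 ms,  1 -> 266 ms,  2 -> 522 ms,  3 -> 778 ms
 *
 * Pairing that with the unreachable-network errno checks lets the host be
 * marked down immediately instead of waiting for those retries to lapse.
 */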
/* Send a list of packets to appropriate destination for the specified
 * connection. The headers are first encoded and placed in the packets.
 */
rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
		   struct rx_packet **list, int len, int istack)
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length, code;
    struct rx_jumboHeader *jp;
    char deliveryType = 'S';

    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    if (len + 1 > RX_MAXIOVECS) {
	osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");

    /*
     * Stamp the packets in this jumbogram with consecutive serial numbers
     */
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    for (i = 0; i < len; i++) {
	p = list[i];
	if (p->length > conn->peer->maxPacketSize) {
	    /* a ping *or* a sequenced packet can count */
	    if ((p->length > conn->peer->maxPacketSize)) {
		if (((p->header.type == RX_PACKET_TYPE_ACK) &&
		     (p->header.flags & RX_REQUEST_ACK)) &&
		    ((i == 0) || (p->length >= conn->lastPingSize))) {
		    conn->lastPingSize = p->length;
		    conn->lastPingSizeSer = serial + i;
		} else if ((p->header.seq != 0) &&
			   ((i == 0) || (p->length >= conn->lastPacketSize))) {
		    conn->lastPacketSize = p->length;
		    conn->lastPacketSizeSeq = p->header.seq;
    MUTEX_EXIT(&conn->conn_data_lock);
2387 /* This stuff should be revamped, I think, so that most, if not
2388 * all, of the header stuff is always added here. We could
2389 * probably do away with the encode/decode routines. XXXXX */
2392 length = RX_HEADER_SIZE;
2393 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2394 wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0; i < len; i++) {
        p = list[i];

        /* The whole 3.5 jumbogram scheme relies on packets fitting
         * in a single packet buffer. */
        if (p->niovecs > 2) {
            osi_Panic("rxi_SendPacketList, niovecs > 2\n");
        }

        /* Set the RX_JUMBO_PACKET flags in all but the last packets
         * in this jumbogram */
        if (i < len - 1) {
            if (p->length != RX_JUMBOBUFFERSIZE) {
                osi_Panic("rxi_SendPacketList, length != jumbo size\n");
            }
            p->header.flags |= RX_JUMBO_PACKET;
            length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
            wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
        } else {
            wirevec[i + 1].iov_len = p->length;
            length += p->length;
        }
        wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
        if (jp != NULL) {
            /* Convert jumbo packet header to network byte order */
            temp = (afs_uint32) (p->header.flags) << 24;
            temp |= (afs_uint32) (p->header.spare);
            *(afs_uint32 *) jp = htonl(temp);
        }
        jp = (struct rx_jumboHeader *)
            ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);

        /* Stamp each packet with a unique serial number.  The serial
         * number is maintained on a connection basis because some types
         * of security may be based on the serial number of the packet,
         * and security is handled on a per authenticated-connection
         * basis. */
        /* Pre-increment, to guarantee no zero serial number; a zero
         * serial number means the packet was never sent. */
        p->header.serial = ++serial;
        /* This is so we can adjust retransmit time-outs better in the face of
         * rapidly changing round-trip times.  RTO estimation is not a la Karn.
         */
        if (p->firstSerial == 0) {
            p->firstSerial = p->header.serial;
        }
#ifdef RXDEBUG
        /* If an output tracer function is defined, call it with the packet and
         * network address.  Note this function may modify its arguments. */
        if (rx_almostSent) {
            int drop = (*rx_almostSent) (p, &addr);
            /* drop packet if return value is non-zero? */
            if (drop)
                deliveryType = 'D';	/* Drop the packet */
        }
#endif

        /* Get network byte order header */
        rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
					 * touch ALL the fields */
    }
    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
        (conn->type ==
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';	/* Drop the packet */
    } else {
        deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */
        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        waslocked = ISAFS_GLOCK();
        if (!istack && waslocked)
            AFS_GUNLOCK();
#endif
        if ((code =
             osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
                         istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.netSendFailures);
            for (i = 0; i < len; i++) {
                p = list[i];
                p->retryTime = p->timeSent;	/* resend it very soon */
                clock_Addmsec(&(p->retryTime),
                              10 + (((afs_uint32) p->backoff) << 8));
            }
            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later.
             */
            if (call &&
#ifdef AFS_NT40_ENV
                (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
#elif defined(AFS_LINUX20_ENV)
                code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
                code == EHOSTUNREACH
#else
                0
#endif
                )
                call->lastReceiveTime = 0;
        }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        if (!istack && waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    osi_Assert(p != NULL);

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" resend %d.%.3d len %d\n",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
         ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
         p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif

    if (rx_stats_active) {
        rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
        MUTEX_ENTER(&peer->peer_lock);
        hadd32(peer->bytesSent, p->length);
        MUTEX_EXIT(&peer->peer_lock);
    }
}
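/*
 * Illustration (added commentary, not original code): for len == 3 the
 * loop above builds a single UDP datagram whose wire image is
 *
 *	wirevec[0]  Rx header			RX_HEADER_SIZE bytes
 *	wirevec[1]  data + jumbo header		RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE
 *	wirevec[2]  data + jumbo header		RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE
 *	wirevec[3]  data			final packet's length
 *
 * Each trailing jumbo header is filled in one iteration late, with the
 * *next* packet's flags and spare field; that is why jp is only written
 * after it has survived a full pass through the loop.
 */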
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(struct rx_call *call,
                struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;
    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
        p->length = nbytes;
    else
        nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);

    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1;	/* so condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {		/* means we truncated the packet above.  We probably don't */
        /* really need to do this, but it seems safer this way, given that */
        /* sneaky optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}
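/*
 * Usage sketch (illustrative; mirrors how the abort paths elsewhere in rx
 * use this routine).  To send a call abort carrying a network-order error
 * code:
 *
 *	afs_int32 error = htonl(call->error);
 *	(void)rxi_SendSpecial(call, call->conn, NULL, RX_PACKET_TYPE_ABORT,
 *			      (char *)&error, sizeof(error), istack);
 *
 * With a NULL optionalPacket the routine allocates and frees the packet
 * itself and returns NULL; pass a packet in if you want to reuse it.
 */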
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */

    memset(buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
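/*
 * For reference (derived from the encoder above): the RX_HEADER_SIZE-byte
 * wire header is seven 32-bit words, each in network byte order:
 *
 *	word 0	epoch
 *	word 1	cid (connection id | channel)
 *	word 2	callNumber
 *	word 3	seq
 *	word 4	serial
 *	word 5	type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *	word 6	spare<<16 | serviceId
 *
 * The top 16 bits of word 6 double as the security checksum on the wire;
 * see the note in the decoder below.
 */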
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;
    temp = ntohl(*buf);
    buf++;
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;
    temp = ntohl(*buf);
    buf++;
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
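#if 0
/* Sketch only (not compiled): encode followed by decode should be the
 * identity on every field both routines touch.  A debug build could
 * assert that with something like this; rxi_CheckHeaderRoundTrip is
 * hypothetical and not part of the rx API. */
static void
rxi_CheckHeaderRoundTrip(struct rx_packet *p)
{
    struct rx_header saved = p->header;

    rxi_EncodePacketHeader(p);
    rxi_DecodePacketHeader(p);
    osi_Assert(p->header.seq == saved.seq);
    osi_Assert(p->header.serial == saved.serial);
    osi_Assert(p->header.callNumber == saved.callNumber);
}
#endif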
/*
 * LOCKS HELD: called with call->lock held.
 *
 * PrepareSendPacket is the only place in the code that
 * can increment call->tnext.  This could become an atomic
 * in the future.  Beyond that there is nothing in this
 * function that requires the call being locked.  This
 * function can only be called by the application thread.
 */
void
rxi_PrepareSendPacket(struct rx_call *call,
                      struct rx_packet *p, int last)
{
    struct rx_connection *conn = call->conn;
    afs_uint32 seq = call->tnext++;
    unsigned int i;
    afs_int32 len;		/* len must be a signed type; it can go negative */

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    MUTEX_EXIT(&call->lock);
    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    p->header.callNumber = *call->callNumber;
    p->header.seq = seq;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);	/* Never yet transmitted */
    clock_Zero(&p->firstSent);	/* Never yet transmitted */
    p->header.serial = 0;	/* Another way of saying never transmitted... */

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");	/* MTUXXX */
    } else if (i < p->niovecs) {
        /* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
        rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
#else /* !RX_ENABLE_TSFPQ */
        MUTEX_ENTER(&rx_freePktQ_lock);
        rxi_FreeDataBufsNoLock(p, i);
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */
        p->niovecs = i;
    }
    if (len)
        p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
    MUTEX_ENTER(&call->lock);
}
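/*
 * Worked example of the trimming above (illustrative numbers): with
 * securityHeaderSize = 4, p->length = 1000, and two 1412-byte data iovecs
 * (niovecs = 3), len starts at 1004 and reaches -408 after the first data
 * iovec; the second iovec is freed, niovecs drops to 2, and the first
 * data iovec's iov_len is adjusted by -408, down to exactly 1004.
 */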
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
        return mtu;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
        return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
        return 0;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
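/*
 * Worked example (assuming the usual sizes: RX_HEADER_SIZE = 28,
 * RX_JUMBOBUFFERSIZE = 1412, RX_JUMBOHEADERSIZE = 4): for an Ethernet
 * mtu of 1500, adjMTU = 28 + 1412 + 4 = 1444.  The 56 bytes left over
 * cannot hold another jumbo buffer (56 / 1416 == 0 frags), so the
 * adjusted MTU is 1444 and each datagram carries one full Rx buffer
 * with no wasted tail.
 */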
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
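/*
 * E.g. (illustrative, same size assumptions as above): with
 * rxi_nSendFrags = 3, mtu = 1444 and a peer advertising 5000,
 * maxMTU = MIN(3 * 1444, 5000) = 4332, which rxi_AdjustIfMTU rounds
 * down to 1444 + 2 * 1416 = 4276, a whole number of jumbo buffers.
 */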
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
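/*
 * Worked example (same size assumptions as above, plus UDP_HDR_SIZE = 28
 * for the IP+UDP headers, and assuming RX_MAX_PACKET_SIZE does not clamp
 * the intermediate value): rxi_AdjustDgramPackets(3, 1444) gives
 * maxMTU = 3 * (1444 + 28) - 28 = 4388; subtracting the first and last
 * packets (28 + 2 * 1412 + 4 = 2856) leaves 1532, and 1532 / 1416 = 1
 * middle packet, so the result is 2 + 1 = 3 packets, as expected.
 */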
/*
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where the packet leakage is.
 */
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
    struct rx_packet *p;
#ifdef AFS_NT40_ENV
    int zilch;
    char output[2048];
#define RXDPRINTF sprintf
#define RXDPRINTOUT output
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);
    RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    for (p = rx_mallocedP; p != NULL; p = p->allNextp) {
        RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, retryTime=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, backoff=%u, length=%u  header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
                  cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec, p->retryTime.sec, p->retryTime.usec,
                  p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->backoff, (afs_uint32)p->length,
                  p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
                  (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
                  (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
#ifdef AFS_NT40_ENV
        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }

    RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
#endif /* RXDEBUG_PACKET */

    return 0;
}
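#if 0
/* Sketch only (not compiled): on an RXDEBUG_PACKET build, a leak hunt on
 * a unix platform can dump every live packet to a log file.  The path
 * name and the rx_LeakCheck helper are made up for illustration.  Note
 * that on Windows the outputFile argument is handed to WriteFile above,
 * so callers there pass a file HANDLE rather than a stdio FILE. */
static void
rx_LeakCheck(void)
{
    FILE *fp = fopen("/tmp/rx_packets.log", "w");

    if (fp != NULL) {
        rx_DumpPackets(fp, "leak-check");
        fclose(fp);
    }
}
#endif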