 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License. For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include <afs/param.h>
# include "afs/sysincludes.h"
# include "afsincludes.h"
# include "rx_kcommon.h"
# else /* defined(UKERNEL) */
# ifdef RX_KERNEL_TRACE
# include "rx_kcommon.h"
# ifndef AFS_LINUX20_ENV
# if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
# include "afs/sysincludes.h"
# if defined(AFS_OBSD_ENV)
# include "h/socket.h"
# if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
# if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
# include "sys/mount.h"  /* it gets pulled in by something later anyway */
# include "netinet/in.h"
# include "afs/afs_osi.h"
# include "rx_kmutex.h"
# endif /* defined(UKERNEL) */
# if defined(AFS_NT40_ENV)
# define EWOULDBLOCK WSAEWOULDBLOCK
# include "rx_xmit_nt.h"
# include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
static struct rx_packet *rx_mallocedP = 0;
static afs_uint32 rx_packet_id = 0;
extern char cml_version_number[];
static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);
static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_uint32 ahost, short aport,
static struct rx_packet *rxi_AllocPacketNoLock(int class);
static void rxi_MorePacketsNoLock(int apackets);
#ifdef RX_ENABLE_TSFPQ
static int rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first,
static void rxi_AdjustLocalPacketsTSFPQ(int num_keep_local,
                                        int allow_overcommit);
static void rxi_FreePacketNoLock(struct rx_packet *p);
static int rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first);
static int rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first,
                                   struct rx_queue * q);
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact. If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately. For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 * all packet buffers (iov_base) are integral multiples of
 * offset is an integral multiple of the word size.
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
        l += packet->wirevec[i].iov_len;
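/* Illustrative sketch (not part of the original source): one way a
 * security package might claim the trailer room described in rule 1
 * above.  The example_* name is hypothetical, and the RX_CBUFFERSIZE
 * bound is an assumption about how much backing store sits behind the
 * final iovec. */
static int
example_ClaimSecurityTrailer(struct rx_packet *p, unsigned int trailerSize)
{
    struct iovec *last = &p->wirevec[p->niovecs - 1];

    if (last->iov_len + trailerSize > RX_CBUFFERSIZE)
        return -1;                  /* reserved room cannot hold the trailer */
    last->iov_len += trailerSize;   /* expose the hidden trailer space */
    p->length += trailerSize;       /* keep the logical length in step */
    return 0;
}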
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                             (offset - l))) = data;
        l += packet->wirevec[i].iov_len;
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested. l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
    while ((r > 0) && (i < packet->niovecs)) {
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        l += packet->wirevec[i].iov_len;
    return (r ? (resid - r) : resid);
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    unsigned int i, j, l, o, r;
    for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > o) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested. l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy into this iovec.
     * offset only applies to the first iovec.
    while ((r > 0) && (i <= RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0)  /* ++niovecs as a side-effect */
        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
        l += packet->wirevec[i].iov_len;
    return (r ? (resid - r) : resid);
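/* Usage sketch (hypothetical, not from the original file): round-trip a
 * small payload through the slow copy paths above.  Real callers normally
 * go through the rx_packetwrite/rx_packetread wrappers, which only fall
 * back to these routines when the data spans more than one iovec. */
static int
example_SlowRoundTrip(struct rx_packet *p)
{
    static char msg[16] = "example payload";
    char out[16];

    if (rx_SlowWritePacket(p, 0, sizeof(msg), msg) != sizeof(msg))
        return -1;              /* could not attach enough buffers */
    if (rx_SlowReadPacket(p, 0, sizeof(out), out) != sizeof(out))
        return -1;              /* packet shorter than requested */
    return memcmp(msg, out, sizeof(out));   /* 0 on a faithful copy */
}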
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
    struct rx_packet *p, *np;
    num_pkts = AllocPacketBufs(class, num_pkts, q);
    for (queue_Scan(q, p, np, rx_packet)) {
        RX_PACKET_IOV_FULLINIT(p);
#ifdef RX_ENABLE_TSFPQ
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    transfer = num_pkts - rx_ts_info->_FPQ.len;
        MUTEX_ENTER(&rx_freePktQ_lock);
        transfer = MAX(transfer, rx_TSFPQGlobSize);
        if (transfer > rx_nFreePackets) {
            /* alloc enough for us, plus a few globs for other threads */
            rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
        RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
        MUTEX_EXIT(&rx_freePktQ_lock);
    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
#else /* RX_ENABLE_TSFPQ */
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (; (num_pkts > 0) && (rxi_OverQuota2(class, num_pkts));
         num_pkts--, overq++);
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
    if (rx_nFreePackets < num_pkts)
        num_pkts = rx_nFreePackets;
        rxi_NeedMorePackets = TRUE;
    if (rx_nFreePackets < num_pkts) {
        rxi_MorePacketsNoLock(MAX((num_pkts - rx_nFreePackets), 4 * rx_initSendWindow));
    for (i = 0, c = queue_First(&rx_freePacketQueue, rx_packet);
         i++, c = queue_Next(c, rx_packet)) {
        queue_SplitBeforeAppend(&rx_freePacketQueue, q, c);
    rx_nFreePackets -= num_pkts;
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
 * Free a packet currently used as a continuation buffer
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue * q)
    struct rx_ts_info_t * rx_ts_info;
    struct rx_packet *c, *nc;
    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);
        for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
        for (queue_Scan(q, c, nc, rx_packet)) {
            rxi_FreeDataBufsTSFPQ(c, 2, 0);
    RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
rxi_FreePackets(int num_pkts, struct rx_queue *q)
    struct rx_packet *p, *np;
    osi_Assert(num_pkts >= 0);
        for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
        for (queue_Scan(q, p, np, rx_packet)) {
            if (p->niovecs > 2) {
                qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
        queue_SpliceAppend(q, &cbs);
    MUTEX_ENTER(&rx_freePktQ_lock);
    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;
    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending. All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet. Return nbytes <= 0 if successful, otherwise
 * return the number of bytes (> 0) which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time. Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    struct rx_packet *cb, *ncb;
    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
    if ((nv + p->niovecs) > RX_MAXWVECS)
        nv = RX_MAXWVECS - p->niovecs;
    /* allocate buffers */
    nv = AllocPacketBufs(class, nv, &q);
    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
        p->wirevec[i].iov_base = (caddr_t) cb->localdata;
        p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
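/* Caller sketch (assumption, example_* is hypothetical): rxi_AllocDataBuf
 * returns the number of bytes it failed to provide, so success is a
 * return value <= 0 and a positive value signals a partial allocation. */
static int
example_EnsureRoom(struct rx_packet *p, int wantBytes)
{
    int missing = rxi_AllocDataBuf(p, wantBytes, RX_PACKET_CLASS_SEND_CBUF);
    if (missing > 0)
        return wantBytes - missing;     /* only this much was attached */
    return wantBytes;                   /* full request satisfied */
}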
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);
    PIN(p, getme);              /* XXXXX */
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        RX_TS_FPQ_CHECKIN(rx_ts_info, p);
        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        MUTEX_EXIT(&rx_freePktQ_lock);
    rx_ts_info->_FPQ.delta += apackets;
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        rxi_NeedMorePackets = FALSE;
        MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);
    PIN(p, getme);              /* XXXXX */
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
    rx_nPackets += apackets;
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
    struct rx_packet *p, *e;
    struct rx_ts_info_t * rx_ts_info;
    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);
    PIN(p, getme);              /* XXXXX */
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info, apackets);
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_packets_mutex);
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
        RX_TS_FPQ_CHECKIN(rx_ts_info, p);
        MUTEX_ENTER(&rx_freePktQ_lock);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
        MUTEX_EXIT(&rx_freePktQ_lock);
    rx_ts_info->_FPQ.delta += apackets;
        (num_keep_local < apackets)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
        rxi_NeedMorePackets = FALSE;
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
#ifdef RX_ENABLE_TSFPQ
    struct rx_ts_info_t * rx_ts_info;
#endif /* RX_ENABLE_TSFPQ */
    struct rx_packet *p, *e;
    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    getme = apackets * sizeof(struct rx_packet);
    p = osi_Alloc(getme);
        apackets -= apackets / 4;
        osi_Assert(apackets > 0);
#ifdef RX_ENABLE_TSFPQ
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info, apackets);
#endif /* RX_ENABLE_TSFPQ */
    for (e = p + apackets; p < e; p++) {
        RX_PACKET_IOV_INIT(p);
#ifdef RX_TRACK_PACKETS
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
#ifdef RXDEBUG_PACKET
        p->packetId = rx_packet_id++;
        p->allNextp = rx_mallocedP;
#endif /* RXDEBUG_PACKET */
    rx_nFreePackets += apackets;
    MUTEX_ENTER(&rx_packets_mutex);
    rx_nPackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    RX_TS_FPQ_COMPUTE_LIMITS;
#endif /* RX_ENABLE_TSFPQ */
    MUTEX_EXIT(&rx_packets_mutex);
    rxi_NeedMorePackets = FALSE;
rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
#ifdef RX_ENABLE_TSFPQ
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    if (num_keep_local != rx_ts_info->_FPQ.len) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (num_keep_local < rx_ts_info->_FPQ.len) {
            xfer = rx_ts_info->_FPQ.len - num_keep_local;
            RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
            xfer = num_keep_local - rx_ts_info->_FPQ.len;
            if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
                xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
            if (rx_nFreePackets < xfer) {
                rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
            RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
        MUTEX_EXIT(&rx_freePktQ_lock);
rxi_FlushLocalPacketsTSFPQ(void)
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
#endif /* RX_ENABLE_TSFPQ */
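#ifdef RX_ENABLE_TSFPQ
/* Usage sketch (assumption): a worker thread that is shutting down can
 * push its whole thread-local free queue back to the global pool; the
 * flush above is just rxi_AdjustLocalPacketsTSFPQ(0, 0). */
static void
example_ThreadShutdown(void)
{
    rxi_FlushLocalPacketsTSFPQ();   /* keep zero packets thread-local */
}
#endif /* RX_ENABLE_TSFPQ */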
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_maxSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out. It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
/* This explanation is bogus. The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages. In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list. An array springs rapidly to mind.
/* Actually free the packet p. */
#ifndef RX_ENABLE_TSFPQ
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %"AFS_PTR_FMT"\n", p));
    queue_Append(&rx_freePacketQueue, p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
    struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %"AFS_PTR_FMT"\n", p));
    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info, p);
    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
 * free continuation buffers off a packet into a queue
 * [IN] p -- packet from which continuation buffers will be freed
 * [IN] first -- iovec offset of first continuation buffer to free
 * [IN] q -- queue into which continuation buffers will be chained
 *    number of continuation buffers freed
#ifndef RX_ENABLE_TSFPQ
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
    struct rx_packet * cb;
    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
        cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
        RX_FPQ_MARK_FREE(cb);
 * free packet continuation buffers into the global free packet pool
 * [IN] p -- packet from which to free continuation buffers
 * [IN] first -- iovec offset of first continuation buffer to free
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
 * free packet continuation buffers into the thread-local free pool
 * [IN] p -- packet from which continuation buffers will be freed
 * [IN] first -- iovec offset of first continuation buffer to free
 *               any value less than 2, the min number of iovecs,
 *               is treated as if it is 2.
 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
 *                      global free pool before returning
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    for (first = MAX(2, first); first < p->niovecs; first++) {
        iov = &p->wirevec[first];
            osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
        RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        RX_TS_FPQ_LTOG(rx_ts_info);
        /* Wakeup anyone waiting for packets */
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;
/* rxi_RestoreDataBufs
 * Restore the correct sizes to the iovecs. Called when reusing a packet
 * for reading off the wire.
rxi_RestoreDataBufs(struct rx_packet *p)
    RX_PACKET_IOV_INIT(p);
    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
        iov->iov_len = RX_CBUFFERSIZE;
#ifdef RX_ENABLE_TSFPQ
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    struct rx_ts_info_t * rx_ts_info;
        osi_Panic("TrimDataBufs 1: first must be 1");
    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    /* iov now points to the first empty data buffer. */
        RX_TS_INFO_GET(rx_ts_info);
        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            RX_TS_FPQ_CHECKIN(rx_ts_info, RX_CBUF_TO_PACKET(iov->iov_base, p));
        if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
            MUTEX_ENTER(&rx_freePktQ_lock);
            RX_TS_FPQ_LTOG(rx_ts_info);
            rxi_PacketsUnWait();
            MUTEX_EXIT(&rx_freePktQ_lock);
#else /* RX_ENABLE_TSFPQ */
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
        osi_Panic("TrimDataBufs 1: first must be 1");
    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    /* iov now points to the first empty data buffer. */
        MUTEX_ENTER(&rx_freePktQ_lock);
        for (; iov < end; iov++) {
                osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
            rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
        rxi_PacketsUnWait();
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p. P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
rxi_FreePacket(struct rx_packet *p)
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
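/* Caller sketch (hypothetical): per the comment above, a packet must be
 * detached from any rx_queue before it is handed to rxi_FreePacket. */
static void
example_DiscardQueuedPacket(struct rx_packet *p)
{
    queue_Remove(p);    /* p must not remain on any queue */
    rxi_FreePacket(p);  /* return p and its continuation buffers */
}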
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
static struct rx_packet *
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
        return (struct rx_packet *)0;
    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
        if (queue_IsEmpty(&rx_freePacketQueue))
            osi_Panic("rxi_AllocPacket error");
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);
        RX_TS_FPQ_GTOL(rx_ts_info);
    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);
    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#else /* RX_ENABLE_TSFPQ */
static struct rx_packet *
rxi_AllocPacketNoLock(int class)
    struct rx_packet *p;
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        if (rx_stats_active) {
            case RX_PACKET_CLASS_RECEIVE:
                rx_atomic_inc(&rx_stats.receivePktAllocFailures);
            case RX_PACKET_CLASS_SEND:
                rx_atomic_inc(&rx_stats.sendPktAllocFailures);
            case RX_PACKET_CLASS_SPECIAL:
                rx_atomic_inc(&rx_stats.specialPktAllocFailures);
            case RX_PACKET_CLASS_RECV_CBUF:
                rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
            case RX_PACKET_CLASS_SEND_CBUF:
                rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
        return (struct rx_packet *)0;
    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (queue_IsEmpty(&rx_freePacketQueue))
        osi_Panic("rxi_AllocPacket error");
    if (queue_IsEmpty(&rx_freePacketQueue))
        rxi_MorePacketsNoLock(rx_maxSendWindow);
    p = queue_First(&rx_freePacketQueue, rx_packet);
    RX_FPQ_MARK_USED(p);
    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
static struct rx_packet *
rxi_AllocPacketTSFPQ(int class, int pull_global)
    struct rx_packet *p;
    struct rx_ts_info_t * rx_ts_info;
    RX_TS_INFO_GET(rx_ts_info);
    if (rx_stats_active)
        rx_atomic_inc(&rx_stats.packetRequests);
    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        if (queue_IsEmpty(&rx_freePacketQueue))
            rxi_MorePacketsNoLock(rx_maxSendWindow);
        RX_TS_FPQ_GTOL(rx_ts_info);
        MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
    RX_TS_FPQ_CHECKOUT(rx_ts_info, p);
    dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    RX_PACKET_IOV_FULLINIT(p);
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
rxi_AllocPacket(int class)
    struct rx_packet *p;
    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
#else /* RX_ENABLE_TSFPQ */
rxi_AllocPacket(int class)
    struct rx_packet *p;
    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* RX_ENABLE_TSFPQ */
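/* Sketch (assumption, example_* is hypothetical): because p->length
 * excludes the Rx header, the datagram actually handed to the network is
 * p->length + RX_HEADER_SIZE bytes, as the osi_NetSend calls later in
 * this file show. */
static int
example_WireLength(struct rx_packet *p)
{
    return p->length + RX_HEADER_SIZE;  /* header always goes on the wire */
}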
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call. It also sets the packet length before
 * returning. caution: this is often called at NETPRI
 * Called with call locked.
rxi_AllocSendPacket(struct rx_call *call, int want)
    struct rx_packet *p = (struct rx_packet *)0;
    mud = call->MTU - RX_HEADER_SIZE;
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
        want = MIN(want, mud);
        if ((unsigned)want > p->length)
            (void)rxi_AllocDataBuf(p, (want - p->length),
                                   RX_PACKET_CLASS_SEND_CBUF);
        if (p->length > mud)
        if (delta >= p->length) {
#endif /* RX_ENABLE_TSFPQ */
    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);
            want = MIN(want, mud);
            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);
            if (p->length > mud)
            if (delta >= p->length) {
        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
        call->flags |= RX_CALL_WAIT_PACKETS;
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;
#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
        osi_rxSleep(&rx_waitingForPackets);
        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
/* count the number of used FDs */
    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
#endif /* AFS_NT40_ENV */
#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)
/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    afs_uint32 tlen, savelen;
    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);    /* this is the size of the user data area */
    tlen += RX_HEADER_SIZE;     /* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
                                 * it once in order to avoid races. */
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);
    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;
    p->length = (u_short)(nbytes - RX_HEADER_SIZE);
    if (nbytes < 0 || (nbytes > tlen) || (p->length & 0x8000)) {    /* Bogus packet */
        if (nbytes < 0 && errno == EWOULDBLOCK) {
            if (rx_stats_active)
                rx_atomic_inc(&rx_stats.noPacketOnRead);
        } else if (nbytes <= 0) {
            if (rx_stats_active) {
                rx_atomic_inc(&rx_stats.bogusPacketOnRead);
                rx_stats.bogusHost = from.sin_addr.s_addr;
            dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
                 ntohs(from.sin_port), nbytes));
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
             && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
        rxi_DecodePacketHeader(p);
        *host = from.sin_addr.s_addr;
        *port = from.sin_port;
        dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
             p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
             p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
#ifdef RX_TRIMDATABUFS
        rxi_TrimDataBufs(p, 1);
    /* Extract packet header. */
    rxi_DecodePacketHeader(p);
    *host = from.sin_addr.s_addr;
    *port = from.sin_port;
        && p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
        rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
#ifdef RX_TRIMDATABUFS
    /* Free any empty packet buffers at the end of this packet */
    rxi_TrimDataBufs(p, 1);
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header. All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length. All but the first packet are preceded by
     * an abbreviated four byte header. The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
    niov = p->niovecs - 2;
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);
    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);
    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
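/* Worked sketch (illustrative, not from the original file): the
 * abbreviated four-byte jumbo header packs the flags into the top byte
 * and the checksum into the low sixteen bits, mirroring the ntohl-based
 * decode above and the encode in rxi_SendPacketList below. */
static void
example_DecodeJumboWord(afs_uint32 wire, u_char *flags, u_short *cksum)
{
    afs_uint32 temp = ntohl(wire);      /* word arrives in network order */
    *flags = (u_char) (temp >> 24);
    *cksum = (u_short) (temp & 0xffff);
}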
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    ret = rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
 * message receipt is done in rxk_input or rx_put.
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
cpytoc(mblk_t * mp, int off, int len, char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);
/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        t = iovs[i].iov_len;
        memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e)  cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;
    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");  /* MTUXXX probably don't need this check */
    if (m->m_len <= off) {
    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;
        t = MIN(l1, MIN(l2, (unsigned int)len));
        p1 = mtod(m, caddr_t);
        p2 = iovs[i].iov_base;
        l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
#if defined(AFS_NBSD_ENV)
rx_mb_to_packet(struct mbuf *amb, void (*free) (struct mbuf *), int hdr_len, int data_len, struct rx_packet *phandle)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
    struct rx_packet *phandle;
    int hdr_len, data_len;
#endif /* AFS_NBSD_ENV */
    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */
rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
                       afs_uint32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);
    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;
            /* get basic stats */
            memset(&tstat, 0, sizeof(tstat));   /* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.nPackets = htonl(rx_nPackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
            tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);
            tl = sizeof(struct rx_debugStats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
            ap->length = sizeof(struct rx_debugStats);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            rx_computelen(ap, ap->length);
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);
            tl = sizeof(struct rx_debugConn) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            memset(&tconn, 0, sizeof(tconn));   /* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                                tconn.callState[j] = RX_STATE_NOTINIT;
                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)
                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
            MUTEX_EXIT(&rx_connHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);  /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
     * Pass back all the peer structures we have available
    case RX_DEBUGI_GETPEER:{
            struct rx_debugPeer tpeer;
            tl = sizeof(struct rx_debugPeer) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            memset(&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        MUTEX_EXIT(&rx_peerHashTable_lock);
                        MUTEX_ENTER(&tp->peer_lock);
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = 0;
                        tpeer.burstWait.sec = 0;
                        tpeer.burstWait.usec = 0;
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high =
                            htonl(tp->bytesSent >> 32);
                        tpeer.bytesSent.low =
                            htonl(tp->bytesSent & MAX_AFS_UINT32);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived >> 32);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived & MAX_AFS_UINT32);
                        MUTEX_EXIT(&tp->peer_lock);
                        MUTEX_ENTER(&rx_peerHashTable_lock);
                MUTEX_EXIT(&rx_peerHashTable_lock);
                rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                ap->length = sizeof(struct rx_debugPeer);
                rxi_SendDebugPacket(ap, asocket, ahost, aport,
            MUTEX_EXIT(&rx_peerHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff); /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    case RX_DEBUGI_RXSTATS:{
            tl = sizeof(rx_stats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            /* Since it's all int32s convert to network order with a loop. */
            if (rx_stats_active)
                MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) &rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
            ap->length = sizeof(rx_stats);
            if (rx_stats_active)
                MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
        /* error response packet */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
                         afs_uint32 ahost, short aport, int istack)
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
        memset(buf, 0, sizeof(buf));
        strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
        rx_packetwrite(ap, 0, 65, buf);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                    afs_uint32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;
    unsigned int i, nbytes, savelen = 0;
    int waslocked = ISAFS_GLOCK();
    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
        if (nbytes <= apacket->wirevec[i].iov_len) {
            savelen = apacket->wirevec[i].iov_len;
            saven = apacket->niovecs;
            apacket->wirevec[i].iov_len = nbytes;
            apacket->niovecs = i + 1;   /* so condition fails because i == niovecs */
        nbytes -= apacket->wirevec[i].iov_len;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
                      apacket->length + RX_HEADER_SIZE, istack);
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
    if (saven) {                /* means we truncated the packet above. */
        apacket->wirevec[i - 1].iov_len = savelen;
        apacket->niovecs = saven;
/* Send the packet to appropriate destination for the specified
 * call. The header is first encoded and placed in the packet.
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
               struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    char deliveryType = 'S';
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;
    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here. We could
     * probably do away with the encode/decode routines. XXXXX */
    /* Stamp each packet with a unique serial number. The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    if (p->length > conn->peer->maxPacketSize) {
        if ((p->header.type == RX_PACKET_TYPE_ACK) &&
            (p->header.flags & RX_REQUEST_ACK)) {
            conn->lastPingSize = p->length;
            conn->lastPingSizeSer = p->header.serial;
        } else if (p->header.seq != 0) {
            conn->lastPacketSize = p->length;
            conn->lastPacketSizeSeq = p->header.seq;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times. RTO estimation is not a la Karn.
    if (p->firstSerial == 0) {
        p->firstSerial = p->header.serial;
    /* If an output tracer function is defined, call it with the packet and
     * network address. Note this function may modify its arguments. */
    if (rx_almostSent) {
        int drop = (*rx_almostSent) (p, &addr);
        /* drop packet if return value is non-zero? */
            deliveryType = 'D'; /* Drop the packet */
    /* Get network byte order header */
    rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
                                 * touch ALL the fields */
    /* Send the packet out on the same socket that related packets are being
            RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */
    /* Loop until the packet is sent. We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
    waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
         osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
                     p->length + RX_HEADER_SIZE, istack)) != 0) {
        /* send failed, so let's hurry up the resend, eh? */
        if (rx_stats_active)
            rx_atomic_inc(&rx_stats.netSendFailures);
        p->flags &= ~RX_PKTFLAG_SENT;   /* resend it very soon */
        /* Some systems are nice and tell us right away that we cannot
         * reach this recipient by returning an error code.
         * So, when this happens let's "down" the host NOW so
         * we don't sit around waiting for this host to timeout later.
            (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) || (code == -WSAEHOSTUNREACH)
#elif defined(AFS_LINUX20_ENV)
            code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
            code == EHOSTUNREACH
            call->lastReceiveTime = 0;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
         ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
         p->header.seq, p->header.flags, p, p->length));
    if (rx_stats_active) {
        rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
        MUTEX_ENTER(&peer->peer_lock);
        peer->bytesSent += p->length;
        MUTEX_EXIT(&peer->peer_lock);
/* Send a list of packets to appropriate destination for the specified
 * connection. The headers are first encoded and placed in the packets.
rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
                   struct rx_packet **list, int len, int istack)
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    struct sockaddr_in addr;
    struct rx_peer *peer = conn->peer;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length, code;
    struct rx_jumboHeader *jp;
    char deliveryType = 'S';
    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;
    if (len + 1 > RX_MAXIOVECS) {
        osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
     * Stamp the packets in this jumbogram with consecutive serial numbers
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    for (i = 0; i < len; i++) {
        if (p->length > conn->peer->maxPacketSize) {
            /* a ping *or* a sequenced packet can count */
            if ((p->length > conn->peer->maxPacketSize)) {
                if (((p->header.type == RX_PACKET_TYPE_ACK) &&
                     (p->header.flags & RX_REQUEST_ACK)) &&
                    ((i == 0) || (p->length >= conn->lastPingSize))) {
                    conn->lastPingSize = p->length;
                    conn->lastPingSizeSer = serial + i;
                } else if ((p->header.seq != 0) &&
                           ((i == 0) || (p->length >= conn->lastPacketSize))) {
                    conn->lastPacketSize = p->length;
                    conn->lastPacketSizeSeq = p->header.seq;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here. We could
     * probably do away with the encode/decode routines. XXXXX */
    length = RX_HEADER_SIZE;
    wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0; i < len; i++) {
        /* The whole 3.5 jumbogram scheme relies on packets fitting
         * in a single packet buffer. */
        if (p->niovecs > 2) {
            osi_Panic("rxi_SendPacketList, niovecs > 2\n");
        /* Set the RX_JUMBO_PACKET flags in all but the last packets
            if (p->length != RX_JUMBOBUFFERSIZE) {
                osi_Panic("rxi_SendPacketList, length != jumbo size\n");
            p->header.flags |= RX_JUMBO_PACKET;
            length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
            wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
            wirevec[i + 1].iov_len = p->length;
            length += p->length;
        wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
            /* Convert jumbo packet header to network byte order */
            temp = (afs_uint32) (p->header.flags) << 24;
            temp |= (afs_uint32) (p->header.spare);
            *(afs_uint32 *) jp = htonl(temp);
        jp = (struct rx_jumboHeader *)
            ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
        /* Stamp each packet with a unique serial number. The serial
         * number is maintained on a connection basis because some types
         * of security may be based on the serial number of the packet,
         * and security is handled on a per authenticated-connection
        /* Pre-increment, to guarantee no zero serial number; a zero
         * serial number means the packet was never sent. */
        p->header.serial = ++serial;
        /* This is so we can adjust retransmit time-outs better in the face of
         * rapidly changing round-trip times. RTO estimation is not a la Karn.
        if (p->firstSerial == 0) {
            p->firstSerial = p->header.serial;
        /* If an output tracer function is defined, call it with the packet and
         * network address. Note this function may modify its arguments. */
        if (rx_almostSent) {
            int drop = (*rx_almostSent) (p, &addr);
            /* drop packet if return value is non-zero? */
                deliveryType = 'D';     /* Drop the packet */
        /* Get network byte order header */
        rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
                                     * touch ALL the fields */
    /* Send the packet out on the same socket that related packets are being
            RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */
2448 /* Loop until the packet is sent. We'd prefer just to use a
2449 * blocking socket, but unfortunately the interface doesn't
2450 * allow us to have the socket block in send mode, and not
2451 * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	waslocked = ISAFS_GLOCK();
	if (!istack && waslocked)
	    AFS_GUNLOCK();
#endif
	if ((code =
	     osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
			 istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    if (rx_stats_active)
		rx_atomic_inc(&rx_stats.netSendFailures);
	    for (i = 0; i < len; i++) {
		p = list[i];
		p->flags &= ~RX_PKTFLAG_SENT;	/* resend it very soon */
	    }
	    /* Some systems are nice and tell us right away that we cannot
	     * reach this recipient by returning an error code.
	     * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call &&
#ifdef AFS_NT40_ENV
		((code == -1 && WSAGetLastError() == WSAEHOSTUNREACH)
		 || (code == -WSAEHOSTUNREACH))
#elif defined(AFS_LINUX20_ENV)
		code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV)
		code == EHOSTUNREACH
#else
		0
#endif
		)
		call->lastReceiveTime = 0;
	}
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	if (!istack && waslocked)
	    AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    osi_Assert(p != NULL);
2494 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
2495 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2496 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2497 p->header.seq, p->header.flags, p, p->length));
    if (rx_stats_active) {
	rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
	MUTEX_ENTER(&peer->peer_lock);
	peer->bytesSent += p->length;
	MUTEX_EXIT(&peer->peer_lock);
    }
}
/* Send a raw abort packet, without any call or connection structures */
void
rxi_SendRawAbort(osi_socket socket, afs_uint32 host, u_short port,
		 afs_int32 error, struct rx_packet *source, int istack)
{
    struct rx_header theader;
    struct sockaddr_in addr;
    struct iovec iov[2];
    memset(&theader, 0, sizeof(theader));
    theader.epoch = htonl(source->header.epoch);
    theader.callNumber = htonl(source->header.callNumber);
    theader.serial = htonl(1);
    theader.type = RX_PACKET_TYPE_ABORT;
    theader.serviceId = htons(source->header.serviceId);
    theader.securityIndex = source->header.securityIndex;
    theader.cid = htonl(source->header.cid);

    error = htonl(error);

    iov[0].iov_base = &theader;
    iov[0].iov_len = sizeof(struct rx_header);
    iov[1].iov_base = &error;
    iov[1].iov_len = sizeof(error);
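    /* On the wire, the abort is simply the Rx header followed by the
     * 4-byte abort code, both already converted to network byte order
     * above. */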
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = host;
    addr.sin_port = port;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    addr.sin_len = sizeof(struct sockaddr_in);
#endif

    osi_NetSend(socket, &addr, iov, 2,
		sizeof(struct rx_header) + sizeof(error), istack);
}
2544 /* Send a "special" packet to the peer connection. If call is
2545 * specified, then the packet is directed to a specific call channel
2546 * associated with the connection, otherwise it is directed to the
2547 * connection only. Uses optionalPacket if it is supplied, rather than
2548 * allocating a new packet buffer. Nbytes is the length of the data
2549 * portion of the packet. If data is non-null, nbytes of data are
2550 * copied into the packet. Type is the type of the packet, as defined
2551 * in rx.h. Bug: there's a lot of duplication between this and other
2552 * routines. This needs to be cleaned up. */
2554 rxi_SendSpecial(struct rx_call *call,
2555 struct rx_connection *conn,
2556 struct rx_packet *optionalPacket, int type, char *data,
2557 int nbytes, int istack)
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
	channel = call->channel;
	callNumber = *call->callNumber;
	/* BUSY packets refer to the next call on this connection */
	if (type == RX_PACKET_TYPE_BUSY) {
	    callNumber++;
	}
    } else {
	channel = 0;
	callNumber = 0;
    }

    p = optionalPacket;
    if (!p) {
	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	if (!p)
	    osi_Panic("rxi_SendSpecial failure");
    }
    if (nbytes != -1)
	p->length = nbytes;
    else
	nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
	rx_packetwrite(p, 0, nbytes, data);
    for (i = 1; i < p->niovecs; i++) {
	if (nbytes <= p->wirevec[i].iov_len) {
	    savelen = p->wirevec[i].iov_len;
	    saven = p->niovecs;
	    p->wirevec[i].iov_len = nbytes;
	    p->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= p->wirevec[i].iov_len;
    }
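    /* The wirevec is now clipped so that exactly nbytes of payload go
     * out; savelen/saven remember the original values so a
     * caller-supplied optionalPacket can be restored after the send. */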
    if (call)
	rxi_Send(call, p, istack);
    else
	rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {		/* means we truncated the packet above.  We probably don't */
	/* really need to do this, but it seems safer this way, given that */
	/* sneaky optionalPacket... */
	p->wirevec[i - 1].iov_len = savelen;
	p->niovecs = saven;
    }
    if (!optionalPacket)
	rxi_FreePacket(p);
    return optionalPacket;
}
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
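/*
 * On the wire the 28-byte (RX_HEADER_SIZE) Rx header is seven 32-bit
 * words, each in network byte order:
 *
 *   word 0: epoch
 *   word 1: cid (connection id | channel)
 *   word 2: callNumber
 *   word 3: seq
 *   word 4: serial
 *   word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6: spare<<16 | serviceId (the top halfword doubles as the
 *           security checksum on the wire)
 */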
void
rxi_EncodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */

    memset(buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
		   | (((afs_uint32) p->header.flags) << 16)
		   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(struct rx_packet *p)
{
    afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;

    temp = ntohl(*buf);
    buf++;

    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf);
    buf++;

    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
/*
 * LOCKS HELD: called with call->lock held.
 *
 * PrepareSendPacket is the only place in the code that
 * can increment call->tnext.  This could become an atomic
 * in the future.  Beyond that there is nothing in this
 * function that requires the call being locked.  This
 * function can only be called by the application thread.
 */
void
rxi_PrepareSendPacket(struct rx_call *call,
		      struct rx_packet *p, int last)
{
    struct rx_connection *conn = call->conn;
    afs_uint32 seq = call->tnext++;
    unsigned int i;
    afs_int32 len;		/* len must be a signed type; it can go negative */

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
	*call->callNumber = 1;
    MUTEX_EXIT(&call->lock);
    p->flags &= ~(RX_PKTFLAG_ACKED | RX_PKTFLAG_SENT);

    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    p->header.callNumber = *call->callNumber;
    p->header.seq = seq;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
	p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->firstSent);	/* Never yet transmitted */
    p->header.serial = 0;	/* Another way of saying never transmitted... */
    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
	len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
	osi_Panic("PrepareSendPacket 1\n");	/* MTUXXX */
    } else if (i < p->niovecs) {
	/* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
	rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
#else /* !RX_ENABLE_TSFPQ */
	MUTEX_ENTER(&rx_freePktQ_lock);
	rxi_FreeDataBufsNoLock(p, i);
	MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */

	p->niovecs = i;
    }
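    /* len is now <= 0; a negative value means the last iovec overshoots
     * the data length, so trim it back until the iov_lens sum exactly to
     * p->length plus the security header. */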
    if (len)
	p->wirevec[i - 1].iov_len += len;
    MUTEX_ENTER(&call->lock);
    RXS_PreparePacket(conn->securityObject, call, p);
}
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
	return mtu;
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
	return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
	return 0;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
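/* Example: with the customary sizes (RX_HEADER_SIZE 28, RX_JUMBOBUFFERSIZE
 * 1412, RX_JUMBOHEADERSIZE 4), a 1500-byte Ethernet MTU gives
 * adjMTU = 28 + 1412 + 4 = 1444; the remaining 56 bytes are less than
 * one more 1416-byte buffer slot, so the MTU is rounded down to 1444. */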
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
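/* Example (assuming the sizes above): with mtu 1444 and rxi_nSendFrags 4,
 * maxMTU starts at 5776, is capped by the peer's advertised maximum, and
 * is then rounded to a whole number of buffer slots by rxi_AdjustIfMTU. */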
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
	return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
	return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
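/* The result is clamped to a minimum of one packet (no jumbogram) and,
 * through RX_MAX_PACKET_SIZE, to the largest datagram rx will build. */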
/*
 * This function can be used by the Windows Cache Manager
 * to dump the list of all rx packets so that we can determine
 * where the packet leakage is.
 */
int rx_DumpPackets(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
    struct rx_packet *p;
#ifdef AFS_NT40_ENV
    int zilch;
    char output[2048];
#define RXDPRINTF sprintf
#define RXDPRINTOUT output
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);
    RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    for (p = rx_mallocedP; p; p = p->allNextp) {
	RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, length=%u header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
		  cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec,
		  p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->length,
		  p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
		  (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
		  (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
#ifdef AFS_NT40_ENV
	WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }
    RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
#endif /* RXDEBUG_PACKET */