2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "afs/param.h"
14 #include <afs/param.h>
22 #include "afs/sysincludes.h"
23 #include "afsincludes.h"
24 #include "rx/rx_kcommon.h"
25 #include "rx/rx_clock.h"
26 #include "rx/rx_queue.h"
27 #include "rx/rx_packet.h"
28 #else /* defined(UKERNEL) */
29 #ifdef RX_KERNEL_TRACE
30 #include "../rx/rx_kcommon.h"
33 #ifndef AFS_LINUX20_ENV
36 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
37 #include "afs/sysincludes.h"
39 #if defined(AFS_OBSD_ENV)
43 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
44 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
45 #include "sys/mount.h" /* it gets pulled in by something later anyway */
49 #include "netinet/in.h"
50 #include "afs/afs_osi.h"
51 #include "rx_kmutex.h"
52 #include "rx/rx_clock.h"
53 #include "rx/rx_queue.h"
55 #include <sys/sysmacros.h>
57 #include "rx/rx_packet.h"
58 #endif /* defined(UKERNEL) */
59 #include "rx/rx_globals.h"
61 #include "sys/types.h"
64 #if defined(AFS_NT40_ENV)
68 #define EWOULDBLOCK WSAEWOULDBLOCK
71 #include <sys/socket.h>
72 #include <netinet/in.h>
73 #endif /* AFS_NT40_ENV */
75 #include "rx_xmit_nt.h"
78 #include <sys/socket.h>
79 #include <netinet/in.h>
85 #include <sys/sysmacros.h>
87 #include "rx_packet.h"
88 #include "rx_globals.h"
104 /* rxdb_fileID is used to identify the lock location, along with line#. */
105 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
106 #endif /* RX_LOCKS_DB */
107 struct rx_packet *rx_mallocedP = 0;
109 extern char cml_version_number[];
110 extern int (*rx_almostSent) ();
112 static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);
114 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
115 struct sockaddr_storage *saddr, int slen,
118 static int rxi_FreeDataBufsToQueue(struct rx_packet *p, int first,
119 struct rx_queue * q);
121 /* some rules about packets:
122 * 1. When a packet is allocated, the final iov_buf contains room for
123 * a security trailer, but iov_len masks that fact. If the security
124 * package wants to add the trailer, it may do so, and then extend
125 * iov_len appropriately. For this reason, the packet's niovecs and
126 * iov_len fields should be accurate before calling PreparePacket.
130 * all packet buffers (iov_base) are integral multiples of the word size.
132 * offset is an integral multiple of the word size.
135 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
139 for (l = 0, i = 1; i < packet->niovecs; i++) {
140 if (l + packet->wirevec[i].iov_len > offset) {
142 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
145 l += packet->wirevec[i].iov_len;
152 * all packet buffers (iov_base) are integral multiples of the word size.
153 * offset is an integral multiple of the word size.
156 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
160 for (l = 0, i = 1; i < packet->niovecs; i++) {
161 if (l + packet->wirevec[i].iov_len > offset) {
162 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
163 (offset - l))) = data;
166 l += packet->wirevec[i].iov_len;
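/* Illustrative sketch, not from the original code: using the slow-path
 * word accessors under the alignment rules above. The example_ name and
 * the values are hypothetical; the block is guarded with #if 0. */
#if 0
static void
example_int32_access(struct rx_packet *p)
{
    afs_int32 v;

    /* Offsets must be integral multiples of the word size. */
    rx_SlowPutInt32(p, 0, htonl(42));                /* first payload word */
    rx_SlowPutInt32(p, sizeof(afs_int32), htonl(7)); /* second payload word */
    v = ntohl(rx_SlowGetInt32(p, 0));                /* v == 42 */
}
#endif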
173 * all packet buffers (iov_base) are integral multiples of the word size.
175 * offset is an integral multiple of the word size.
177 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
180 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
183 unsigned int i, j, l, r;
184 for (l = 0, i = 1; i < packet->niovecs; i++) {
185 if (l + packet->wirevec[i].iov_len > offset) {
188 l += packet->wirevec[i].iov_len;
191 /* i is the iovec which contains the first little bit of data in which we
192 * are interested. l is the total length of everything prior to this iovec.
193 * j is the number of bytes we can safely copy out of this iovec.
194 * offset only applies to the first iovec.
197 while ((resid > 0) && (i < packet->niovecs)) {
198 j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
199 memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
202 l += packet->wirevec[i].iov_len;
207 return (resid ? (r - resid) : r);
212 * all packet buffers (iov_base) are integral multiples of the word size.
214 * offset is an integral multiple of the word size.
217 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
222 for (l = 0, i = 1; i < packet->niovecs; i++) {
223 if (l + packet->wirevec[i].iov_len > offset) {
226 l += packet->wirevec[i].iov_len;
229 /* i is the iovec which contains the first little bit of data in which we
230 * are interested. l is the total length of everything prior to this iovec.
231 * j is the number of bytes we can safely copy out of this iovec.
232 * offset only applies to the first iovec.
235 while ((resid > 0) && (i < RX_MAXWVECS)) {
236 if (i >= packet->niovecs)
237 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
240 b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
241 j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
245 l += packet->wirevec[i].iov_len;
250 return (resid ? (r - resid) : r);
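/* Illustrative sketch, not from the original code: the slow-path copy
 * routines walk the iovec chain as described above and return the number
 * of bytes actually moved, which may be short of the request. The
 * example_ name is hypothetical; the block is guarded with #if 0. */
#if 0
static int
example_copy_payload(struct rx_packet *p, char *buf, int nbytes)
{
    /* A write may grow the packet with RX_PACKET_CLASS_SEND_CBUF
     * continuation buffers; a short return means it could not. */
    if (rx_SlowWritePacket(p, 0, nbytes, buf) != nbytes)
        return -1;
    /* Read the same bytes back through the same iovec walk. */
    return rx_SlowReadPacket(p, 0, nbytes, buf);
}
#endif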
254 rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
256 register struct rx_packet *p, *np;
258 num_pkts = AllocPacketBufs(class, num_pkts, q);
260 for (queue_Scan(q, p, np, rx_packet)) {
261 RX_PACKET_IOV_FULLINIT(p);
267 #ifdef RX_ENABLE_TSFPQ
269 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
271 register struct rx_packet *c;
272 register struct rx_ts_info_t * rx_ts_info;
276 RX_TS_INFO_GET(rx_ts_info);
278 transfer = num_pkts - rx_ts_info->_FPQ.len;
281 MUTEX_ENTER(&rx_freePktQ_lock);
283 if ((transfer + rx_TSFPQGlobSize) <= rx_nFreePackets) {
284 transfer += rx_TSFPQGlobSize;
285 } else if (transfer <= rx_nFreePackets) {
286 transfer = rx_nFreePackets;
288 /* alloc enough for us, plus a few globs for other threads */
289 alloc = transfer + (3 * rx_TSFPQGlobSize) - rx_nFreePackets;
290 rxi_MorePacketsNoLock(MAX(alloc, rx_initSendWindow));
291 transfer += rx_TSFPQGlobSize;
294 RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
296 MUTEX_EXIT(&rx_freePktQ_lock);
300 RX_TS_FPQ_CHECKOUT2(rx_ts_info, num_pkts, q);
304 #else /* RX_ENABLE_TSFPQ */
306 AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
314 MUTEX_ENTER(&rx_freePktQ_lock);
317 for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
318 num_pkts--, overq++);
321 rxi_NeedMorePackets = TRUE;
322 MUTEX_ENTER(&rx_stats_mutex);
324 case RX_PACKET_CLASS_RECEIVE:
325 rx_stats.receivePktAllocFailures++;
327 case RX_PACKET_CLASS_SEND:
328 rx_stats.sendPktAllocFailures++;
330 case RX_PACKET_CLASS_SPECIAL:
331 rx_stats.specialPktAllocFailures++;
333 case RX_PACKET_CLASS_RECV_CBUF:
334 rx_stats.receiveCbufPktAllocFailures++;
336 case RX_PACKET_CLASS_SEND_CBUF:
337 rx_stats.sendCbufPktAllocFailures++;
340 MUTEX_EXIT(&rx_stats_mutex);
343 if (rx_nFreePackets < num_pkts)
344 num_pkts = rx_nFreePackets;
347 rxi_NeedMorePackets = TRUE;
351 if (rx_nFreePackets < num_pkts) {
352 rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), rx_initSendWindow));
356 for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
358 i++, c=queue_Next(c, rx_packet)) {
362 queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);
364 rx_nFreePackets -= num_pkts;
369 MUTEX_EXIT(&rx_freePktQ_lock);
374 #endif /* RX_ENABLE_TSFPQ */
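/* Illustrative sketch, not from the original code: a typical caller
 * allocates a batch of packets onto a private queue and later hands the
 * whole queue back. The example_ name is hypothetical; the block is
 * guarded with #if 0. */
#if 0
static void
example_batch(void)
{
    struct rx_queue q;
    int n;

    queue_Init(&q);
    /* May return fewer than requested if the class is over quota. */
    n = rxi_AllocPackets(RX_PACKET_CLASS_RECEIVE, 4, &q);
    /* ... fill and consume the packets on q ... */
    rxi_FreePackets(n, &q);    /* num_pkts of 0 also works: length unknown */
}
#endif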
377 * Free a packet currently used as a continuation buffer
379 #ifdef RX_ENABLE_TSFPQ
380 /* num_pkts=0 means queue length is unknown */
382 rxi_FreePackets(int num_pkts, struct rx_queue * q)
384 register struct rx_ts_info_t * rx_ts_info;
385 register struct rx_packet *c, *nc;
388 osi_Assert(num_pkts >= 0);
389 RX_TS_INFO_GET(rx_ts_info);
392 for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
393 rxi_FreeDataBufsTSFPQ(c, 1, 0);
396 for (queue_Scan(q, c, nc, rx_packet)) {
397 rxi_FreeDataBufsTSFPQ(c, 1, 0);
402 RX_TS_FPQ_CHECKIN2(rx_ts_info, num_pkts, q);
405 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
407 MUTEX_ENTER(&rx_freePktQ_lock);
409 RX_TS_FPQ_LTOG(rx_ts_info);
411 /* Wakeup anyone waiting for packets */
414 MUTEX_EXIT(&rx_freePktQ_lock);
420 #else /* RX_ENABLE_TSFPQ */
421 /* num_pkts=0 means queue length is unknown */
423 rxi_FreePackets(int num_pkts, struct rx_queue *q)
426 register struct rx_packet *p, *np;
430 osi_Assert(num_pkts >= 0);
434 for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
435 if (p->niovecs > 2) {
436 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
443 for (queue_Scan(q, p, np, rx_packet)) {
444 if (p->niovecs > 2) {
445 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
452 queue_SpliceAppend(q, &cbs);
458 MUTEX_ENTER(&rx_freePktQ_lock);
460 queue_SpliceAppend(&rx_freePacketQueue, q);
461 rx_nFreePackets += qlen;
463 /* Wakeup anyone waiting for packets */
466 MUTEX_EXIT(&rx_freePktQ_lock);
471 #endif /* RX_ENABLE_TSFPQ */
473 /* this one is kind of awful.
474 * In rxkad, the packet has been shortened and made ready for
475 * sending. All of a sudden, we discover we need some of that space back.
476 * This isn't terribly general, because it knows that the packets are only
477 * rounded up to the EBS (userdata + security header).
480 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
484 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
485 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
486 p->wirevec[i].iov_len += nb;
490 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
491 p->wirevec[i].iov_len += nb;
499 /* get sufficient space to store nb bytes of data (or more), and hook
500 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
501 * returns the number of bytes >0 which it failed to come up with.
502 * Don't need to worry about locking on packet, since only
503 * one thread can manipulate one at a time. Locking on continuation
504 * packets is handled by AllocPacketBufs */
505 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
507 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
511 register struct rx_packet *cb, *ncb;
513 /* compute the number of cbuf's we need */
514 nv = nb / RX_CBUFFERSIZE;
515 if ((nv * RX_CBUFFERSIZE) < nb)
517 if ((nv + p->niovecs) > RX_MAXWVECS)
518 nv = RX_MAXWVECS - p->niovecs;
522 /* allocate buffers */
524 nv = AllocPacketBufs(class, nv, &q);
526 /* setup packet iovs */
527 for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
529 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
530 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
533 nb -= (nv * RX_CBUFFERSIZE);
534 p->length += (nv * RX_CBUFFERSIZE);
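/* Worked example, not from the original code: the cbuf count above is a
 * round-up division. For nb = 2*RX_CBUFFERSIZE + 1, nv starts at 2, the
 * (nv * RX_CBUFFERSIZE) < nb test bumps it to 3, and nv is then clamped so
 * niovecs never exceeds RX_MAXWVECS; whatever was clamped away comes back
 * to the caller as a positive leftover byte count. */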
540 /* Add more packet buffers */
541 #ifdef RX_ENABLE_TSFPQ
543 rxi_MorePackets(int apackets)
545 struct rx_packet *p, *e;
546 register struct rx_ts_info_t * rx_ts_info;
550 getme = apackets * sizeof(struct rx_packet);
551 p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
553 PIN(p, getme); /* XXXXX */
554 memset((char *)p, 0, getme);
555 RX_TS_INFO_GET(rx_ts_info);
557 for (e = p + apackets; p < e; p++) {
558 RX_PACKET_IOV_INIT(p);
561 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
563 rx_ts_info->_FPQ.delta += apackets;
565 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
567 MUTEX_ENTER(&rx_freePktQ_lock);
569 RX_TS_FPQ_LTOG(rx_ts_info);
570 rxi_NeedMorePackets = FALSE;
573 MUTEX_EXIT(&rx_freePktQ_lock);
577 #else /* RX_ENABLE_TSFPQ */
579 rxi_MorePackets(int apackets)
581 struct rx_packet *p, *e;
585 getme = apackets * sizeof(struct rx_packet);
586 p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
588 PIN(p, getme); /* XXXXX */
589 memset((char *)p, 0, getme);
591 MUTEX_ENTER(&rx_freePktQ_lock);
593 for (e = p + apackets; p < e; p++) {
594 RX_PACKET_IOV_INIT(p);
595 p->flags |= RX_PKTFLAG_FREE;
598 queue_Append(&rx_freePacketQueue, p);
600 rx_nFreePackets += apackets;
601 rxi_NeedMorePackets = FALSE;
604 MUTEX_EXIT(&rx_freePktQ_lock);
607 #endif /* RX_ENABLE_TSFPQ */
609 #ifdef RX_ENABLE_TSFPQ
611 rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
613 struct rx_packet *p, *e;
614 register struct rx_ts_info_t * rx_ts_info;
618 getme = apackets * sizeof(struct rx_packet);
619 p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
621 PIN(p, getme); /* XXXXX */
622 memset((char *)p, 0, getme);
623 RX_TS_INFO_GET(rx_ts_info);
625 for (e = p + apackets; p < e; p++) {
626 RX_PACKET_IOV_INIT(p);
629 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
631 rx_ts_info->_FPQ.delta += apackets;
634 (num_keep_local < apackets)) {
636 MUTEX_ENTER(&rx_freePktQ_lock);
638 RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
639 rxi_NeedMorePackets = FALSE;
642 MUTEX_EXIT(&rx_freePktQ_lock);
646 #endif /* RX_ENABLE_TSFPQ */
649 /* Add more packet buffers */
651 rxi_MorePacketsNoLock(int apackets)
653 struct rx_packet *p, *e;
656 /* allocate enough packets that 1/4 of the packets will be able
657 * to hold maximal amounts of data */
658 apackets += (apackets / 4)
659 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
660 getme = apackets * sizeof(struct rx_packet);
661 p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
663 memset((char *)p, 0, getme);
665 for (e = p + apackets; p < e; p++) {
666 RX_PACKET_IOV_INIT(p);
667 p->flags |= RX_PKTFLAG_FREE;
670 queue_Append(&rx_freePacketQueue, p);
673 rx_nFreePackets += apackets;
674 #ifdef RX_ENABLE_TSFPQ
675 /* TSFPQ patch also needs to keep track of total packets */
676 MUTEX_ENTER(&rx_stats_mutex);
677 rx_nPackets += apackets;
678 RX_TS_FPQ_COMPUTE_LIMITS;
679 MUTEX_EXIT(&rx_stats_mutex);
680 #endif /* RX_ENABLE_TSFPQ */
681 rxi_NeedMorePackets = FALSE;
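/* Worked example, not from the original code: the sizing rule above gives
 * one packet in four enough continuation buffers for a maximal jumbogram.
 * If (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE came to 4,
 * a request for 100 packets would be inflated to 100 + (100/4)*4 = 200. */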
687 rxi_FreeAllPackets(void)
689 /* must be called at proper interrupt level, etcetera */
690 /* MTUXXX need to free all Packets */
691 osi_Free(rx_mallocedP,
692 (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
693 UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
696 #ifdef RX_ENABLE_TSFPQ
698 rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
700 register struct rx_ts_info_t * rx_ts_info;
704 RX_TS_INFO_GET(rx_ts_info);
706 if (num_keep_local != rx_ts_info->_FPQ.len) {
708 MUTEX_ENTER(&rx_freePktQ_lock);
709 if (num_keep_local < rx_ts_info->_FPQ.len) {
710 xfer = rx_ts_info->_FPQ.len - num_keep_local;
711 RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
714 xfer = num_keep_local - rx_ts_info->_FPQ.len;
715 if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
716 xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
717 if (rx_nFreePackets < xfer) {
718 rxi_MorePacketsNoLock(xfer - rx_nFreePackets);
720 RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
722 MUTEX_EXIT(&rx_freePktQ_lock);
728 rxi_FlushLocalPacketsTSFPQ(void)
730 rxi_AdjustLocalPacketsTSFPQ(0, 0);
732 #endif /* RX_ENABLE_TSFPQ */
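/* Illustrative sketch, not from the original code: a thread that knows its
 * steady-state demand can pre-seed its thread-local free queue; the count
 * 32 is hypothetical. Guarded with #if 0. */
#if 0
rxi_AdjustLocalPacketsTSFPQ(32, 0); /* keep ~32 local, no overcommit */
rxi_FlushLocalPacketsTSFPQ();       /* later: return them all to global */
#endif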
734 /* Allocate more packets iff we need more continuation buffers */
735 /* In kernel, can't page in memory with interrupts disabled, so we
736 * don't use the event mechanism. */
738 rx_CheckPackets(void)
740 if (rxi_NeedMorePackets) {
741 rxi_MorePackets(rx_initSendWindow);
745 /* In the packet freeing routine below, the assumption is that
746 we want all of the packets to be used equally frequently, so that we
747 don't get packet buffers paging out. It would be just as valid to
748 assume that we DO want them to page out if not many are being used.
749 In any event, we assume the former, and append the packets to the end
751 /* This explanation is bogus. The free list doesn't remain in any kind of
752 useful order for long: the packets in use get pretty much randomly scattered
753 across all the pages. In order to permit unused {packets,bufs} to page out, they
754 must be stored so that packets which are adjacent in memory are adjacent in the
755 free list. An array springs rapidly to mind.
758 /* Actually free the packet p. */
759 #ifdef RX_ENABLE_TSFPQ
761 rxi_FreePacketNoLock(struct rx_packet *p)
763 register struct rx_ts_info_t * rx_ts_info;
764 dpf(("Free %lx\n", (unsigned long)p));
766 RX_TS_INFO_GET(rx_ts_info);
767 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
768 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
769 RX_TS_FPQ_LTOG(rx_ts_info);
772 #else /* RX_ENABLE_TSFPQ */
774 rxi_FreePacketNoLock(struct rx_packet *p)
776 dpf(("Free %lx\n", (unsigned long)p));
780 queue_Append(&rx_freePacketQueue, p);
782 #endif /* RX_ENABLE_TSFPQ */
784 #ifdef RX_ENABLE_TSFPQ
786 rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
788 register struct rx_ts_info_t * rx_ts_info;
789 dpf(("Free %lx\n", (unsigned long)p));
791 RX_TS_INFO_GET(rx_ts_info);
792 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
794 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
796 MUTEX_ENTER(&rx_freePktQ_lock);
798 RX_TS_FPQ_LTOG(rx_ts_info);
800 /* Wakeup anyone waiting for packets */
803 MUTEX_EXIT(&rx_freePktQ_lock);
807 #endif /* RX_ENABLE_TSFPQ */
809 /* free continuation buffers off a packet into a queue of buffers */
811 rxi_FreeDataBufsToQueue(struct rx_packet *p, int first, struct rx_queue * q)
814 struct rx_packet * cb;
819 for (; first < p->niovecs; first++, count++) {
820 iov = &p->wirevec[first];
822 osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
823 cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
824 RX_FPQ_MARK_FREE(cb);
834 rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
836 struct iovec *iov, *end;
838 if (first != 1) /* MTUXXX */
839 osi_Panic("FreeDataBufs 1: first must be 1");
840 iov = &p->wirevec[1];
841 end = iov + (p->niovecs - 1);
842 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
843 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
844 for (iov++; iov < end; iov++) {
846 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
847 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
855 #ifdef RX_ENABLE_TSFPQ
857 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, int first, int flush_global)
859 struct iovec *iov, *end;
860 register struct rx_ts_info_t * rx_ts_info;
862 RX_TS_INFO_GET(rx_ts_info);
864 if (first != 1) /* MTUXXX */
865 osi_Panic("FreeDataBufs 1: first must be 1");
866 iov = &p->wirevec[1];
867 end = iov + (p->niovecs - 1);
868 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
869 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
870 for (iov++; iov < end; iov++) {
872 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
873 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
878 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
880 MUTEX_ENTER(&rx_freePktQ_lock);
882 RX_TS_FPQ_LTOG(rx_ts_info);
884 /* Wakeup anyone waiting for packets */
887 MUTEX_EXIT(&rx_freePktQ_lock);
892 #endif /* RX_ENABLE_TSFPQ */
894 int rxi_nBadIovecs = 0;
896 /* rxi_RestoreDataBufs
898 * Restore the correct sizes to the iovecs. Called when reusing a packet
899 * for reading off the wire.
902 rxi_RestoreDataBufs(struct rx_packet *p)
905 struct iovec *iov = &p->wirevec[2];
907 RX_PACKET_IOV_INIT(p);
909 for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
910 if (!iov->iov_base) {
915 iov->iov_len = RX_CBUFFERSIZE;
919 #ifdef RX_ENABLE_TSFPQ
921 rxi_TrimDataBufs(struct rx_packet *p, int first)
924 struct iovec *iov, *end;
925 register struct rx_ts_info_t * rx_ts_info;
929 osi_Panic("TrimDataBufs 1: first must be 1");
931 /* Skip over continuation buffers containing message data */
932 iov = &p->wirevec[2];
933 end = iov + (p->niovecs - 2);
934 length = p->length - p->wirevec[1].iov_len;
935 for (; iov < end && length > 0; iov++) {
937 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
938 length -= iov->iov_len;
941 /* iov now points to the first empty data buffer. */
945 RX_TS_INFO_GET(rx_ts_info);
946 for (; iov < end; iov++) {
948 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
949 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
952 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
954 MUTEX_ENTER(&rx_freePktQ_lock);
956 RX_TS_FPQ_LTOG(rx_ts_info);
959 MUTEX_EXIT(&rx_freePktQ_lock);
965 #else /* RX_ENABLE_TSFPQ */
967 rxi_TrimDataBufs(struct rx_packet *p, int first)
970 struct iovec *iov, *end;
974 osi_Panic("TrimDataBufs 1: first must be 1");
976 /* Skip over continuation buffers containing message data */
977 iov = &p->wirevec[2];
978 end = iov + (p->niovecs - 2);
979 length = p->length - p->wirevec[1].iov_len;
980 for (; iov < end && length > 0; iov++) {
982 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
983 length -= iov->iov_len;
986 /* iov now points to the first empty data buffer. */
991 MUTEX_ENTER(&rx_freePktQ_lock);
993 for (; iov < end; iov++) {
995 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
996 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
1001 MUTEX_EXIT(&rx_freePktQ_lock);
1006 #endif /* RX_ENABLE_TSFPQ */
1008 /* Free the packet p. P is assumed not to be on any queue, i.e.
1009 * remove it yourself first if you call this routine. */
1010 #ifdef RX_ENABLE_TSFPQ
1012 rxi_FreePacket(struct rx_packet *p)
1014 rxi_FreeDataBufsTSFPQ(p, 1, 0);
1015 rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
1017 #else /* RX_ENABLE_TSFPQ */
1019 rxi_FreePacket(struct rx_packet *p)
1024 MUTEX_ENTER(&rx_freePktQ_lock);
1026 rxi_FreeDataBufsNoLock(p, 1);
1027 rxi_FreePacketNoLock(p);
1028 /* Wakeup anyone waiting for packets */
1029 rxi_PacketsUnWait();
1031 MUTEX_EXIT(&rx_freePktQ_lock);
1034 #endif /* RX_ENABLE_TSFPQ */
1036 /* rxi_AllocPacket sets up p->length so it reflects the number of
1037 * bytes in the packet at this point, **not including** the header.
1038 * The header is absolutely necessary; besides, this is the way the
1039 * length field is usually used */
1040 #ifdef RX_ENABLE_TSFPQ
1042 rxi_AllocPacketNoLock(int class)
1044 register struct rx_packet *p;
1045 register struct rx_ts_info_t * rx_ts_info;
1047 RX_TS_INFO_GET(rx_ts_info);
1050 if (rxi_OverQuota(class)) {
1051 rxi_NeedMorePackets = TRUE;
1052 MUTEX_ENTER(&rx_stats_mutex);
1054 case RX_PACKET_CLASS_RECEIVE:
1055 rx_stats.receivePktAllocFailures++;
1057 case RX_PACKET_CLASS_SEND:
1058 rx_stats.sendPktAllocFailures++;
1060 case RX_PACKET_CLASS_SPECIAL:
1061 rx_stats.specialPktAllocFailures++;
1063 case RX_PACKET_CLASS_RECV_CBUF:
1064 rx_stats.receiveCbufPktAllocFailures++;
1066 case RX_PACKET_CLASS_SEND_CBUF:
1067 rx_stats.sendCbufPktAllocFailures++;
1070 MUTEX_EXIT(&rx_stats_mutex);
1071 return (struct rx_packet *)0;
1075 MUTEX_ENTER(&rx_stats_mutex);
1076 rx_stats.packetRequests++;
1077 MUTEX_EXIT(&rx_stats_mutex);
1079 if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1082 if (queue_IsEmpty(&rx_freePacketQueue))
1083 osi_Panic("rxi_AllocPacket error");
1085 if (queue_IsEmpty(&rx_freePacketQueue))
1086 rxi_MorePacketsNoLock(rx_initSendWindow);
1090 RX_TS_FPQ_GTOL(rx_ts_info);
1093 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1095 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1098 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1099 * order to truncate outbound packets. In the near future, may need
1100 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1102 RX_PACKET_IOV_FULLINIT(p);
1105 #else /* RX_ENABLE_TSFPQ */
1107 rxi_AllocPacketNoLock(int class)
1109 register struct rx_packet *p;
1112 if (rxi_OverQuota(class)) {
1113 rxi_NeedMorePackets = TRUE;
1114 MUTEX_ENTER(&rx_stats_mutex);
1116 case RX_PACKET_CLASS_RECEIVE:
1117 rx_stats.receivePktAllocFailures++;
1119 case RX_PACKET_CLASS_SEND:
1120 rx_stats.sendPktAllocFailures++;
1122 case RX_PACKET_CLASS_SPECIAL:
1123 rx_stats.specialPktAllocFailures++;
1125 case RX_PACKET_CLASS_RECV_CBUF:
1126 rx_stats.receiveCbufPktAllocFailures++;
1128 case RX_PACKET_CLASS_SEND_CBUF:
1129 rx_stats.sendCbufPktAllocFailures++;
1132 MUTEX_EXIT(&rx_stats_mutex);
1133 return (struct rx_packet *)0;
1137 MUTEX_ENTER(&rx_stats_mutex);
1138 rx_stats.packetRequests++;
1139 MUTEX_EXIT(&rx_stats_mutex);
1142 if (queue_IsEmpty(&rx_freePacketQueue))
1143 osi_Panic("rxi_AllocPacket error");
1145 if (queue_IsEmpty(&rx_freePacketQueue))
1146 rxi_MorePacketsNoLock(rx_initSendWindow);
1150 p = queue_First(&rx_freePacketQueue, rx_packet);
1152 RX_FPQ_MARK_USED(p);
1154 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1157 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1158 * order to truncate outbound packets. In the near future, may need
1159 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1161 RX_PACKET_IOV_FULLINIT(p);
1164 #endif /* RX_ENABLE_TSFPQ */
1166 #ifdef RX_ENABLE_TSFPQ
1168 rxi_AllocPacketTSFPQ(int class, int pull_global)
1170 register struct rx_packet *p;
1171 register struct rx_ts_info_t * rx_ts_info;
1173 RX_TS_INFO_GET(rx_ts_info);
1175 MUTEX_ENTER(&rx_stats_mutex);
1176 rx_stats.packetRequests++;
1177 MUTEX_EXIT(&rx_stats_mutex);
1179 if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
1180 MUTEX_ENTER(&rx_freePktQ_lock);
1182 if (queue_IsEmpty(&rx_freePacketQueue))
1183 rxi_MorePacketsNoLock(rx_initSendWindow);
1185 RX_TS_FPQ_GTOL(rx_ts_info);
1187 MUTEX_EXIT(&rx_freePktQ_lock);
1188 } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
1192 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1194 dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
1196 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1197 * order to truncate outbound packets. In the near future, may need
1198 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1200 RX_PACKET_IOV_FULLINIT(p);
1203 #endif /* RX_ENABLE_TSFPQ */
1205 #ifdef RX_ENABLE_TSFPQ
1207 rxi_AllocPacket(int class)
1209 register struct rx_packet *p;
1211 p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
1214 #else /* RX_ENABLE_TSFPQ */
1216 rxi_AllocPacket(int class)
1218 register struct rx_packet *p;
1220 MUTEX_ENTER(&rx_freePktQ_lock);
1221 p = rxi_AllocPacketNoLock(class);
1222 MUTEX_EXIT(&rx_freePktQ_lock);
1225 #endif /* RX_ENABLE_TSFPQ */
1227 /* This guy comes up with as many buffers as it {takes,can get} given
1228 * the MTU for this call. It also sets the packet length before
1229 * returning. Caution: this is often called at NETPRI
1230 * Called with call locked.
1233 rxi_AllocSendPacket(register struct rx_call *call, int want)
1235 register struct rx_packet *p = (struct rx_packet *)0;
1237 register unsigned delta;
1240 mud = call->MTU - RX_HEADER_SIZE;
1242 rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
1243 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
1245 #ifdef RX_ENABLE_TSFPQ
1246 if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
1248 want = MIN(want, mud);
1250 if ((unsigned)want > p->length)
1251 (void)rxi_AllocDataBuf(p, (want - p->length),
1252 RX_PACKET_CLASS_SEND_CBUF);
1254 if ((unsigned)p->length > mud)
1257 if (delta >= p->length) {
1265 #endif /* RX_ENABLE_TSFPQ */
1267 while (!(call->error)) {
1268 MUTEX_ENTER(&rx_freePktQ_lock);
1269 /* if an error occurred, or we get the packet we want, we're done */
1270 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
1271 MUTEX_EXIT(&rx_freePktQ_lock);
1274 want = MIN(want, mud);
1276 if ((unsigned)want > p->length)
1277 (void)rxi_AllocDataBuf(p, (want - p->length),
1278 RX_PACKET_CLASS_SEND_CBUF);
1280 if ((unsigned)p->length > mud)
1283 if (delta >= p->length) {
1292 /* no error occurred, and we didn't get a packet, so we sleep.
1293 * At this point, we assume that packets will be returned
1294 * sooner or later, as packets are acknowledged, and so we
1297 call->flags |= RX_CALL_WAIT_PACKETS;
1298 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
1299 MUTEX_EXIT(&call->lock);
1300 rx_waitingForPackets = 1;
1302 #ifdef RX_ENABLE_LOCKS
1303 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
1305 osi_rxSleep(&rx_waitingForPackets);
1307 MUTEX_EXIT(&rx_freePktQ_lock);
1308 MUTEX_ENTER(&call->lock);
1309 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
1310 call->flags &= ~RX_CALL_WAIT_PACKETS;
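/* Illustrative sketch, not from the original code: callers size a send
 * packet by the bytes they intend to write; the routine caps p->length at
 * the call's usable MTU (the MTU less the RX header and the security
 * header/trailer overhead) and may sleep until packets are freed. The
 * example_ name is hypothetical; the block is guarded with #if 0. */
#if 0
static struct rx_packet *
example_send_packet(struct rx_call *call, int nbytes)
{
    struct rx_packet *p = rxi_AllocSendPacket(call, nbytes);
    /* p is NULL only if the call entered an error state while waiting. */
    return p;
}
#endif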
1319 /* Windows does not use file descriptors. */
1320 #define CountFDs(amax) 0
1322 /* count the number of used FDs */
1324 CountFDs(register int amax)
1327 register int i, code;
1331 for (i = 0; i < amax; i++) {
1332 code = fstat(i, &tstat);
1338 #endif /* AFS_NT40_ENV */
1341 #define CountFDs(amax) amax
1345 #if !defined(KERNEL) || defined(UKERNEL)
1347 /* This function reads a single packet from the interface into the
1348 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
1349 * (host,port) of the sender are stored in the supplied variables, and
1350 * the data length of the packet is stored in the packet structure.
1351 * The header is decoded. */
1353 rxi_ReadPacket(osi_socket socket, register struct rx_packet *p,
1354 struct sockaddr_storage *saddr, int *slen)
1358 register afs_int32 tlen, savelen;
1360 rx_computelen(p, tlen);
1361 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
1363 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
1364 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
1365 * it once in order to avoid races. */
1368 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
1376 /* Extend the last iovec for padding; it's just to make sure that the
1377 * read doesn't return more data than we expect, and is done to get around
1378 * our problems caused by the lack of a length field in the rx header.
1379 * Use the extra buffer that follows the localdata in each packet
1381 savelen = p->wirevec[p->niovecs - 1].iov_len;
1382 p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
1384 memset((char *)&msg, 0, sizeof(msg));
1385 msg.msg_name = (char *)saddr;
1386 msg.msg_namelen = *slen;
1387 msg.msg_iov = p->wirevec;
1388 msg.msg_iovlen = p->niovecs;
1389 nbytes = rxi_Recvmsg(socket, &msg, 0);
1390 *slen = msg.msg_namelen;
1392 /* restore the vec to its correct state */
1393 p->wirevec[p->niovecs - 1].iov_len = savelen;
1395 p->length = (nbytes - RX_HEADER_SIZE);
1396 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
1397 if (nbytes < 0 && errno == EWOULDBLOCK) {
1398 MUTEX_ENTER(&rx_stats_mutex);
1399 rx_stats.noPacketOnRead++;
1400 MUTEX_EXIT(&rx_stats_mutex);
1401 } else if (nbytes <= 0) {
1402 MUTEX_ENTER(&rx_stats_mutex);
1403 rx_stats.bogusPacketOnRead++;
1404 switch (rx_ssfamily(saddr)) {
1406 rx_stats.bogusHost = rx_ss2sin(saddr)->sin_addr.s_addr;
1411 #endif /* AF_INET6 */
1412 rx_stats.bogusHost = 0xffffffff;
1415 MUTEX_EXIT(&rx_stats_mutex);
1416 dpf(("B: bogus packet from [%x,%d] nb=%d",
1417 ntohl(rx_ss2v4addr(saddr)), ntohs(rx_ss2pn(saddr)), nbytes));
1422 else if ((rx_intentionallyDroppedOnReadPer100 > 0)
1423 && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
1424 rxi_DecodePacketHeader(p);
1426 dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
1427 p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(rx_ss2v4addr(saddr)), ntohs(rx_ss2pn(saddr)), p->header.serial,
1428 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1430 rxi_TrimDataBufs(p, 1);
1435 /* Extract packet header. */
1436 rxi_DecodePacketHeader(p);
1438 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
1439 struct rx_peer *peer;
1440 MUTEX_ENTER(&rx_stats_mutex);
1441 rx_stats.packetsRead[p->header.type - 1]++;
1442 MUTEX_EXIT(&rx_stats_mutex);
1444 * Try to look up this peer structure. If it doesn't exist,
1445 * don't create a new one -
1446 * we don't keep count of the bytes sent/received if a peer
1447 * structure doesn't already exist.
1449 * The peer/connection cleanup code assumes that there is 1 peer
1450 * per connection. If we actually created a peer structure here
1451 * and this packet was an rxdebug packet, the peer structure would
1452 * never be cleaned up.
1454 peer = rxi_FindPeer(saddr, *slen, SOCK_DGRAM, 0, 0);
1455 /* Since this may not be associated with a connection,
1456 * it may have no refCount, meaning we could race with
1459 if (peer && (peer->refCount > 0)) {
1460 MUTEX_ENTER(&peer->peer_lock);
1461 hadd32(peer->bytesReceived, p->length);
1462 MUTEX_EXIT(&peer->peer_lock);
1466 /* Free any empty packet buffers at the end of this packet */
1467 rxi_TrimDataBufs(p, 1);
1473 #endif /* !KERNEL || UKERNEL */
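/* Worked example, not from the original code: with an RX header of
 * RX_HEADER_SIZE bytes, a datagram of nbytes = RX_HEADER_SIZE + 100 yields
 * p->length = 100. A runt datagram shorter than the header makes
 * nbytes - RX_HEADER_SIZE go negative, which shows up in the
 * (p->length & 0x8000) sign-bit test, and a datagram larger than the
 * buffers provisioned for it fails the nbytes > tlen test. */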
1475 /* This function splits off the first packet in a jumbo packet.
1476 * As of AFS 3.5, jumbograms contain more than one fixed size
1477 * packet, and the RX_JUMBO_PACKET flag is set in all but the
1478 * last packet header. All packets (except the last) are padded to
1479 * fall on RX_CBUFFERSIZE boundaries.
1480 * HACK: We store the length of the first n-1 packets in the
1481 * last two pad bytes. */
1484 rxi_SplitJumboPacket(register struct rx_packet *p,
1485 struct sockaddr_storage *saddr, int slen, int first)
1487 struct rx_packet *np;
1488 struct rx_jumboHeader *jp;
1494 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
1495 * bytes in length. All but the first packet are preceded by
1496 * an abbreviated four byte header. The length of the last packet
1497 * is calculated from the size of the jumbogram. */
1498 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1500 if ((int)p->length < length) {
1501 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
1504 niov = p->niovecs - 2;
1506 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
1509 iov = &p->wirevec[2];
1510 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
1512 /* Get a pointer to the abbreviated packet header */
1513 jp = (struct rx_jumboHeader *)
1514 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
1516 /* Set up the iovecs for the next packet */
1517 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
1518 np->wirevec[0].iov_len = sizeof(struct rx_header);
1519 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
1520 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
1521 np->niovecs = niov + 1;
1522 for (i = 2, iov++; i <= niov; i++, iov++) {
1523 np->wirevec[i] = *iov;
1525 np->length = p->length - length;
1526 p->length = RX_JUMBOBUFFERSIZE;
1529 /* Convert the jumbo packet header to host byte order */
1530 temp = ntohl(*(afs_uint32 *) jp);
1531 jp->flags = (u_char) (temp >> 24);
1532 jp->cksum = (u_short) (temp);
1534 /* Fill in the packet header */
1535 np->header = p->header;
1536 np->header.serial = p->header.serial + 1;
1537 np->header.seq = p->header.seq + 1;
1538 np->header.flags = jp->flags;
1539 np->header.spare = jp->cksum;
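/* Worked example, not from the original code: for a two-packet jumbogram,
 * p->length on entry covers RX_JUMBOBUFFERSIZE bytes of head data, the
 * RX_JUMBOHEADERSIZE abbreviated header, and the tail's data. After the
 * split the head is trimmed to exactly RX_JUMBOBUFFERSIZE and the tail
 * keeps the remainder, with serial and seq each one higher than the
 * head's. */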
1545 /* Send a udp datagram */
1547 osi_NetSend(osi_socket socket, void *addr, int addrlen, struct iovec *dvec,
1548 int nvecs, int length, int istack)
1553 memset(&msg, 0, sizeof(msg));
1555 msg.msg_iovlen = nvecs;
1556 msg.msg_name = addr;
1557 msg.msg_namelen = addrlen;
1559 ret = rxi_Sendmsg(socket, &msg, 0);
1563 #elif !defined(UKERNEL)
1565 * message receipt is done in rxk_input or rx_put.
1568 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1570 * Copy an mblock to the contiguous area pointed to by cp.
1571 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1572 * but it doesn't really.
1573 * Returns the number of bytes not transferred.
1574 * The message is NOT changed.
1577 cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
1581 for (; mp && len > 0; mp = mp->b_cont) {
1582 if (mp->b_datap->db_type != M_DATA) {
1585 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1586 memcpy(cp, (char *)mp->b_rptr, n);
1594 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1595 * but it doesn't really.
1596 * This sucks, anyway, do it like m_cpy.... below
1599 cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
1602 register int m, n, o, t, i;
1604 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1605 if (mp->b_datap->db_type != M_DATA) {
1608 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1614 t = iovs[i].iov_len;
1617 memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1627 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1628 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1630 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1632 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1635 unsigned int l1, l2, i, t;
1637 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1638 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
1641 if (m->m_len <= off) {
1651 p1 = mtod(m, caddr_t) + off;
1652 l1 = m->m_len - off;
1654 p2 = iovs[0].iov_base;
1655 l2 = iovs[0].iov_len;
1658 t = MIN(l1, MIN(l2, (unsigned int)len));
1669 p1 = mtod(m, caddr_t);
1675 p2 = iovs[i].iov_base;
1676 l2 = iovs[i].iov_len;
1684 #endif /* AFS_SUN5_ENV */
1686 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1688 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1689 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1695 struct rx_packet *phandle;
1696 int hdr_len, data_len;
1701 m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1708 #endif /*KERNEL && !UKERNEL */
1711 /* send a response to a debug packet */
1714 rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
1715 struct sockaddr_storage *saddr, int slen, int istack)
1717 struct rx_debugIn tin;
1719 struct rx_serverQueueEntry *np, *nqe;
1722 * Only respond to client-initiated Rx debug packets,
1723 * and clear the client flag in the response.
1725 if (ap->header.flags & RX_CLIENT_INITIATED) {
1726 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1727 rxi_EncodePacketHeader(ap);
1732 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1733 /* all done with packet, now set length to the truth, so we can
1734 * reuse this packet */
1735 rx_computelen(ap, ap->length);
1737 tin.type = ntohl(tin.type);
1738 tin.index = ntohl(tin.index);
1740 case RX_DEBUGI_GETSTATS:{
1741 struct rx_debugStats tstat;
1743 /* get basic stats */
1744 memset((char *)&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1745 tstat.version = RX_DEBUGI_VERSION;
1746 #ifndef RX_ENABLE_LOCKS
1747 tstat.waitingForPackets = rx_waitingForPackets;
1749 MUTEX_ENTER(&rx_serverPool_lock);
1750 tstat.nFreePackets = htonl(rx_nFreePackets);
1751 tstat.callsExecuted = htonl(rxi_nCalls);
1752 tstat.packetReclaims = htonl(rx_packetReclaims);
1753 tstat.usedFDs = CountFDs(64);
1754 tstat.nWaiting = htonl(rx_nWaiting);
1755 tstat.nWaited = htonl(rx_nWaited);
1756 queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
1758 MUTEX_EXIT(&rx_serverPool_lock);
1759 tstat.idleThreads = htonl(tstat.idleThreads);
1760 tl = sizeof(struct rx_debugStats) - ap->length;
1762 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1765 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1767 ap->length = sizeof(struct rx_debugStats);
1768 rxi_SendDebugPacket(ap, asocket, saddr, slen, istack);
1769 rx_computelen(ap, ap->length);
1774 case RX_DEBUGI_GETALLCONN:
1775 case RX_DEBUGI_GETCONN:{
1777 register struct rx_connection *tc;
1778 struct rx_call *tcall;
1779 struct rx_debugConn tconn;
1780 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1783 tl = sizeof(struct rx_debugConn) - ap->length;
1785 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1789 memset((char *)&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1790 /* get N'th (maybe) "interesting" connection info */
1791 for (i = 0; i < rx_hashTableSize; i++) {
1792 #if !defined(KERNEL)
1793 /* the time complexity of the algorithm used here
1794 * exponentially increases with the number of connections.
1796 #ifdef AFS_PTHREAD_ENV
1802 MUTEX_ENTER(&rx_connHashTable_lock);
1803 /* We might be slightly out of step since we are not
1804 * locking each call, but this is only debugging output.
1806 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
1807 if ((all || rxi_IsConnInteresting(tc))
1808 && tin.index-- <= 0) {
1809 switch (rx_ssfamily(&tc->peer->saddr)) {
1811 tconn.host = rx_ss2sin(&tc->peer->saddr)->sin_addr.s_addr;
1816 #endif /* AF_INET6 */
1817 tconn.host = 0xffffffff;
1820 tconn.port = rx_ss2pn(&tc->peer->saddr);
1821 tconn.cid = htonl(tc->cid);
1822 tconn.epoch = htonl(tc->epoch);
1823 tconn.serial = htonl(tc->serial);
1824 for (j = 0; j < RX_MAXCALLS; j++) {
1825 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1826 if ((tcall = tc->call[j])) {
1827 tconn.callState[j] = tcall->state;
1828 tconn.callMode[j] = tcall->mode;
1829 tconn.callFlags[j] = tcall->flags;
1830 if (queue_IsNotEmpty(&tcall->rq))
1831 tconn.callOther[j] |= RX_OTHER_IN;
1832 if (queue_IsNotEmpty(&tcall->tq))
1833 tconn.callOther[j] |= RX_OTHER_OUT;
1835 tconn.callState[j] = RX_STATE_NOTINIT;
1838 tconn.natMTU = htonl(tc->peer->natMTU);
1839 tconn.error = htonl(tc->error);
1840 tconn.flags = tc->flags;
1841 tconn.type = tc->type;
1842 tconn.securityIndex = tc->securityIndex;
1843 if (tc->securityObject) {
1844 RXS_GetStats(tc->securityObject, tc,
1846 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1847 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1850 DOHTONL(packetsReceived);
1851 DOHTONL(packetsSent);
1852 DOHTONL(bytesReceived);
1856 sizeof(tconn.secStats.spares) /
1861 sizeof(tconn.secStats.sparel) /
1862 sizeof(afs_int32); i++)
1866 MUTEX_EXIT(&rx_connHashTable_lock);
1867 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1870 ap->length = sizeof(struct rx_debugConn);
1871 rxi_SendDebugPacket(ap, asocket, saddr, slen,
1877 MUTEX_EXIT(&rx_connHashTable_lock);
1879 /* if we make it here, there are no more interesting connections */
1880 tconn.cid = htonl(0xffffffff); /* means end */
1881 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1884 ap->length = sizeof(struct rx_debugConn);
1885 rxi_SendDebugPacket(ap, asocket, saddr, slen, istack);
1891 * Pass back all the peer structures we have available
1894 case RX_DEBUGI_GETPEER:{
1896 register struct rx_peer *tp;
1897 struct rx_debugPeer tpeer;
1900 tl = sizeof(struct rx_debugPeer) - ap->length;
1902 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1906 memset((char *)&tpeer, 0, sizeof(tpeer));
1907 for (i = 0; i < rx_hashTableSize; i++) {
1908 #if !defined(KERNEL)
1909 /* the time complexity of the algorithm used here
1910 * exponentially increases with the number of peers.
1912 * Yielding after processing each hash table entry
1913 * and dropping rx_peerHashTable_lock
1914 * also increases the risk that we will miss a new
1915 * entry - but we are willing to live with this
1916 * limitation since this is meant for debugging only
1918 #ifdef AFS_PTHREAD_ENV
1924 MUTEX_ENTER(&rx_peerHashTable_lock);
1925 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1926 if (tin.index-- <= 0) {
1927 switch (rx_ssfamily(&tp->saddr)) {
1929 tpeer.host = rx_ss2sin(&tp->saddr)->sin_addr.s_addr;
1934 #endif /* AF_INET6 */
1935 tpeer.host = 0xffffffff;
1938 tpeer.port = rx_ss2pn(&tp->saddr);
1939 tpeer.ifMTU = htons(tp->ifMTU);
1940 tpeer.idleWhen = htonl(tp->idleWhen);
1941 tpeer.refCount = htons(tp->refCount);
1942 tpeer.burstSize = tp->burstSize;
1943 tpeer.burst = tp->burst;
1944 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1945 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1946 tpeer.rtt = htonl(tp->rtt);
1947 tpeer.rtt_dev = htonl(tp->rtt_dev);
1948 tpeer.timeout.sec = htonl(tp->timeout.sec);
1949 tpeer.timeout.usec = htonl(tp->timeout.usec);
1950 tpeer.nSent = htonl(tp->nSent);
1951 tpeer.reSends = htonl(tp->reSends);
1952 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1953 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1954 tpeer.rateFlag = htonl(tp->rateFlag);
1955 tpeer.natMTU = htons(tp->natMTU);
1956 tpeer.maxMTU = htons(tp->maxMTU);
1957 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1958 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1959 tpeer.MTU = htons(tp->MTU);
1960 tpeer.cwind = htons(tp->cwind);
1961 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1962 tpeer.congestSeq = htons(tp->congestSeq);
1963 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1964 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1965 tpeer.bytesReceived.high =
1966 htonl(tp->bytesReceived.high);
1967 tpeer.bytesReceived.low =
1968 htonl(tp->bytesReceived.low);
1970 MUTEX_EXIT(&rx_peerHashTable_lock);
1971 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1974 ap->length = sizeof(struct rx_debugPeer);
1975 rxi_SendDebugPacket(ap, asocket, saddr, slen,
1981 MUTEX_EXIT(&rx_peerHashTable_lock);
1983 /* if we make it here, there are no more interesting peers */
1984 tpeer.host = htonl(0xffffffff); /* means end */
1985 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1988 ap->length = sizeof(struct rx_debugPeer);
1989 rxi_SendDebugPacket(ap, asocket, saddr, slen, istack);
1994 case RX_DEBUGI_RXSTATS:{
1998 tl = sizeof(rx_stats) - ap->length;
2000 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
2004 /* Since it's all int32s convert to network order with a loop. */
2005 MUTEX_ENTER(&rx_stats_mutex);
2006 s = (afs_int32 *) & rx_stats;
2007 for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
2008 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
2011 ap->length = sizeof(rx_stats);
2012 MUTEX_EXIT(&rx_stats_mutex);
2013 rxi_SendDebugPacket(ap, asocket, saddr, slen, istack);
2019 /* error response packet */
2020 tin.type = htonl(RX_DEBUGI_BADTYPE);
2021 tin.index = tin.type;
2022 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
2024 ap->length = sizeof(struct rx_debugIn);
2025 rxi_SendDebugPacket(ap, asocket, saddr, slen, istack);
2033 rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
2034 struct sockaddr_storage *saddr, int slen, int istack)
2039 * Only respond to client-initiated version requests, and
2040 * clear that flag in the response.
2042 if (ap->header.flags & RX_CLIENT_INITIATED) {
2045 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2046 rxi_EncodePacketHeader(ap);
2047 memset(buf, 0, sizeof(buf));
2048 strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2049 rx_packetwrite(ap, 0, 65, buf);
2052 rxi_SendDebugPacket(ap, asocket, saddr, slen, istack);
2060 /* send a debug packet back to the sender */
2062 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2063 struct sockaddr_storage *saddr, int slen, afs_int32 istack)
2070 int waslocked = ISAFS_GLOCK();
2073 /* We need to trim the niovecs. */
2074 nbytes = apacket->length;
2075 for (i = 1; i < apacket->niovecs; i++) {
2076 if (nbytes <= apacket->wirevec[i].iov_len) {
2077 savelen = apacket->wirevec[i].iov_len;
2078 saven = apacket->niovecs;
2079 apacket->wirevec[i].iov_len = nbytes;
2080 apacket->niovecs = i + 1; /* so condition fails because i == niovecs */
2082 nbytes -= apacket->wirevec[i].iov_len;
2085 #ifdef RX_KERNEL_TRACE
2086 if (ICL_SETACTIVE(afs_iclSetp)) {
2089 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2090 "before osi_NetSend()");
2098 /* debug packets are not reliably delivered, hence the cast below. */
2099 (void)osi_NetSend(asocket, saddr, slen, apacket->wirevec, apacket->niovecs,
2100 apacket->length + RX_HEADER_SIZE, istack);
2102 #ifdef RX_KERNEL_TRACE
2103 if (ICL_SETACTIVE(afs_iclSetp)) {
2105 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2106 "after osi_NetSend()");
2115 if (saven) { /* means we truncated the packet above. */
2116 apacket->wirevec[i - 1].iov_len = savelen;
2117 apacket->niovecs = saven;
2122 /* Send the packet to appropriate destination for the specified
2123 * call. The header is first encoded and placed in the packet.
2126 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2127 struct rx_packet *p, int istack)
2133 register struct rx_peer *peer = conn->peer;
2136 char deliveryType = 'S';
2138 /* This stuff should be revamped, I think, so that most, if not
2139 * all, of the header stuff is always added here. We could
2140 * probably do away with the encode/decode routines. XXXXX */
2142 /* Stamp each packet with a unique serial number. The serial
2143 * number is maintained on a connection basis because some types
2144 * of security may be based on the serial number of the packet,
2145 * and security is handled on a per authenticated-connection
2147 /* Pre-increment, to guarantee no zero serial number; a zero
2148 * serial number means the packet was never sent. */
2149 MUTEX_ENTER(&conn->conn_data_lock);
2150 p->header.serial = ++conn->serial;
2151 MUTEX_EXIT(&conn->conn_data_lock);
2152 /* This is so we can adjust retransmit time-outs better in the face of
2153 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2155 if (p->firstSerial == 0) {
2156 p->firstSerial = p->header.serial;
2159 /* If an output tracer function is defined, call it with the packet and
2160 * network address. Note this function may modify its arguments. */
2161 if (rx_almostSent) {
2162 int drop = (*rx_almostSent) (p, &peer->saddr);
2163 /* drop packet if return value is non-zero? */
2165 deliveryType = 'D'; /* Drop the packet */
2169 /* Get network byte order header */
2170 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2171 * touch ALL the fields */
2173 /* Send the packet out on the same socket that related packets are being
2177 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2180 /* Possibly drop this packet, for testing purposes */
2181 if ((deliveryType == 'D')
2182 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2183 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2184 deliveryType = 'D'; /* Drop the packet */
2186 deliveryType = 'S'; /* Send the packet */
2187 #endif /* RXDEBUG */
2189 /* Loop until the packet is sent. We'd prefer just to use a
2190 * blocking socket, but unfortunately the interface doesn't
2191 * allow us to have the socket block in send mode, and not
2192 * block in receive mode */
2194 waslocked = ISAFS_GLOCK();
2195 #ifdef RX_KERNEL_TRACE
2196 if (ICL_SETACTIVE(afs_iclSetp)) {
2199 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2200 "before osi_NetSend()");
2209 osi_NetSend(socket, &peer->saddr, peer->saddrlen, p->wirevec,
2210 p->niovecs, p->length + RX_HEADER_SIZE,
2212 /* send failed, so let's hurry up the resend, eh? */
2213 MUTEX_ENTER(&rx_stats_mutex);
2214 rx_stats.netSendFailures++;
2215 MUTEX_EXIT(&rx_stats_mutex);
2216 p->retryTime = p->timeSent; /* resend it very soon */
2217 clock_Addmsec(&(p->retryTime),
2218 10 + (((afs_uint32) p->backoff) << 8));
2221 /* Windows is nice -- it can tell us right away that we cannot
2222 * reach this recipient by returning a WSAEHOSTUNREACH error
2223 * code. So, when this happens let's "down" the host NOW so
2224 * we don't sit around waiting for this host to timeout later.
2226 if (call && code == -1 && errno == WSAEHOSTUNREACH)
2227 call->lastReceiveTime = 0;
2229 #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
2230 /* Linux is nice -- it can tell us right away that we cannot
2231 * reach this recipient by returning an ENETUNREACH error
2232 * code. So, when this happens let's "down" the host NOW so
2233 * we don't sit around waiting for this host to timeout later.
2235 if (call && code == -ENETUNREACH)
2236 call->lastReceiveTime = 0;
2240 #ifdef RX_KERNEL_TRACE
2241 if (ICL_SETACTIVE(afs_iclSetp)) {
2243 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2244 "after osi_NetSend()");
2255 dpf(("%c %d %s: %s.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], rx_AddrStringOf(peer), ntohs(rx_PortOf(peer)), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
2257 MUTEX_ENTER(&rx_stats_mutex);
2258 rx_stats.packetsSent[p->header.type - 1]++;
2259 MUTEX_EXIT(&rx_stats_mutex);
2260 MUTEX_ENTER(&peer->peer_lock);
2261 hadd32(peer->bytesSent, p->length);
2262 MUTEX_EXIT(&peer->peer_lock);
2265 /* Send a list of packets to appropriate destination for the specified
2266 * connection. The headers are first encoded and placed in the packets.
2269 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2270 struct rx_packet **list, int len, int istack)
2272 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2275 register struct rx_peer *peer = conn->peer;
2277 struct rx_packet *p = NULL;
2278 struct iovec wirevec[RX_MAXIOVECS];
2279 int i, length, code;
2282 struct rx_jumboHeader *jp;
2284 char deliveryType = 'S';
2287 if (len + 1 > RX_MAXIOVECS) {
2288 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2292 * Stamp the packets in this jumbogram with consecutive serial numbers
2294 MUTEX_ENTER(&conn->conn_data_lock);
2295 serial = conn->serial;
2296 conn->serial += len;
2297 MUTEX_EXIT(&conn->conn_data_lock);
2300 /* This stuff should be revamped, I think, so that most, if not
2301 * all, of the header stuff is always added here. We could
2302 * probably do away with the encode/decode routines. XXXXX */
2305 length = RX_HEADER_SIZE;
2306 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2307 wirevec[0].iov_len = RX_HEADER_SIZE;
2308 for (i = 0; i < len; i++) {
2311 /* The whole 3.5 jumbogram scheme relies on packets fitting
2312 * in a single packet buffer. */
2313 if (p->niovecs > 2) {
2314 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2317 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2320 if (p->length != RX_JUMBOBUFFERSIZE) {
2321 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2323 p->header.flags |= RX_JUMBO_PACKET;
2324 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2325 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2327 wirevec[i + 1].iov_len = p->length;
2328 length += p->length;
2330 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2332 /* Convert jumbo packet header to network byte order */
2333 temp = (afs_uint32) (p->header.flags) << 24;
2334 temp |= (afs_uint32) (p->header.spare);
2335 *(afs_uint32 *) jp = htonl(temp);
2337 jp = (struct rx_jumboHeader *)
2338 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2340 /* Stamp each packet with a unique serial number. The serial
2341 * number is maintained on a connection basis because some types
2342 * of security may be based on the serial number of the packet,
2343 * and security is handled on a per authenticated-connection
2345 /* Pre-increment, to guarantee no zero serial number; a zero
2346 * serial number means the packet was never sent. */
2347 p->header.serial = ++serial;
2348 /* This is so we can adjust retransmit time-outs better in the face of
2349 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2351 if (p->firstSerial == 0) {
2352 p->firstSerial = p->header.serial;
2355 /* If an output tracer function is defined, call it with the packet and
2356 * network address. Note this function may modify its arguments. */
2357 if (rx_almostSent) {
2358 int drop = (*rx_almostSent) (p, &peer->saddr);
2359 /* drop packet if return value is non-zero? */
2361 deliveryType = 'D'; /* Drop the packet */
2365 /* Get network byte order header */
2366 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2367 * touch ALL the fields */
2370 /* Send the packet out on the same socket that related packets are being
2374 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2377 /* Possibly drop this packet, for testing purposes */
2378 if ((deliveryType == 'D')
2379 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2380 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2381 deliveryType = 'D'; /* Drop the packet */
2383 deliveryType = 'S'; /* Send the packet */
2384 #endif /* RXDEBUG */
2386 /* Loop until the packet is sent. We'd prefer just to use a
2387 * blocking socket, but unfortunately the interface doesn't
2388 * allow us to have the socket block in send mode, and not
2389 * block in receive mode */
2390 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2391 waslocked = ISAFS_GLOCK();
2392 if (!istack && waslocked)
2396 osi_NetSend(socket, &peer->saddr, peer->saddrlen, &wirevec[0],
2397 len + 1, length, istack)) != 0) {
2398 /* send failed, so let's hurry up the resend, eh? */
2399 MUTEX_ENTER(&rx_stats_mutex);
2400 rx_stats.netSendFailures++;
2401 MUTEX_EXIT(&rx_stats_mutex);
2402 for (i = 0; i < len; i++) {
2404 p->retryTime = p->timeSent; /* resend it very soon */
2405 clock_Addmsec(&(p->retryTime),
2406 10 + (((afs_uint32) p->backoff) << 8));
2409 /* Windows is nice -- it can tell us right away that we cannot
2410 * reach this recipient by returning a WSAEHOSTUNREACH error
2411 * code. So, when this happens let's "down" the host NOW so
2412 * we don't sit around waiting for this host to timeout later.
2414 if (call && code == -1 && errno == WSAEHOSTUNREACH)
2415 call->lastReceiveTime = 0;
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
        /* Linux is nice -- it can tell us right away that we cannot
         * reach this recipient by returning an ENETUNREACH error
         * code.  So, when this happens let's "down" the host NOW so
         * we don't sit around waiting for this host to timeout later.
         */
        if (call && code == -ENETUNREACH)
            call->lastReceiveTime = 0;
#endif
    }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    if (!istack && waslocked)
        AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }
#endif

    dpf(("%c %d %s: %s.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], rx_AddrStringOf(peer), ntohs(rx_PortOf(peer)), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
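
/* Illustrative note (added; not in the original source): the jumbogram
 * assembled above is laid out on the wire as a single RX header followed
 * by the chained packet bodies.  Every body except the last is padded to
 * RX_JUMBOBUFFERSIZE bytes and followed by a 4-byte rx_jumboHeader that
 * carries the flags and spare field of the packet after it:
 *
 *   [rx header][body 1][jumbo hdr][body 2][jumbo hdr]...[last body]
 *
 * which is why the loop above converts each packet's flags/spare pair to
 * network byte order at the jp position of the previous buffer. */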
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(register struct rx_call *call,
                register struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    int i;
    int savelen = 0, saven = 0;
    int channel, callNumber;
    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
        p->length = nbytes;
    else
        p->length = strlen(data);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);
    /* Expose only nbytes of data in the iovec, remembering what we
     * truncated so it can be restored below */
    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1;     /* so condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {        /* means we truncated the packet above.  We probably don't */
        /* really need to do this, but it seems safer this way, given that */
        /* sneaky optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}
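
/* Illustrative use (a sketch, not code from this file): callers in rx.c
 * send connection-level aborts through rxi_SendSpecial by passing a null
 * call and a network-order error code as the data.  The variables conn,
 * error, and istack are assumed to be in scope. */
#if 0
{
    afs_int32 error = htonl(conn->error);
    (void)rxi_SendSpecial((struct rx_call *)0, conn, (struct rx_packet *)0,
                          RX_PACKET_TYPE_ABORT, (char *)&error,
                          sizeof(error), istack);
}
#endif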
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);     /* MTUXXX */

    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
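
/* For reference (editor's summary of the code above): the wire header is
 * seven 32-bit words, RX_HEADER_SIZE bytes in all, in network byte order:
 *
 *   word 0  epoch
 *   word 1  cid (connection id | channel)
 *   word 2  callNumber
 *   word 3  seq
 *   word 4  serial
 *   word 5  type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6  spare<<16 | serviceId (low 16 bits)
 */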
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);     /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;
    temp = ntohl(*buf);
    buf++;
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;
    temp = ntohl(*buf);
    buf++;
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
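
/* Minimal round-trip check (editor's sketch; rxi_HeaderRoundTripOK is a
 * hypothetical helper, not part of rx): encoding then decoding must
 * preserve every header field, since the two routines are exact inverses
 * over the seven-word wire format. */
#if 0
static int
rxi_HeaderRoundTripOK(struct rx_packet *p)
{
    struct rx_header saved = p->header;
    rxi_EncodePacketHeader(p);
    rxi_DecodePacketHeader(p);
    return (p->header.epoch == saved.epoch && p->header.cid == saved.cid
            && p->header.serial == saved.serial
            && p->header.flags == saved.flags
            && p->header.serviceId == saved.serviceId);
}
#endif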
void
rxi_PrepareSendPacket(register struct rx_call *call,
                      register struct rx_packet *p, register int last)
{
    register struct rx_connection *conn = call->conn;
    int i, j;
    ssize_t len;            /* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);      /* Never yet transmitted */
    clock_Zero(&p->firstSent);      /* Never yet transmitted */
    p->header.serial = 0;           /* Another way of saying never transmitted... */
    p->backoff = 0;
    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;
    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
    } else {
        struct rx_queue q;
        int nb;

        queue_Init(&q);
        /* Free any extra elements in the wirevec */
        for (j = MAX(2, i), nb = p->niovecs - j; j < p->niovecs; j++) {
            queue_Append(&q, RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
        }
        if (nb)
            rxi_FreePackets(nb, &q);
        p->niovecs = i;
        p->wirevec[i - 1].iov_len += len;
    }
    RXS_PreparePacket(conn->securityObject, call, p);
}
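
/* Sanity-check sketch (editor's addition; rxi_CheckPreparedLen is a
 * hypothetical helper, not part of rx): after rxi_PrepareSendPacket the
 * sum of the data iovec lengths should equal the packet length plus the
 * connection's security header size, which is exactly the invariant the
 * reconciliation above enforces. */
#if 0
static int
rxi_CheckPreparedLen(struct rx_packet *p, struct rx_connection *conn)
{
    size_t sum = 0;
    int i;
    for (i = 1; i < p->niovecs; i++)
        sum += p->wirevec[i].iov_len;
    return (sum == p->length + conn->securityHeaderSize);
}
#endif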
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU, frags;

    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
        return mtu;
    }
    mtu -= adjMTU;
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
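
/* Worked example (editor's note, assuming the usual constants
 * RX_HEADER_SIZE = 28, RX_JUMBOBUFFERSIZE = 1412, RX_JUMBOHEADERSIZE = 4):
 * for an Ethernet MTU of 1500, adjMTU = 28 + 1412 + 4 = 1444.  The 56
 * bytes left over cannot hold another 1416-byte jumbo buffer, so
 * rxi_AdjustIfMTU(1500) returns 1444. */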
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
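
/* Worked example (editor's note, same constants as above and assuming the
 * default rxi_nSendFrags of 4): rxi_AdjustMaxMTU(1444, 65535) starts from
 * 4 * 1444 = 5776, which rxi_AdjustIfMTU rounds down to
 * 1444 + 3 * 1416 = 5692. */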
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
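
/* Worked example (editor's note, assuming UDP_HDR_SIZE is 28 -- the IPv4
 * plus UDP headers -- and that the RX_MAX_PACKET_SIZE clamp does not bind):
 * rxi_AdjustDgramPackets(3, 1444) computes maxMTU = 3 * 1472 - 28 = 4388,
 * subtracts 28 + 2 * 1412 + 4 = 2856 for the first and last buffers,
 * leaving 1532; 1532 / 1416 = 1 middle buffer, so the answer is
 * 2 + 1 = 3 packets per datagram. */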