 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include "afs/param.h"
#include <afs/param.h>
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"          /* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
struct rx_packet *rx_mallocedP = 0;
extern char cml_version_number[];
extern int (*rx_almostSent) ();
static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_int32 ahost, short aport,
/* some rules about packets:
 * 1.  When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 * all packet buffers (iov_base) are integral multiples of
 * offset is an integral multiple of the word size.
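/* As an illustration, fetching the third 32-bit word of the user data, at
 * word-aligned offset 8, looks like
 *
 *     afs_int32 v = rx_SlowGetInt32(p, 8);
 *
 * The rx_GetInt32/rx_PutInt32 macros are the usual entry points; they fall
 * back to these Slow routines only when the offset does not land in the
 * first data iovec.
 */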
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
        l += packet->wirevec[i].iov_len;
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
                             (offset - l))) = data;
        l += packet->wirevec[i].iov_len;
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
    while ((resid > 0) && (i < packet->niovecs)) {
        j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        l += packet->wirevec[i].iov_len;
    return (resid ? (r - resid) : r);
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
    while ((resid > 0) && (i < RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0)  /* ++niovecs as a side-effect */
        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
        l += packet->wirevec[i].iov_len;
    return (resid ? (r - resid) : r);
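/* As an illustration, a round trip through these two routines (assuming
 * pkt already holds at least 512 bytes of allocated data):
 *
 *     char buf[512];
 *     if (rx_SlowWritePacket(pkt, 0, sizeof(buf), buf) == sizeof(buf))
 *         (void)rx_SlowReadPacket(pkt, 0, sizeof(buf), buf);
 *
 * Each returns the byte count actually moved, which falls short only when
 * the packet runs out of iovecs (and, for writes, when rxi_AllocDataBuf
 * cannot extend it).
 */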
static struct rx_packet *
    MUTEX_ENTER(&rx_freePktQ_lock);
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        MUTEX_ENTER(&rx_stats_mutex);
        case RX_PACKET_CLASS_RECEIVE:
            rx_stats.receivePktAllocFailures++;
        case RX_PACKET_CLASS_SEND:
            rx_stats.sendPktAllocFailures++;
        case RX_PACKET_CLASS_SPECIAL:
            rx_stats.specialPktAllocFailures++;
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_stats.receiveCbufPktAllocFailures++;
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_stats.sendCbufPktAllocFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
    if (queue_IsEmpty(&rx_freePacketQueue)) {
        rxi_NeedMorePackets = TRUE;
    if (queue_IsEmpty(&rx_freePacketQueue)) {
        rxi_MorePacketsNoLock(rx_initSendWindow);
    c = queue_First(&rx_freePacketQueue, rx_packet);
    if (!(c->flags & RX_PKTFLAG_FREE))
        osi_Panic("rxi_AllocPacket: packet not free\n");
    c->flags = 0;               /* clear RX_PKTFLAG_FREE, initialize the rest */
    MUTEX_EXIT(&rx_freePktQ_lock);
 * Free a packet currently used as a continuation buffer
rxi_freeCBuf(struct rx_packet *c)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreePacketNoLock(c);
    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
/* this one is kind of awful.
 * In rxkad, the packet has already been shortened and otherwise made ready
 * for sending, when we suddenly discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
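/* For instance, rxkad's encrypting levels pad the secured region out to an
 * 8-byte block boundary, so the expected caller asks for at most 7 bytes
 * back (an assumption about the caller, not something checked here).
 */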
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by allocCBuf */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
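/* Illustrative use: asking for 4000 more bytes of room in a send packet,
 *
 *     int shortfall = rxi_AllocDataBuf(p, 4000, RX_PACKET_CLASS_SEND_CBUF);
 *
 * hooks on RX_CBUFFERSIZE-byte continuation buffers until the request is
 * covered or RX_MAXWVECS is reached; a positive return is the portion that
 * could not be satisfied.
 */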
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    for (i = p->niovecs; nb > 0 && i < RX_MAXWVECS; i++) {
        register struct rx_packet *cb;
        if ((cb = allocCBuf(class))) {
            p->wirevec[i].iov_base = (caddr_t) cb->localdata;
            p->wirevec[i].iov_len = RX_CBUFFERSIZE;
            nb -= RX_CBUFFERSIZE;
            p->length += RX_CBUFFERSIZE;
/* Add more packet buffers */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
    PIN(p, getme);              /* XXXXX */
    memset((char *)p, 0, getme);
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (e = p + apackets; p < e; p++) {
        p->wirevec[0].iov_base = (char *)(p->wirehead);
        p->wirevec[0].iov_len = RX_HEADER_SIZE;
        p->wirevec[1].iov_base = (char *)(p->localdata);
        p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    MUTEX_EXIT(&rx_freePktQ_lock);
/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
    struct rx_packet *p, *e;
    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
    memset((char *)p, 0, getme);
    for (e = p + apackets; p < e; p++) {
        p->wirevec[0].iov_base = (char *)(p->wirehead);
        p->wirevec[0].iov_len = RX_HEADER_SIZE;
        p->wirevec[1].iov_base = (char *)(p->localdata);
        p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_initSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for afs_int32: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
/* Actually free the packet p. */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %x\n", (int)p));
    if (p->flags & RX_PKTFLAG_FREE)
        osi_Panic("rxi_FreePacketNoLock: packet already free\n");
    p->flags |= RX_PKTFLAG_FREE;
    queue_Append(&rx_freePacketQueue, p);
rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    if (first != 1)             /* MTUXXX */
        osi_Panic("FreeDataBufs 1: first must be 1");
    iov = &p->wirevec[1];
    end = iov + (p->niovecs - 1);
    if (iov->iov_base != (caddr_t) p->localdata)        /* MTUXXX */
        osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
    for (iov++; iov < end; iov++) {
        osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
int rxi_nBadIovecs = 0;
/* rxi_RestoreDataBufs
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];
    p->wirevec[0].iov_base = (char *)(p->wirehead);
    p->wirevec[0].iov_len = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *)(p->localdata);
    p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
        iov->iov_len = RX_CBUFFERSIZE;
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
        osi_Panic("TrimDataBufs 1: first must be 1");
    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
        osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    /* iov now points to the first empty data buffer. */
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (; iov < end; iov++) {
        osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    MUTEX_EXIT(&rx_freePktQ_lock);
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreeDataBufsNoLock(p, 1);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
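/* Concretely: immediately after allocation p->length == RX_FIRSTBUFFERSIZE,
 * the capacity of wirevec[1] alone; each continuation buffer later hooked
 * on by rxi_AllocDataBuf adds another RX_CBUFFERSIZE.  The wire header in
 * wirevec[0] is never counted in p->length.
 */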
rxi_AllocPacketNoLock(int class)
    register struct rx_packet *p;
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        MUTEX_ENTER(&rx_stats_mutex);
        case RX_PACKET_CLASS_RECEIVE:
            rx_stats.receivePktAllocFailures++;
        case RX_PACKET_CLASS_SEND:
            rx_stats.sendPktAllocFailures++;
        case RX_PACKET_CLASS_SPECIAL:
            rx_stats.specialPktAllocFailures++;
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_stats.receiveCbufPktAllocFailures++;
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_stats.sendCbufPktAllocFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
        return (struct rx_packet *)0;
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);
    if (queue_IsEmpty(&rx_freePacketQueue))
        osi_Panic("rxi_AllocPacket error");
    if (queue_IsEmpty(&rx_freePacketQueue))
        rxi_MorePacketsNoLock(rx_initSendWindow);
    p = queue_First(&rx_freePacketQueue, rx_packet);
    if (!(p->flags & RX_PKTFLAG_FREE))
        osi_Panic("rxi_AllocPacket: packet not free\n");
    dpf(("Alloc %x, class %d\n", (int)p, class));
    p->flags = 0;               /* clear RX_PKTFLAG_FREE, initialize the rest */
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    p->wirevec[0].iov_base = (char *)(p->wirehead);
    p->wirevec[0].iov_len = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *)(p->localdata);
    p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
    p->length = RX_FIRSTBUFFERSIZE;
rxi_AllocPacket(int class)
    register struct rx_packet *p;
    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
rxi_AllocSendPacket(register struct rx_call *call, int want)
    register struct rx_packet *p = (struct rx_packet *)0;
    register unsigned delta;
    mud = call->MTU - RX_HEADER_SIZE;
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);
            want = MIN(want, mud);
            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);
            if ((unsigned)p->length > mud)
            if (delta >= p->length) {
        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
        call->flags |= RX_CALL_WAIT_PACKETS;
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;
#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
        osi_rxSleep(&rx_waitingForPackets);
        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
/* count the number of used FDs */
CountFDs(register int amax)
    register int i, code;
    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)
/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
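/* One subtlety in the bogus-packet test below: p->length is an unsigned
 * short (a u_short in rx_packet.h), so a runt datagram wraps around and
 * sets bit 0x8000.  Hypothetical values:
 *
 *     nbytes = 20;                           (less than the 28-byte header)
 *     p->length = nbytes - RX_HEADER_SIZE;   (wraps to 0xfff8)
 *
 * so (p->length & 0x8000) is true and the packet is rejected.
 */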
rxi_ReadPacket(int socket, register struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    register afs_int32 tlen, savelen;
    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);    /* this is the size of the user data area */
    tlen += RX_HEADER_SIZE;     /* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
                                 * it once in order to avoid races.  */
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);
    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;
    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {      /* Bogus packet */
        rxi_MorePackets(rx_initSendWindow);
    else if (nbytes < 0 && errno == EWOULDBLOCK) {
        MUTEX_ENTER(&rx_stats_mutex);
        rx_stats.noPacketOnRead++;
        MUTEX_EXIT(&rx_stats_mutex);
        MUTEX_ENTER(&rx_stats_mutex);
        rx_stats.bogusPacketOnRead++;
        rx_stats.bogusHost = from.sin_addr.s_addr;
        MUTEX_EXIT(&rx_stats_mutex);
        dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
             from.sin_port, nbytes));
    /* Extract packet header. */
    rxi_DecodePacketHeader(p);
    *host = from.sin_addr.s_addr;
    *port = from.sin_port;
    if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
        struct rx_peer *peer;
        MUTEX_ENTER(&rx_stats_mutex);
        rx_stats.packetsRead[p->header.type - 1]++;
        MUTEX_EXIT(&rx_stats_mutex);
         * Try to look up this peer structure.  If it doesn't exist,
         * don't create a new one -
         * we don't keep count of the bytes sent/received if a peer
         * structure doesn't already exist.
         * The peer/connection cleanup code assumes that there is 1 peer
         * per connection.  If we actually created a peer structure here
         * and this packet was an rxdebug packet, the peer structure would
         * never be cleaned up.
        peer = rxi_FindPeer(*host, *port, 0, 0);
        MUTEX_ENTER(&peer->peer_lock);
        hadd32(peer->bytesReceived, p->length);
        MUTEX_EXIT(&peer->peer_lock);
    /* Free any empty packet buffers at the end of this packet */
    rxi_TrimDataBufs(p, 1);
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
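/* With the stock sizes (RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4), a
 * three-packet jumbogram on the wire is laid out as
 *
 *   [28-byte rx header][1412 data][4 jumbo hdr][1412 data][4 jumbo hdr][up to 1412 data]
 *
 * and each call below peels one 1412-byte packet, plus the abbreviated
 * header that follows it, off the front.
 */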
rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
    niov = p->niovecs - 2;
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);
    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);
    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
 * message receipt is done in rxk_input or rx_put.
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);
/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
    register int m, n, o, t, i;
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
            t = iovs[i].iov_len;
        memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e)  cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;
    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");      /* MTUXXX probably don't need this check */
    if (m->m_len <= off) {
    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;
    t = MIN(l1, MIN(l2, (unsigned int)len));
        p1 = mtod(m, caddr_t);
        p2 = iovs[i].iov_base;
        l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;
    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /*KERNEL && !UKERNEL */
/* send a response to a debug packet */
rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
                       afs_int32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);
    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;
            /* get basic stats */
            memset((char *)&tstat, 0, sizeof(tstat));   /* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_nWaiting);
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
            tstat.idleThreads = htonl(tstat.idleThreads);
            tl = sizeof(struct rx_debugStats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
            ap->length = sizeof(struct rx_debugStats);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            rx_computelen(ap, ap->length);
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            register struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);
            tl = sizeof(struct rx_debugConn) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            memset((char *)&tconn, 0, sizeof(tconn));   /* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                                tconn.callState[j] = RX_STATE_NOTINIT;
                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)
                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                MUTEX_EXIT(&rx_connHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);      /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
     * Pass back all the peer structures we have available
    case RX_DEBUGI_GETPEER:{
            register struct rx_peer *tp;
            struct rx_debugPeer tpeer;
            tl = sizeof(struct rx_debugPeer) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            memset((char *)&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = tp->burstSize;
                        tpeer.burst = tp->burst;
                        tpeer.burstWait.sec = htonl(tp->burstWait.sec);
                        tpeer.burstWait.usec = htonl(tp->burstWait.usec);
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.timeout.sec = htonl(tp->timeout.sec);
                        tpeer.timeout.usec = htonl(tp->timeout.usec);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.inPacketSkew = htonl(tp->inPacketSkew);
                        tpeer.outPacketSkew = htonl(tp->outPacketSkew);
                        tpeer.rateFlag = htonl(tp->rateFlag);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high = htonl(tp->bytesSent.high);
                        tpeer.bytesSent.low = htonl(tp->bytesSent.low);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived.high);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived.low);
                        MUTEX_EXIT(&rx_peerHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                MUTEX_EXIT(&rx_peerHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff);     /* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    case RX_DEBUGI_RXSTATS:{
            tl = sizeof(rx_stats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            /* Since it's all int32s, convert to network order with a loop. */
            MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) & rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
            ap->length = sizeof(rx_stats);
            MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
        /* error response packet */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
                         afs_int32 ahost, short aport, int istack)
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
        memset(buf, 0, sizeof(buf));
        strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
        rx_packetwrite(ap, 0, 65, buf);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                    afs_int32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;
    int waslocked = ISAFS_GLOCK();
    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
        if (nbytes <= apacket->wirevec[i].iov_len) {
            savelen = apacket->wirevec[i].iov_len;
            saven = apacket->niovecs;
            apacket->wirevec[i].iov_len = nbytes;
            apacket->niovecs = i + 1;   /* so condition fails because i == niovecs */
        nbytes -= apacket->wirevec[i].iov_len;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
                      apacket->length + RX_HEADER_SIZE, istack);
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
    if (saven) {                /* means we truncated the packet above. */
        apacket->wirevec[i - 1].iov_len = savelen;
        apacket->niovecs = saven;
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
               struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    char deliveryType = 'S';
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;
    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */
    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
    if (p->firstSerial == 0) {
        p->firstSerial = p->header.serial;
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
        int drop = (*rx_almostSent) (p, &addr);
        /* drop packet if return value is non-zero? */
            deliveryType = 'D'; /* Drop the packet */
    /* Get network byte order header */
    rxi_EncodePacketHeader(p);  /* XXX in the event of rexmit, etc, don't need to
                                 * touch ALL the fields */
    /* Send the packet out on the same socket that related packets are being
            RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */
    /* Loop until the packet is sent.  We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
    waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
           osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
                       p->length + RX_HEADER_SIZE, istack)) != 0) {
        /* send failed, so let's hurry up the resend, eh? */
        MUTEX_ENTER(&rx_stats_mutex);
        rx_stats.netSendFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
        p->retryTime = p->timeSent;     /* resend it very soon */
        clock_Addmsec(&(p->retryTime),
                      10 + (((afs_uint32) p->backoff) << 8));
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
        /* Linux is nice -- it can tell us right away that we cannot
         * reach this recipient by returning an ENETUNREACH error
         * code.  So, when this happens let's "down" the host NOW so
         * we don't sit around waiting for this host to timeout later.
        if (call && code == -ENETUNREACH)
            call->lastReceiveTime = 0;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (int)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
/* Send a list of packets to appropriate destination for the specified
 * connection.  The headers are first encoded and placed in the packets.
rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
                   struct rx_packet **list, int len, int istack)
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length, code;
    struct rx_jumboHeader *jp;
    char deliveryType = 'S';
    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;
    if (len + 1 > RX_MAXIOVECS) {
        osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
     * Stamp the packets in this jumbogram with consecutive serial numbers
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */
    length = RX_HEADER_SIZE;
    wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0; i < len; i++) {
        /* The whole 3.5 jumbogram scheme relies on packets fitting
         * in a single packet buffer. */
        if (p->niovecs > 2) {
            osi_Panic("rxi_SendPacketList, niovecs > 2\n");
        /* Set the RX_JUMBO_PACKET flags in all but the last packets
            if (p->length != RX_JUMBOBUFFERSIZE) {
                osi_Panic("rxi_SendPacketList, length != jumbo size\n");
            p->header.flags |= RX_JUMBO_PACKET;
            length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
            wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
            wirevec[i + 1].iov_len = p->length;
            length += p->length;
        wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
            /* Convert jumbo packet header to network byte order */
            temp = (afs_uint32) (p->header.flags) << 24;
            temp |= (afs_uint32) (p->header.spare);
            *(afs_uint32 *) jp = htonl(temp);
        jp = (struct rx_jumboHeader *)
            ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
        /* Stamp each packet with a unique serial number.  The serial
         * number is maintained on a connection basis because some types
         * of security may be based on the serial number of the packet,
         * and security is handled on a per authenticated-connection
        /* Pre-increment, to guarantee no zero serial number; a zero
         * serial number means the packet was never sent. */
        p->header.serial = ++serial;
        /* This is so we can adjust retransmit time-outs better in the face of
         * rapidly changing round-trip times.  RTO estimation is not a la Karn.
        if (p->firstSerial == 0) {
            p->firstSerial = p->header.serial;
        /* If an output tracer function is defined, call it with the packet and
         * network address.  Note this function may modify its arguments. */
        if (rx_almostSent) {
            int drop = (*rx_almostSent) (p, &addr);
            /* drop packet if return value is non-zero? */
                deliveryType = 'D';     /* Drop the packet */
        /* Get network byte order header */
        rxi_EncodePacketHeader(p);      /* XXX in the event of rexmit, etc, don't need to
                                         * touch ALL the fields */
    /* Send the packet out on the same socket that related packets are being
            RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';     /* Drop the packet */
        deliveryType = 'S';     /* Send the packet */
#endif /* RXDEBUG */
    /* Loop until the packet is sent.  We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    waslocked = ISAFS_GLOCK();
    if (!istack && waslocked)
         osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
        /* send failed, so let's hurry up the resend, eh? */
        MUTEX_ENTER(&rx_stats_mutex);
        rx_stats.netSendFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
        for (i = 0; i < len; i++) {
            p->retryTime = p->timeSent; /* resend it very soon */
            clock_Addmsec(&(p->retryTime),
                          10 + (((afs_uint32) p->backoff) << 8));
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
        /* Linux is nice -- it can tell us right away that we cannot
         * reach this recipient by returning an ENETUNREACH error
         * code.  So, when this happens let's "down" the host NOW so
         * we don't sit around waiting for this host to timeout later.
        if (call && code == -ENETUNREACH)
            call->lastReceiveTime = 0;
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    if (!istack && waslocked)
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1],
         peer->host, peer->port, p->header.serial, p->header.epoch,
         p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
         (int)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
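/* Typical use elsewhere in rx (illustrative sketch): sending a
 * connection-level abort with the error code as the payload,
 *
 *     afs_int32 error = htonl(conn->error);
 *     rxi_SendSpecial((struct rx_call *)0, conn, (struct rx_packet *)0,
 *                     RX_PACKET_TYPE_ABORT, (char *)&error,
 *                     sizeof(error), istack);
 *
 * A null optionalPacket makes the routine allocate (and free) its own
 * buffer; passing one in returns it to the caller.
 */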
rxi_SendSpecial(register struct rx_call *call,
                register struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    int savelen = 0, saven = 0;
    int channel, callNumber;
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
            osi_Panic("rxi_SendSpecial failure");
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
        rx_packetwrite(p, 0, nbytes, data);
    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1; /* so condition fails because i == niovecs */
        nbytes -= p->wirevec[i].iov_len;
        rxi_Send(call, p, istack);
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {                /* means we truncated the packet above.  We probably don't */
        /* really need to do this, but it seems safer this way, given that */
        /* sneaky optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
    if (!optionalPacket)
    return optionalPacket;
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
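/* For reference, the resulting wire header is seven 32-bit words, all
 * big-endian, as written out below:
 *
 *     word 0  epoch
 *     word 1  cid (connection id | channel)
 *     word 2  callNumber
 *     word 3  seq
 *     word 4  serial
 *     word 5  type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *     word 6  spare<<16 | serviceId  (the top half doubles as the
 *                                     security checksum on the wire)
 */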
rxi_EncodePacketHeader(register struct rx_packet *p)
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);     /* MTUXXX */
    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
/* Decode the packet's header (from net byte order to a struct header) */
rxi_DecodePacketHeader(register struct rx_packet *p)
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);     /* MTUXXX */
    p->header.epoch = ntohl(*buf);
    p->header.cid = ntohl(*buf);
    p->header.callNumber = ntohl(*buf);
    p->header.seq = ntohl(*buf);
    p->header.serial = ntohl(*buf);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
rxi_PrepareSendPacket(register struct rx_call *call,
                      register struct rx_packet *p, register int last)
    register struct rx_connection *conn = call->conn;
    ssize_t len;                /* len must be a signed type; it can go negative */
    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
        p->header.flags |= RX_LAST_PACKET;
    clock_Zero(&p->retryTime);  /* Never yet transmitted */
    clock_Zero(&p->firstSent);  /* Never yet transmitted */
    p->header.serial = 0;       /* Another way of saying never transmitted... */
    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;
    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
        osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
        /* Free any extra elements in the wirevec */
        for (j = MAX(2, i); j < p->niovecs; j++) {
            rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
        p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
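/* Worked example, assuming the stock constants (RX_HEADER_SIZE 28,
 * RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE 4): adjMTU is
 * 28 + 1412 + 4 = 1444, so rxi_AdjustIfMTU(1472) returns 1444 (the 28
 * leftover bytes cannot hold another 1416-byte fragment), while
 * rxi_AdjustIfMTU(3000) returns 1444 + 1416 = 2860.
 */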
rxi_AdjustIfMTU(int mtu)
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
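/* For instance, with the stock constants and assuming UDP_HDR_SIZE is the
 * usual 28 bytes of IP+UDP overhead (and RX_MAX_PACKET_SIZE does not clamp
 * the product): frags = 4, mtu = 1444 gives maxMTU = 4*(1444+28) - 28 =
 * 5860; subtracting the first/last packet overhead 28 + 2*1412 + 4 = 2856
 * leaves 3004, so the routine returns 2 + 3004/1416 = 4 packets.
 */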
rxi_AdjustDgramPackets(int frags, int mtu)
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));