 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include "afs/param.h"
#include <afs/param.h>
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#endif
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#endif
#if defined(AFS_OBSD_ENV)
#endif
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#endif
#endif
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
struct rx_packet *rx_mallocedP = 0;

extern char cml_version_number[];
extern int (*rx_almostSent) ();
static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport,
				afs_int32 istack);
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */

/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of
 *        the word size.
 *        offset is an integral multiple of the word size.
 */
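/* Illustrative sketch (an added note, not from the original source): a
 * freshly allocated packet's iovec array is laid out as
 *
 *   wirevec[0] -> p->wirehead,  RX_HEADER_SIZE bytes    (the wire header)
 *   wirevec[1] -> p->localdata, RX_FIRSTBUFFERSIZE bytes of user data
 *   wirevec[2..niovecs-1] -> continuation buffers of RX_CBUFFERSIZE each
 *
 * which is why the offset walks below start at index 1: byte offsets are
 * relative to the data area and never include the header iovec.
 */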
afs_int32
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    return
		*((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
				 (offset - l)));
	}
	l += packet->wirevec[i].iov_len;
    }

    return 0;
}
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the word size.
 *        offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	    return 0;
	}
	l += packet->wirevec[i].iov_len;
    }

    return 0;
}
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the
 *        word size.
 *        offset is an integral multiple of the word size.
 *        all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
unsigned int
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
		  char *out)
{
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((resid > 0) && (i < packet->niovecs)) {
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	resid -= j;
	out += j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (resid ? (r - resid) : r);
}
/* Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the
 *        word size.
 *        offset is an integral multiple of the word size.
 */
unsigned int
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
{
    unsigned int i, j, l, r;
    char *b;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((resid > 0) && (i < RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
		break;

	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	memcpy(b, in, j);
	resid -= j;
	in += j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (resid ? (r - resid) : r);
}
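/* Minimal usage sketch (an added illustration, not original code).  The
 * rx_packetread()/rx_packetwrite() wrappers normally take a fast
 * contiguous path and fall back to the Slow routines above when a range
 * spans iovec boundaries; calling the Slow routines directly always works:
 *
 *	char buf[16], check[16];
 *	memcpy(buf, "0123456789abcdef", sizeof(buf));
 *	if (rx_SlowWritePacket(p, 0, sizeof(buf), buf) == sizeof(buf))
 *	    (void)rx_SlowReadPacket(p, 0, sizeof(check), check);
 *
 * Both return the number of bytes actually transferred, which is short
 * only when the packet cannot be grown to cover the request.
 */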
static struct rx_packet *
allocCBuf(int class)
{
    struct rx_packet *c;

    MUTEX_ENTER(&rx_freePktQ_lock);

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
	c = NULL;
	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	switch (class) {
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	    break;
	}
	MUTEX_EXIT(&rx_stats_mutex);
	goto done;
    }

    if (queue_IsEmpty(&rx_freePacketQueue)) {
	c = NULL;
	rxi_NeedMorePackets = TRUE;
	goto done;
    }
#else /* KERNEL */
    if (queue_IsEmpty(&rx_freePacketQueue)) {
	rxi_MorePacketsNoLock(rx_initSendWindow);
    }
#endif /* KERNEL */

    rx_nFreePackets--;
    c = queue_First(&rx_freePacketQueue, rx_packet);
    queue_Remove(c);
    if (!(c->flags & RX_PKTFLAG_FREE))
	osi_Panic("rxi_AllocPacket: packet not free\n");
    c->flags = 0;		/* clear RX_PKTFLAG_FREE, initialize the rest */

#ifdef KERNEL
  done:
#endif
    MUTEX_EXIT(&rx_freePktQ_lock);
    return c;
}
/*
 * Free a packet currently used as a continuation buffer
 */
void
rxi_freeCBuf(struct rx_packet *c)
{
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreePacketNoLock(c);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
void
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
{
    int i;

    i = p->niovecs - 1;
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	}
    } else {
	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	}
    }
}
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by allocCBuf */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
    int i;

    for (i = p->niovecs; nb > 0 && i < RX_MAXWVECS; i++) {
	register struct rx_packet *cb;
	if ((cb = allocCBuf(class))) {
	    p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	    p->wirevec[i].iov_len = RX_CBUFFERSIZE;
	    nb -= RX_CBUFFERSIZE;
	    p->length += RX_CBUFFERSIZE;
	    p->niovecs++;
	} else
	    break;
    }

    return nb;
}
/* Add more packet buffers */
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    int getme;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
	p->wirevec[0].iov_base = (char *)(p->wirehead);
	p->wirevec[0].iov_len = RX_HEADER_SIZE;
	p->wirevec[1].iov_base = (char *)(p->localdata);
	p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
	p->flags |= RX_PKTFLAG_FREE;
	p->niovecs = 2;

	queue_Append(&rx_freePacketQueue, p);
    }
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
/* Add more packet buffers */
void
rxi_MorePacketsNoLock(int apackets)
{
    struct rx_packet *p, *e;
    int getme;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    memset((char *)p, 0, getme);

    for (e = p + apackets; p < e; p++) {
	p->wirevec[0].iov_base = (char *)(p->wirehead);
	p->wirevec[0].iov_len = RX_HEADER_SIZE;
	p->wirevec[1].iov_base = (char *)(p->localdata);
	p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
	p->flags |= RX_PKTFLAG_FREE;
	p->niovecs = 2;

	queue_Append(&rx_freePacketQueue, p);
    }
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
}
void
rxi_FreeAllPackets(void)
{
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
}
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
void
rx_CheckPackets(void)
{
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_initSendWindow);
    }
}
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    dpf(("Free %lx\n", (unsigned long)p));

    if (p->flags & RX_PKTFLAG_FREE)
	osi_Panic("rxi_FreePacketNoLock: packet already free\n");

    rx_nFreePackets++;
    p->flags |= RX_PKTFLAG_FREE;
    queue_Append(&rx_freePacketQueue, p);
}
int
rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
{
    struct iovec *iov, *end;

    if (first != 1)		/* MTUXXX */
	osi_Panic("FreeDataBufs 1: first must be 1");
    iov = &p->wirevec[1];
    end = iov + (p->niovecs - 1);
    if (iov->iov_base != (caddr_t) p->localdata)	/* MTUXXX */
	osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
    for (iov++; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }
    p->length = 0;
    p->niovecs = 0;

    return 0;
}
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
void
rxi_RestoreDataBufs(struct rx_packet *p)
{
    int i;
    struct iovec *iov = &p->wirevec[2];

    p->wirevec[0].iov_base = (char *)(p->wirehead);
    p->wirevec[0].iov_len = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *)(p->localdata);
    p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	    rxi_nBadIovecs++;
	    p->niovecs = i;
	    break;
	}
	iov->iov_len = RX_CBUFFERSIZE;
    }
}
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov >= end)
	return;

    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
	p->niovecs--;
    }

    MUTEX_EXIT(&rx_freePktQ_lock);
}
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
void
rxi_FreePacket(struct rx_packet *p)
{
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 1);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
}
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
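/* For example (an added note, not from the original source): immediately
 * after rxi_AllocPacketNoLock() below returns, p->length is
 * RX_FIRSTBUFFERSIZE, while the datagram that osi_NetSend() eventually
 * puts on the wire is p->length + RX_HEADER_SIZE bytes long. */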
struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    register struct rx_packet *p;

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	switch (class) {
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	    break;
	}
	MUTEX_EXIT(&rx_stats_mutex);
	return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

#ifdef KERNEL
    if (queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
    if (queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(rx_initSendWindow);
#endif /* KERNEL */

    rx_nFreePackets--;
    p = queue_First(&rx_freePacketQueue, rx_packet);
    if (!(p->flags & RX_PKTFLAG_FREE))
	osi_Panic("rxi_AllocPacket: packet not free\n");

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    queue_Remove(p);
    p->flags = 0;		/* clear RX_PKTFLAG_FREE, initialize the rest */

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    p->wirevec[0].iov_base = (char *)(p->wirehead);
    p->wirevec[0].iov_len = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *)(p->localdata);
    p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
    p->niovecs = 2;
    p->length = RX_FIRSTBUFFERSIZE;
    return p;
}
struct rx_packet *
rxi_AllocPacket(int class)
{
    register struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
    return p;
}
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
struct rx_packet *
rxi_AllocSendPacket(register struct rx_call *call, int want)
{
    register struct rx_packet *p = (struct rx_packet *)0;
    register int mud;
    register unsigned delta;

    mud = call->MTU - RX_HEADER_SIZE;
    delta =
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);

	    want += delta;
	    want = MIN(want, mud);

	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);

	    if ((unsigned)p->length > mud)
		p->length = mud;

	    if (delta >= p->length) {
		rxi_FreePacket(p);
		p = NULL;
	    } else {
		p->length -= delta;
	    }
	    break;
	}

	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we
	 * keep waiting. */
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;
#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
	osi_rxSleep(&rx_waitingForPackets);
#endif
	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
    }

    return p;
}
#ifndef KERNEL
/* count the number of used FDs */
static int
CountFDs(register int amax)
{
    struct stat tstat;
    register int i, code;
    int count;

    count = 0;
    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
	if (code == 0)
	    count++;
    }
    return count;
}
#else /* KERNEL */
#define CountFDs(amax) amax
#endif /* KERNEL */
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
int
rxi_ReadPacket(int socket, register struct rx_packet *p, afs_uint32 * host,
	       u_short * port)
{
    struct sockaddr_in from;
    int nbytes;
    afs_int32 rlen;
    register afs_int32 tlen, savelen;
    struct msghdr msg;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
    tlen = rlen - tlen;
    if (tlen > 0) {
	tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
	if (tlen > 0) {
	    tlen = rlen - tlen;
	} else
	    tlen = rlen;
    } else
	tlen = rlen;

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes > 0)
	    rxi_MorePackets(rx_initSendWindow);
	else if (nbytes < 0 && errno == EWOULDBLOCK) {
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.noPacketOnRead++;
	    MUTEX_EXIT(&rx_stats_mutex);
	} else {
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.bogusPacketOnRead++;
	    rx_stats.bogusHost = from.sin_addr.s_addr;
	    MUTEX_EXIT(&rx_stats_mutex);
	    dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
		 from.sin_port, nbytes));
	}
	return 0;
    } else {
	/* Extract packet header. */
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
	    struct rx_peer *peer;
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.packetsRead[p->header.type - 1]++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    /*
	     * Try to look up this peer structure.  If it doesn't exist,
	     * don't create a new one -
	     * we don't keep count of the bytes sent/received if a peer
	     * structure doesn't already exist.
	     *
	     * The peer/connection cleanup code assumes that there is 1 peer
	     * per connection.  If we actually created a peer structure here
	     * and this packet was an rxdebug packet, the peer structure would
	     * never be cleaned up.
	     */
	    peer = rxi_FindPeer(*host, *port, 0, 0);
	    /* Since this may not be associated with a connection,
	     * it may have no refCount, meaning we could race with
	     * ReapConnections
	     */
	    if (peer && (peer->refCount > 0)) {
		MUTEX_ENTER(&peer->peer_lock);
		hadd32(peer->bytesReceived, p->length);
		MUTEX_EXIT(&peer->peer_lock);
	    }
	}

	/* Free any empty packet buffers at the end of this packet */
	rxi_TrimDataBufs(p, 1);

	return 1;
    }
}
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
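/* Illustrative datagram layout (an added sketch, not original source):
 * a two-packet jumbogram occupies one UDP datagram as
 *
 *   | rx header | RX_JUMBOBUFFERSIZE data | 4-byte jumbo hdr | trailing data |
 *
 * where the 4-byte rx_jumboHeader holds the flags and header checksum
 * ("spare") of the packet that follows it, as decoded below. */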
struct rx_packet *
rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
		     int first)
{
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    int niov, i;
    struct iovec *iov;
    int length;
    afs_uint32 temp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
	return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
	return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    p->niovecs = 2;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

    return np;
}
#if !defined(KERNEL) || defined(UKERNEL)
/* Send a udp datagram */
int
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
{
    struct msghdr msg;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    rxi_Sendmsg(socket, &msg, 0);

    return 0;
}
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
static int
cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
{
    register int n;

    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return len;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);
	cp += n;
	len -= n;
    }
    return len;
}
/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
static int
cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
	   int niovs)
{
    register int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return len;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	o = 0;
	while (n) {
	    if (!t) {
		o = 0;
		i++;
		t = iovs[i].iov_len;
	    }
	    m = MIN(n, t);
	    memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
	    mp->b_rptr += m;
	    o += m;
	    t -= m;
	    n -= m;
	    len -= m;
	}
    }
    return len;
}
#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#else /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV)
static int
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
{
    caddr_t p1, p2;
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */

    while (off && m)
	if (m->m_len <= off) {
	    off -= m->m_len;
	    m = m->m_next;
	    continue;
	} else
	    break;

    if (m == NULL)
	return len;

    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    i = 0;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    while (len) {
	t = MIN(l1, MIN(l2, (unsigned int)len));
	memcpy(p2, p1, t);
	p1 += t;
	p2 += t;
	l1 -= t;
	l2 -= t;
	len -= t;
	if (!l1) {
	    m = m->m_next;
	    if (!m)
		break;
	    p1 = mtod(m, caddr_t);
	    l1 = m->m_len;
	}
	if (!l2) {
	    if (++i >= niovs)
		break;
	    p2 = iovs[i].iov_base;
	    l2 = iovs[i].iov_len;
	}
    }

    return len;
}
#endif /* AFS_LINUX20_ENV */
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV)
int
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     mblk_t *amb;
#else
     struct mbuf *amb;
#endif
     void (*free) ();
     struct rx_packet *phandle;
     int hdr_len, data_len;
{
    int code;

    code =
	m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
		     phandle->niovecs);
    (*free) (amb);

    return code;
}
#endif /* AFS_LINUX20_ENV */
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */

struct rx_packet *
rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
		       afs_int32 ahost, short aport, int istack)
{
    struct rx_debugIn tin;
    afs_int32 tl;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
    } else {
	return ap;
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;

	    /* get basic stats */
	    memset((char *)&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
#endif
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_nWaiting);
	    tstat.nWaited = htonl(rx_nWaited);
	    queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
			tstat.idleThreads);
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    if (tl <= 0) {
		rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
			       (char *)&tstat);
		ap->length = sizeof(struct rx_debugStats);
		rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
		rx_computelen(ap, ap->length);
	    }
	    break;
	}
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    int i, j;
	    register struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);

	    tl = sizeof(struct rx_debugConn) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    memset((char *)&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of connections.
		 */
#ifdef AFS_PTHREAD_ENV
		sleep(1);
#else
		IOMGR_Sleep(1);
#endif
#endif
		MUTEX_ENTER(&rx_connHashTable_lock);
		/* We might be slightly out of step since we are not
		 * locking each call, but this is only debugging output.
		 */
		for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		    if ((all || rxi_IsConnInteresting(tc))
			&& tin.index-- <= 0) {
			tconn.host = tc->peer->host;
			tconn.port = tc->peer->port;
			tconn.cid = htonl(tc->cid);
			tconn.epoch = htonl(tc->epoch);
			tconn.serial = htonl(tc->serial);
			for (j = 0; j < RX_MAXCALLS; j++) {
			    tconn.callNumber[j] = htonl(tc->callNumber[j]);
			    if ((tcall = tc->call[j])) {
				tconn.callState[j] = tcall->state;
				tconn.callMode[j] = tcall->mode;
				tconn.callFlags[j] = tcall->flags;
				if (queue_IsNotEmpty(&tcall->rq))
				    tconn.callOther[j] |= RX_OTHER_IN;
				if (queue_IsNotEmpty(&tcall->tq))
				    tconn.callOther[j] |= RX_OTHER_OUT;
			    } else
				tconn.callState[j] = RX_STATE_NOTINIT;
			}

			tconn.natMTU = htonl(tc->peer->natMTU);
			tconn.error = htonl(tc->error);
			tconn.flags = tc->flags;
			tconn.type = tc->type;
			tconn.securityIndex = tc->securityIndex;
			if (tc->securityObject) {
			    RXS_GetStats(tc->securityObject, tc,
					 &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			    DOHTONL(flags);
			    DOHTONL(expires);
			    DOHTONL(packetsReceived);
			    DOHTONL(packetsSent);
			    DOHTONL(bytesReceived);
			    DOHTONL(bytesSent);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.spares) /
				 sizeof(short); i++)
				DOHTONS(spares[i]);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)
				DOHTONL(sparel[i]);
			}

			MUTEX_EXIT(&rx_connHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
				       (char *)&tconn);
			ap->length = sizeof(struct rx_debugConn);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			return ap;
		    }
		}
		MUTEX_EXIT(&rx_connHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
			   (char *)&tconn);
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}
	/*
	 * Pass back all the peer structures we have available
	 */
    case RX_DEBUGI_GETPEER:{
	    int i;
	    register struct rx_peer *tp;
	    struct rx_debugPeer tpeer;

	    tl = sizeof(struct rx_debugPeer) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    memset((char *)&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of peers.
		 *
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock
		 * also increases the risk that we will miss a new
		 * entry - but we are willing to live with this
		 * limitation since this is meant for debugging only
		 */
#ifdef AFS_PTHREAD_ENV
		sleep(1);
#else
		IOMGR_Sleep(1);
#endif
#endif
		MUTEX_ENTER(&rx_peerHashTable_lock);
		for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		    if (tin.index-- <= 0) {
			tpeer.host = tp->host;
			tpeer.port = tp->port;
			tpeer.ifMTU = htons(tp->ifMTU);
			tpeer.idleWhen = htonl(tp->idleWhen);
			tpeer.refCount = htons(tp->refCount);
			tpeer.burstSize = tp->burstSize;
			tpeer.burst = tp->burst;
			tpeer.burstWait.sec = htonl(tp->burstWait.sec);
			tpeer.burstWait.usec = htonl(tp->burstWait.usec);
			tpeer.rtt = htonl(tp->rtt);
			tpeer.rtt_dev = htonl(tp->rtt_dev);
			tpeer.timeout.sec = htonl(tp->timeout.sec);
			tpeer.timeout.usec = htonl(tp->timeout.usec);
			tpeer.nSent = htonl(tp->nSent);
			tpeer.reSends = htonl(tp->reSends);
			tpeer.inPacketSkew = htonl(tp->inPacketSkew);
			tpeer.outPacketSkew = htonl(tp->outPacketSkew);
			tpeer.rateFlag = htonl(tp->rateFlag);
			tpeer.natMTU = htons(tp->natMTU);
			tpeer.maxMTU = htons(tp->maxMTU);
			tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
			tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
			tpeer.MTU = htons(tp->MTU);
			tpeer.cwind = htons(tp->cwind);
			tpeer.nDgramPackets = htons(tp->nDgramPackets);
			tpeer.congestSeq = htons(tp->congestSeq);
			tpeer.bytesSent.high = htonl(tp->bytesSent.high);
			tpeer.bytesSent.low = htonl(tp->bytesSent.low);
			tpeer.bytesReceived.high =
			    htonl(tp->bytesReceived.high);
			tpeer.bytesReceived.low =
			    htonl(tp->bytesReceived.low);

			MUTEX_EXIT(&rx_peerHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
				       (char *)&tpeer);
			ap->length = sizeof(struct rx_debugPeer);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			return ap;
		    }
		}
		MUTEX_EXIT(&rx_peerHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
			   (char *)&tpeer);
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}
    case RX_DEBUGI_RXSTATS:{
	    int i;
	    register afs_int32 *s;

	    tl = sizeof(rx_stats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    /* Since it's all int32s convert to network order with a loop. */
	    MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) & rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

	    ap->length = sizeof(rx_stats);
	    MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    break;
	}

    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	break;
    }
    return ap;
}
struct rx_packet *
rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
			 afs_int32 ahost, short aport, int istack)
{
    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	char buf[66];

	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);
	ap->length = 65;
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    }

    return ap;
}
/* send a debug packet back to the sender */
static void
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_int32 ahost, short aport, afs_int32 istack)
{
    struct sockaddr_in taddr;
    int i;
    int nbytes;
    int saven = 0;
    size_t savelen = 0;
#ifdef KERNEL
    int waslocked = ISAFS_GLOCK();
#endif

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
#endif

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= apacket->wirevec[i].iov_len;
    }
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	AFS_GLOCK();
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");
	AFS_GUNLOCK();
    }
#else
    if (waslocked)
	AFS_GUNLOCK();
#endif
#endif
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	AFS_GLOCK();
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");
	AFS_GUNLOCK();
    }
#else
    if (waslocked)
	AFS_GLOCK();
#endif
#endif
    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
    }
}
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
void
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
{
#if defined(KERNEL)
    int waslocked;
#endif
    int code;
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    osi_socket socket;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;
    }
#ifdef RXDEBUG
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	if (drop)
	    deliveryType = 'D';	/* Drop the packet */
    }
#endif

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
	(conn->type ==
	 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#ifdef KERNEL
	waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    AFS_GLOCK();
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "before osi_NetSend()");
	    AFS_GUNLOCK();
	}
#else
	if (waslocked)
	    AFS_GUNLOCK();
#endif
#endif
	if ((code =
	     osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
			 p->length + RX_HEADER_SIZE, istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.netSendFailures++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    p->retryTime = p->timeSent;	/* resend it very soon */
	    clock_Addmsec(&(p->retryTime),
			  10 + (((afs_uint32) p->backoff) << 8));

#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
	    /* Linux is nice -- it can tell us right away that we cannot
	     * reach this recipient by returning an ENETUNREACH error
	     * code.  So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call && code == -ENETUNREACH)
		call->lastReceiveTime = 0;
#endif
	}
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    AFS_GLOCK();
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "after osi_NetSend()");
	    AFS_GUNLOCK();
	}
#else
	if (waslocked)
	    AFS_GLOCK();
#endif
#endif
#ifdef RXDEBUG
    }
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
/* Send a list of packets to appropriate destination for the specified
 * connection.  The headers are first encoded and placed in the packets.
 */
void
rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
		   struct rx_packet **list, int len, int istack)
{
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    int waslocked;
#endif
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    osi_socket socket;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length, code;
    afs_uint32 serial;
    afs_uint32 temp;
    struct rx_jumboHeader *jp;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    if (len + 1 > RX_MAXIOVECS) {
	osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
    }

    /*
     * Stamp the packets in this jumbogram with consecutive serial numbers
     */
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    MUTEX_EXIT(&conn->conn_data_lock);

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    jp = NULL;
    length = RX_HEADER_SIZE;
    wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0; i < len; i++) {
	p = list[i];

	/* The whole 3.5 jumbogram scheme relies on packets fitting
	 * in a single packet buffer. */
	if (p->niovecs > 2) {
	    osi_Panic("rxi_SendPacketList, niovecs > 2\n");
	}

	/* Set the RX_JUMBO_PACKET flags in all but the last packets
	 * in this chunk of the jumbogram */
	if (i < len - 1) {
	    if (p->length != RX_JUMBOBUFFERSIZE) {
		osi_Panic("rxi_SendPacketList, length != jumbo size\n");
	    }
	    p->header.flags |= RX_JUMBO_PACKET;
	    length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	    wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	} else {
	    wirevec[i + 1].iov_len = p->length;
	    length += p->length;
	}
	wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
	if (i > 0) {
	    /* Convert jumbo packet header to network byte order */
	    temp = (afs_uint32) (p->header.flags) << 24;
	    temp |= (afs_uint32) (p->header.spare);
	    *(afs_uint32 *) jp = htonl(temp);
	}
	jp = (struct rx_jumboHeader *)
	    ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);

	/* Stamp each packet with a unique serial number.  The serial
	 * number is maintained on a connection basis because some types
	 * of security may be based on the serial number of the packet,
	 * and security is handled on a per authenticated-connection
	 * basis. */
	/* Pre-increment, to guarantee no zero serial number; a zero
	 * serial number means the packet was never sent. */
	p->header.serial = ++serial;
	/* This is so we can adjust retransmit time-outs better in the face of
	 * rapidly changing round-trip times.  RTO estimation is not a la Karn.
	 */
	if (p->firstSerial == 0) {
	    p->firstSerial = p->header.serial;
	}
#ifdef RXDEBUG
	/* If an output tracer function is defined, call it with the packet and
	 * network address.  Note this function may modify its arguments. */
	if (rx_almostSent) {
	    int drop = (*rx_almostSent) (p, &addr);
	    /* drop packet if return value is non-zero? */
	    if (drop)
		deliveryType = 'D';	/* Drop the packet */
	}
#endif

	/* Get network byte order header */
	rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
					 * touch ALL the fields */
    }

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
	(conn->type ==
	 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	waslocked = ISAFS_GLOCK();
	if (!istack && waslocked)
	    AFS_GUNLOCK();
#endif
	if ((code =
	     osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
			 istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.netSendFailures++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    for (i = 0; i < len; i++) {
		p = list[i];
		p->retryTime = p->timeSent;	/* resend it very soon */
		clock_Addmsec(&(p->retryTime),
			      10 + (((afs_uint32) p->backoff) << 8));
	    }
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
	    /* Linux is nice -- it can tell us right away that we cannot
	     * reach this recipient by returning an ENETUNREACH error
	     * code.  So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call && code == -ENETUNREACH)
		call->lastReceiveTime = 0;
#endif
	}
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	if (!istack && waslocked)
	    AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(register struct rx_call *call,
		register struct rx_connection *conn,
		struct rx_packet *optionalPacket, int type, char *data,
		int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
	channel = call->channel;
	callNumber = *call->callNumber;
	/* BUSY packets refer to the next call on this connection */
	if (type == RX_PACKET_TYPE_BUSY) {
	    callNumber++;
	}
    } else {
	channel = 0;
	callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	if (!p)
	    osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
	p->length = nbytes;
    else
	nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
	rx_packetwrite(p, 0, nbytes, data);

    for (i = 1; i < p->niovecs; i++) {
	if (nbytes <= p->wirevec[i].iov_len) {
	    savelen = p->wirevec[i].iov_len;
	    saven = p->niovecs;
	    p->wirevec[i].iov_len = nbytes;
	    p->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
	rxi_Send(call, p, istack);
    else
	rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {		/* means we truncated the packet above.  We probably don't  */
	/* really need to do this, but it seems safer this way, given that  */
	/* sneaky optionalPacket... */
	p->wirevec[i - 1].iov_len = savelen;
	p->niovecs = saven;
    }
    if (!optionalPacket)
	rxi_FreePacket(p);
    return optionalPacket;
}
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */

    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
		   | (((afs_uint32) p->header.flags) << 16)
		   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
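/* Resulting wire header, for reference (an added sketch, not original
 * source): seven 32-bit words in network byte order --
 *
 *   epoch | cid | callNumber | seq | serial |
 *   type:8 flags:8 userStatus:8 securityIndex:8 |
 *   spare:16 serviceId:16
 *
 * rxi_DecodePacketHeader() below is the exact inverse, except that on
 * received packets the top 16 bits of the last word also carry the
 * security checksum. */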
/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;

    temp = ntohl(*buf);
    buf++;

    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf);
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
void
rxi_PrepareSendPacket(register struct rx_call *call,
		      register struct rx_packet *p, register int last)
{
    register struct rx_connection *conn = call->conn;
    int i, j;
    ssize_t len;		/* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
	p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);	/* Never yet transmitted */
    clock_Zero(&p->firstSent);	/* Never yet transmitted */
    p->header.serial = 0;	/* Another way of saying never transmitted... */

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
	len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
	osi_Panic("PrepareSendPacket 1\n");	/* MTUXXX */
    } else {
	/* Free any extra elements in the wirevec */
	for (j = MAX(2, i); j < p->niovecs; j++) {
	    rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
	}
	p->niovecs = i;
	p->wirevec[i - 1].iov_len += len;
    }
    RXS_PreparePacket(conn->securityObject, call, p);
}
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
	return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
	return adjMTU;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
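/* Worked example (added for illustration; assumes the usual constants
 * RX_HEADER_SIZE = 28, RX_JUMBOBUFFERSIZE = 1412, RX_JUMBOHEADERSIZE = 4):
 * adjMTU = 28 + 1412 + 4 = 1444.  For an Ethernet-sized mtu of 1500, the
 * remainder 1500 - 1444 = 56 cannot hold another whole 1416-byte fragment,
 * so frags = 0 and the adjusted MTU is 1444; the extra 56 bytes are given
 * up because they could never hold a full jumbogram buffer. */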
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
	return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
	return 2;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
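/* Worked example (added for illustration; constants assumed as in the
 * rxi_AdjustIfMTU note above, with UDP_HDR_SIZE = 28 for an IP + UDP
 * header): with frags = 4 and an adjusted mtu of 1444,
 * maxMTU = 4 * (1444 + 28) - 28 = 5860; subtracting
 * 28 + 2 * 1412 + 4 = 2856 for the first and last packets leaves 3004,
 * and 3004 / (1412 + 4) = 2 middle packets, so the jumbogram can carry
 * 2 + 2 = 4 packets. */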