 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License. For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include "afs/param.h"
#include <afs/param.h>
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
struct rx_packet *rx_mallocedP = 0;
extern char cml_version_number[];
extern int (*rx_almostSent) ();
static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                                afs_int32 ahost, short aport,
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact. If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately. For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 * all packet buffers (iov_base) are integral multiples of
 * offset is an integral multiple of the word size.
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
            *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
        l += packet->wirevec[i].iov_len;
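/* A usage sketch for the two slow paths above (hypothetical caller code,
 * not part of the original source). Offsets are measured from the start of
 * the user data area (wirevec[1]), which is why both loops begin scanning
 * at i = 1:
 *
 *     afs_int32 v = rx_SlowGetInt32(p, 8);    read the word at data offset 8
 *     rx_SlowPutInt32(p, 8, v + 1);           store it back, incremented
 *
 * Both helpers walk the iovec chain until the accumulated length l passes
 * the requested offset, then index into that buffer at (offset - l).
 */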
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested. l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
    while ((resid > 0) && (i < packet->niovecs)) {
        j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
        memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
        l += packet->wirevec[i].iov_len;
    return (resid ? (r - resid) : r);
 * all packet buffers (iov_base) are integral multiples of the
 * offset is an integral multiple of the word size.
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
        if (l + packet->wirevec[i].iov_len > offset) {
        l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested. l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
    while ((resid > 0) && (i < RX_MAXWVECS)) {
        if (i >= packet->niovecs)
            if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
        b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
        j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
        l += packet->wirevec[i].iov_len;
    return (resid ? (r - resid) : r);
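/* A note on the return convention of rx_SlowReadPacket and rx_SlowWritePacket
 * (an editorial sketch, not part of the original source): r starts out equal
 * to the requested byte count resid, so the value returned is the number of
 * bytes actually transferred. A hypothetical worked example: asking to read
 * 4096 bytes when only 1500 bytes of data lie past the offset leaves
 * resid = 2596 when the iovec walk ends, so the call returns
 * r - resid = 1500; a fully satisfied request returns r itself.
 */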
static struct rx_packet *
    MUTEX_ENTER(&rx_freePktQ_lock);
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        MUTEX_ENTER(&rx_stats_mutex);
        case RX_PACKET_CLASS_RECEIVE:
            rx_stats.receivePktAllocFailures++;
        case RX_PACKET_CLASS_SEND:
            rx_stats.sendPktAllocFailures++;
        case RX_PACKET_CLASS_SPECIAL:
            rx_stats.specialPktAllocFailures++;
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_stats.receiveCbufPktAllocFailures++;
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_stats.sendCbufPktAllocFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
    if (queue_IsEmpty(&rx_freePacketQueue)) {
        rxi_NeedMorePackets = TRUE;
    if (queue_IsEmpty(&rx_freePacketQueue)) {
        rxi_MorePacketsNoLock(rx_initSendWindow);
    c = queue_First(&rx_freePacketQueue, rx_packet);
    if (!(c->flags & RX_PKTFLAG_FREE))
        osi_Panic("rxi_AllocPacket: packet not free\n");
    c->flags = 0;		/* clear RX_PKTFLAG_FREE, initialize the rest */
    MUTEX_EXIT(&rx_freePktQ_lock);
/* Free a packet currently used as a continuation buffer
rxi_freeCBuf(struct rx_packet *c)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreePacketNoLock(c);
    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending. All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
        if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
        if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
            p->wirevec[i].iov_len += nb;
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time. Locking on continuation
 * packets is handled by allocCBuf */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    for (i = p->niovecs; nb > 0 && i < RX_MAXWVECS; i++) {
        register struct rx_packet *cb;
        if ((cb = allocCBuf(class))) {
            p->wirevec[i].iov_base = (caddr_t) cb->localdata;
            p->wirevec[i].iov_len = RX_CBUFFERSIZE;
            nb -= RX_CBUFFERSIZE;
            p->length += RX_CBUFFERSIZE;
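/* A hypothetical caller of rxi_AllocDataBuf (a sketch, not part of the
 * original source), relying on the return convention documented above
 * (<= 0 on success, otherwise the shortfall in bytes):
 *
 *     if (rxi_AllocDataBuf(p, nb, RX_PACKET_CLASS_SEND_CBUF) > 0) {
 *         ... could not attach enough continuation buffers; send a
 *         shorter packet or give up ...
 *     }
 *
 * Each continuation buffer is itself a struct rx_packet whose localdata is
 * spliced into p->wirevec, so p->length grows by RX_CBUFFERSIZE per buffer.
 */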
/* Add more packet buffers */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (e = p + apackets; p < e; p++) {
        p->wirevec[0].iov_base = (char *)(p->wirehead);
        p->wirevec[0].iov_len = RX_HEADER_SIZE;
        p->wirevec[1].iov_base = (char *)(p->localdata);
        p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    MUTEX_EXIT(&rx_freePktQ_lock);
/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
    struct rx_packet *p, *e;
    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
        * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
    memset((char *)p, 0, getme);
    for (e = p + apackets; p < e; p++) {
        p->wirevec[0].iov_base = (char *)(p->wirehead);
        p->wirevec[0].iov_len = RX_HEADER_SIZE;
        p->wirevec[1].iov_base = (char *)(p->localdata);
        p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
        p->flags |= RX_PKTFLAG_FREE;
        queue_Append(&rx_freePacketQueue, p);
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
             (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
        rxi_MorePackets(rx_initSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out. It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
/* This explanation is bogus. The free list doesn't remain in any kind of
   useful order: the packets in use get pretty much randomly scattered
   across all the pages. In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list. An array springs rapidly to mind.
/* Actually free the packet p. */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %lx\n", (unsigned long)p));
    if (p->flags & RX_PKTFLAG_FREE)
        osi_Panic("rxi_FreePacketNoLock: packet already free\n");
    p->flags |= RX_PKTFLAG_FREE;
    queue_Append(&rx_freePacketQueue, p);
rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    if (first != 1)		/* MTUXXX */
        osi_Panic("FreeDataBufs 1: first must be 1");
    iov = &p->wirevec[1];
    end = iov + (p->niovecs - 1);
    if (iov->iov_base != (caddr_t) p->localdata)	/* MTUXXX */
        osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
    for (iov++; iov < end; iov++) {
            osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
int rxi_nBadIovecs = 0;
/* rxi_RestoreDataBufs
 * Restore the correct sizes to the iovecs. Called when reusing a packet
 * for reading off the wire.
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];
    p->wirevec[0].iov_base = (char *)(p->wirehead);
    p->wirevec[0].iov_len = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *)(p->localdata);
    p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
        if (!iov->iov_base) {
        iov->iov_len = RX_CBUFFERSIZE;
rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
        osi_Panic("TrimDataBufs 1: first must be 1");
    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
            osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
        length -= iov->iov_len;
    /* iov now points to the first empty data buffer. */
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (; iov < end; iov++) {
            osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
        rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    MUTEX_EXIT(&rx_freePktQ_lock);
/* Free the packet p. P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreeDataBufsNoLock(p, 1);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary; besides, this is the way the
 * length field is usually used */
rxi_AllocPacketNoLock(int class)
    register struct rx_packet *p;
    if (rxi_OverQuota(class)) {
        rxi_NeedMorePackets = TRUE;
        MUTEX_ENTER(&rx_stats_mutex);
        case RX_PACKET_CLASS_RECEIVE:
            rx_stats.receivePktAllocFailures++;
        case RX_PACKET_CLASS_SEND:
            rx_stats.sendPktAllocFailures++;
        case RX_PACKET_CLASS_SPECIAL:
            rx_stats.specialPktAllocFailures++;
        case RX_PACKET_CLASS_RECV_CBUF:
            rx_stats.receiveCbufPktAllocFailures++;
        case RX_PACKET_CLASS_SEND_CBUF:
            rx_stats.sendCbufPktAllocFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
        return (struct rx_packet *)0;
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);
    if (queue_IsEmpty(&rx_freePacketQueue))
        osi_Panic("rxi_AllocPacket error");
    if (queue_IsEmpty(&rx_freePacketQueue))
        rxi_MorePacketsNoLock(rx_initSendWindow);
    p = queue_First(&rx_freePacketQueue, rx_packet);
    if (!(p->flags & RX_PKTFLAG_FREE))
        osi_Panic("rxi_AllocPacket: packet not free\n");
    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
    p->flags = 0;		/* clear RX_PKTFLAG_FREE, initialize the rest */
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets. In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
    p->wirevec[0].iov_base = (char *)(p->wirehead);
    p->wirevec[0].iov_len = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *)(p->localdata);
    p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
    p->length = RX_FIRSTBUFFERSIZE;
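/* Editorial note (not part of the original source): a freshly allocated
 * packet thus has two iovecs, the wire header (RX_HEADER_SIZE bytes in
 * wirevec[0]) and the first data buffer (RX_FIRSTBUFFERSIZE bytes in
 * wirevec[1]), with p->length counting only the latter. The datagram
 * actually handed to the socket layer is always p->length + RX_HEADER_SIZE
 * bytes, matching the osi_NetSend calls later in this file.
 */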
rxi_AllocPacket(int class)
    register struct rx_packet *p;
    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call. It also sets the packet length before
 * returning. caution: this is often called at NETPRI
 * Called with call locked.
rxi_AllocSendPacket(register struct rx_call *call, int want)
    register struct rx_packet *p = (struct rx_packet *)0;
    register unsigned delta;
    mud = call->MTU - RX_HEADER_SIZE;
        rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
        rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
    while (!(call->error)) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        /* if an error occurred, or we get the packet we want, we're done */
        if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
            MUTEX_EXIT(&rx_freePktQ_lock);
            want = MIN(want, mud);
            if ((unsigned)want > p->length)
                (void)rxi_AllocDataBuf(p, (want - p->length),
                                       RX_PACKET_CLASS_SEND_CBUF);
            if ((unsigned)p->length > mud)
            if (delta >= p->length) {
        /* no error occurred, and we didn't get a packet, so we sleep.
         * At this point, we assume that packets will be returned
         * sooner or later, as packets are acknowledged, and so we
        call->flags |= RX_CALL_WAIT_PACKETS;
        CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
        MUTEX_EXIT(&call->lock);
        rx_waitingForPackets = 1;
#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
        osi_rxSleep(&rx_waitingForPackets);
        MUTEX_EXIT(&rx_freePktQ_lock);
        MUTEX_ENTER(&call->lock);
        CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
        call->flags &= ~RX_CALL_WAIT_PACKETS;
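/* The net effect of the loop above, as a sketch (not part of the original
 * source; the trimming arithmetic sits partly in elided lines): with
 * mud = call->MTU - RX_HEADER_SIZE and delta the combined security
 * header/trailer overhead, a caller asking for want bytes gets a packet
 * whose usable payload is roughly MIN(want, mud) - delta. If no packet is
 * free, the call drops the call lock and blocks until a packet is freed
 * and the waiter is signalled.
 */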
/* count the number of used FDs */
CountFDs(register int amax)
    register int i, code;
    for (i = 0; i < amax; i++) {
        code = fstat(i, &tstat);
#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)
/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(int socket, register struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    register afs_int32 tlen, savelen;
    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */
    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising. Only check
				 * it once in order to avoid races. */
        tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);
    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;
    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
            rxi_MorePackets(rx_initSendWindow);
        else if (nbytes < 0 && errno == EWOULDBLOCK) {
            MUTEX_ENTER(&rx_stats_mutex);
            rx_stats.noPacketOnRead++;
            MUTEX_EXIT(&rx_stats_mutex);
            MUTEX_ENTER(&rx_stats_mutex);
            rx_stats.bogusPacketOnRead++;
            rx_stats.bogusHost = from.sin_addr.s_addr;
            MUTEX_EXIT(&rx_stats_mutex);
            dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
                 from.sin_port, nbytes));
    /* Extract packet header. */
    rxi_DecodePacketHeader(p);
    *host = from.sin_addr.s_addr;
    *port = from.sin_port;
    if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
        struct rx_peer *peer;
        MUTEX_ENTER(&rx_stats_mutex);
        rx_stats.packetsRead[p->header.type - 1]++;
        MUTEX_EXIT(&rx_stats_mutex);
         * Try to look up this peer structure. If it doesn't exist,
         * don't create a new one -
         * we don't keep count of the bytes sent/received if a peer
         * structure doesn't already exist.
         * The peer/connection cleanup code assumes that there is 1 peer
         * per connection. If we actually created a peer structure here
         * and this packet was an rxdebug packet, the peer structure would
         * never be cleaned up.
        peer = rxi_FindPeer(*host, *port, 0, 0);
        /* Since this may not be associated with a connection,
         * it may have no refCount, meaning we could race with
        if (peer && (peer->refCount > 0)) {
            MUTEX_ENTER(&peer->peer_lock);
            hadd32(peer->bytesReceived, p->length);
            MUTEX_EXIT(&peer->peer_lock);
    /* Free any empty packet buffers at the end of this packet */
    rxi_TrimDataBufs(p, 1);
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header. All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
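/* A sketch of the resulting wire layout for an n-packet jumbogram (not part
 * of the original source), assuming the conventional sizes
 * RX_JUMBOBUFFERSIZE = 1412 and RX_JUMBOHEADERSIZE = 4:
 *
 *   | rx header | data (1412) | jumbo hdr (4) | data (1412) | ... | last data |
 *                 packet 1                      packet 2            packet n
 *
 * Each abbreviated jumbo header carries the flags and checksum of the packet
 * that follows it, which is why the code below reads jp at
 * wirevec[1].iov_base + RX_JUMBOBUFFERSIZE.
 */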
rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length. All but the first packet are preceded by
     * an abbreviated four byte header. The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if ((int)p->length < length) {
        dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
    niov = p->niovecs - 2;
        dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);
    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
        np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);
    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
/* Send a udp datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
            int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
 * message receipt is done in rxk_input or rx_put.
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
        memcpy(cp, (char *)mp->b_rptr, n);
/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
    register int m, n, o, t, i;
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
        if (mp->b_datap->db_type != M_DATA) {
        n = MIN(len, (mp->b_wptr - mp->b_rptr));
            t = iovs[i].iov_len;
            memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#if !defined(AFS_LINUX20_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;
    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
        osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */
    if (m->m_len <= off) {
    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;
        t = MIN(l1, MIN(l2, (unsigned int)len));
            p1 = mtod(m, caddr_t);
            p2 = iovs[i].iov_base;
            l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;
    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */
rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
                       afs_int32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);
    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
            struct rx_debugStats tstat;
            /* get basic stats */
            memset((char *)&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
            tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
            tstat.waitingForPackets = rx_waitingForPackets;
            MUTEX_ENTER(&rx_serverPool_lock);
            tstat.nFreePackets = htonl(rx_nFreePackets);
            tstat.callsExecuted = htonl(rxi_nCalls);
            tstat.packetReclaims = htonl(rx_packetReclaims);
            tstat.usedFDs = CountFDs(64);
            tstat.nWaiting = htonl(rx_nWaiting);
            tstat.nWaited = htonl(rx_nWaited);
            queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
            MUTEX_EXIT(&rx_serverPool_lock);
            tstat.idleThreads = htonl(tstat.idleThreads);
            tl = sizeof(struct rx_debugStats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
            ap->length = sizeof(struct rx_debugStats);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
            rx_computelen(ap, ap->length);
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
            register struct rx_connection *tc;
            struct rx_call *tcall;
            struct rx_debugConn tconn;
            int all = (tin.type == RX_DEBUGI_GETALLCONN);
            tl = sizeof(struct rx_debugConn) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            memset((char *)&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
            /* get N'th (maybe) "interesting" connection info */
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_connHashTable_lock);
                /* We might be slightly out of step since we are not
                 * locking each call, but this is only debugging output.
                for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
                    if ((all || rxi_IsConnInteresting(tc))
                        && tin.index-- <= 0) {
                        tconn.host = tc->peer->host;
                        tconn.port = tc->peer->port;
                        tconn.cid = htonl(tc->cid);
                        tconn.epoch = htonl(tc->epoch);
                        tconn.serial = htonl(tc->serial);
                        for (j = 0; j < RX_MAXCALLS; j++) {
                            tconn.callNumber[j] = htonl(tc->callNumber[j]);
                            if ((tcall = tc->call[j])) {
                                tconn.callState[j] = tcall->state;
                                tconn.callMode[j] = tcall->mode;
                                tconn.callFlags[j] = tcall->flags;
                                if (queue_IsNotEmpty(&tcall->rq))
                                    tconn.callOther[j] |= RX_OTHER_IN;
                                if (queue_IsNotEmpty(&tcall->tq))
                                    tconn.callOther[j] |= RX_OTHER_OUT;
                                tconn.callState[j] = RX_STATE_NOTINIT;
                        tconn.natMTU = htonl(tc->peer->natMTU);
                        tconn.error = htonl(tc->error);
                        tconn.flags = tc->flags;
                        tconn.type = tc->type;
                        tconn.securityIndex = tc->securityIndex;
                        if (tc->securityObject) {
                            RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
                            DOHTONL(packetsReceived);
                            DOHTONL(packetsSent);
                            DOHTONL(bytesReceived);
                                 sizeof(tconn.secStats.spares) /
                                 sizeof(tconn.secStats.sparel) /
                                 sizeof(afs_int32); i++)
                        MUTEX_EXIT(&rx_connHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
                        ap->length = sizeof(struct rx_debugConn);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                MUTEX_EXIT(&rx_connHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tconn.cid = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
            ap->length = sizeof(struct rx_debugConn);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
     * Pass back all the peer structures we have available
    case RX_DEBUGI_GETPEER:{
            register struct rx_peer *tp;
            struct rx_debugPeer tpeer;
            tl = sizeof(struct rx_debugPeer) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            memset((char *)&tpeer, 0, sizeof(tpeer));
            for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
                /* the time complexity of the algorithm used here
                 * exponentially increases with the number of peers.
                 * Yielding after processing each hash table entry
                 * and dropping rx_peerHashTable_lock
                 * also increases the risk that we will miss a new
                 * entry - but we are willing to live with this
                 * limitation since this is meant for debugging only
#ifdef AFS_PTHREAD_ENV
                MUTEX_ENTER(&rx_peerHashTable_lock);
                for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
                    if (tin.index-- <= 0) {
                        tpeer.host = tp->host;
                        tpeer.port = tp->port;
                        tpeer.ifMTU = htons(tp->ifMTU);
                        tpeer.idleWhen = htonl(tp->idleWhen);
                        tpeer.refCount = htons(tp->refCount);
                        tpeer.burstSize = tp->burstSize;
                        tpeer.burst = tp->burst;
                        tpeer.burstWait.sec = htonl(tp->burstWait.sec);
                        tpeer.burstWait.usec = htonl(tp->burstWait.usec);
                        tpeer.rtt = htonl(tp->rtt);
                        tpeer.rtt_dev = htonl(tp->rtt_dev);
                        tpeer.timeout.sec = htonl(tp->timeout.sec);
                        tpeer.timeout.usec = htonl(tp->timeout.usec);
                        tpeer.nSent = htonl(tp->nSent);
                        tpeer.reSends = htonl(tp->reSends);
                        tpeer.inPacketSkew = htonl(tp->inPacketSkew);
                        tpeer.outPacketSkew = htonl(tp->outPacketSkew);
                        tpeer.rateFlag = htonl(tp->rateFlag);
                        tpeer.natMTU = htons(tp->natMTU);
                        tpeer.maxMTU = htons(tp->maxMTU);
                        tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
                        tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
                        tpeer.MTU = htons(tp->MTU);
                        tpeer.cwind = htons(tp->cwind);
                        tpeer.nDgramPackets = htons(tp->nDgramPackets);
                        tpeer.congestSeq = htons(tp->congestSeq);
                        tpeer.bytesSent.high = htonl(tp->bytesSent.high);
                        tpeer.bytesSent.low = htonl(tp->bytesSent.low);
                        tpeer.bytesReceived.high =
                            htonl(tp->bytesReceived.high);
                        tpeer.bytesReceived.low =
                            htonl(tp->bytesReceived.low);
                        MUTEX_EXIT(&rx_peerHashTable_lock);
                        rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
                        ap->length = sizeof(struct rx_debugPeer);
                        rxi_SendDebugPacket(ap, asocket, ahost, aport,
                MUTEX_EXIT(&rx_peerHashTable_lock);
            /* if we make it here, there are no interesting packets */
            tpeer.host = htonl(0xffffffff);	/* means end */
            rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
            ap->length = sizeof(struct rx_debugPeer);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    case RX_DEBUGI_RXSTATS:{
            tl = sizeof(rx_stats) - ap->length;
                tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
            /* Since it's all int32s, convert to network order with a loop. */
            MUTEX_ENTER(&rx_stats_mutex);
            s = (afs_int32 *) & rx_stats;
            for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
                rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
            ap->length = sizeof(rx_stats);
            MUTEX_EXIT(&rx_stats_mutex);
            rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
        /* error response packet */
        tin.type = htonl(RX_DEBUGI_BADTYPE);
        tin.index = tin.type;
        rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
        ap->length = sizeof(struct rx_debugIn);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
                         afs_int32 ahost, short aport, int istack)
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
    if (ap->header.flags & RX_CLIENT_INITIATED) {
        ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
        rxi_EncodePacketHeader(ap);
        memset(buf, 0, sizeof(buf));
        strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
        rx_packetwrite(ap, 0, 65, buf);
        rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
                    afs_int32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;
    int waslocked = ISAFS_GLOCK();
    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
        if (nbytes <= apacket->wirevec[i].iov_len) {
            savelen = apacket->wirevec[i].iov_len;
            saven = apacket->niovecs;
            apacket->wirevec[i].iov_len = nbytes;
            apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
        nbytes -= apacket->wirevec[i].iov_len;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
                      apacket->length + RX_HEADER_SIZE, istack);
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
    if (saven) {		/* means we truncated the packet above. */
        apacket->wirevec[i - 1].iov_len = savelen;
        apacket->niovecs = saven;
/* Send the packet to appropriate destination for the specified
 * call. The header is first encoded and placed in the packet.
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
               struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    char deliveryType = 'S';
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;
    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here. We could
     * probably do away with the encode/decode routines. XXXXX */
    /* Stamp each packet with a unique serial number. The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times. RTO estimation is not a la Karn.
    if (p->firstSerial == 0) {
        p->firstSerial = p->header.serial;
    /* If an output tracer function is defined, call it with the packet and
     * network address. Note this function may modify its arguments. */
    if (rx_almostSent) {
        int drop = (*rx_almostSent) (p, &addr);
        /* drop packet if return value is non-zero? */
            deliveryType = 'D';	/* Drop the packet */
    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */
    /* Send the packet out on the same socket that related packets are being
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';	/* Drop the packet */
        deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */
    /* Loop until the packet is sent. We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
    waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "before osi_NetSend()");
         osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
                     p->length + RX_HEADER_SIZE, istack)) != 0) {
        /* send failed, so let's hurry up the resend, eh? */
        MUTEX_ENTER(&rx_stats_mutex);
        rx_stats.netSendFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
        p->retryTime = p->timeSent;	/* resend it very soon */
        clock_Addmsec(&(p->retryTime),
                      10 + (((afs_uint32) p->backoff) << 8));
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
        /* Linux is nice -- it can tell us right away that we cannot
         * reach this recipient by returning an ENETUNREACH error
         * code. So, when this happens let's "down" the host NOW so
         * we don't sit around waiting for this host to timeout later.
        if (call && code == -ENETUNREACH)
            call->lastReceiveTime = 0;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
        afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
                   "after osi_NetSend()");
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
/* Send a list of packets to appropriate destination for the specified
 * connection. The headers are first encoded and placed in the packets.
rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
                   struct rx_packet **list, int len, int istack)
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length, code;
    struct rx_jumboHeader *jp;
    char deliveryType = 'S';
    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;
    if (len + 1 > RX_MAXIOVECS) {
        osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
     * Stamp the packets in this jumbogram with consecutive serial numbers
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here. We could
     * probably do away with the encode/decode routines. XXXXX */
    length = RX_HEADER_SIZE;
    wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0; i < len; i++) {
        /* The whole 3.5 jumbogram scheme relies on packets fitting
         * in a single packet buffer. */
        if (p->niovecs > 2) {
            osi_Panic("rxi_SendPacketList, niovecs > 2\n");
        /* Set the RX_JUMBO_PACKET flags in all but the last packets
            if (p->length != RX_JUMBOBUFFERSIZE) {
                osi_Panic("rxi_SendPacketList, length != jumbo size\n");
            p->header.flags |= RX_JUMBO_PACKET;
            length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
            wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
            wirevec[i + 1].iov_len = p->length;
            length += p->length;
        wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
            /* Convert jumbo packet header to network byte order */
            temp = (afs_uint32) (p->header.flags) << 24;
            temp |= (afs_uint32) (p->header.spare);
            *(afs_uint32 *) jp = htonl(temp);
        jp = (struct rx_jumboHeader *)
            ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
        /* Stamp each packet with a unique serial number. The serial
         * number is maintained on a connection basis because some types
         * of security may be based on the serial number of the packet,
         * and security is handled on a per authenticated-connection
        /* Pre-increment, to guarantee no zero serial number; a zero
         * serial number means the packet was never sent. */
        p->header.serial = ++serial;
        /* This is so we can adjust retransmit time-outs better in the face of
         * rapidly changing round-trip times. RTO estimation is not a la Karn.
        if (p->firstSerial == 0) {
            p->firstSerial = p->header.serial;
        /* If an output tracer function is defined, call it with the packet and
         * network address. Note this function may modify its arguments. */
        if (rx_almostSent) {
            int drop = (*rx_almostSent) (p, &addr);
            /* drop packet if return value is non-zero? */
                deliveryType = 'D';	/* Drop the packet */
        /* Get network byte order header */
        rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
					 * touch ALL the fields */
    /* Send the packet out on the same socket that related packets are being
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';	/* Drop the packet */
        deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */
    /* Loop until the packet is sent. We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    waslocked = ISAFS_GLOCK();
    if (!istack && waslocked)
         osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
        /* send failed, so let's hurry up the resend, eh? */
        MUTEX_ENTER(&rx_stats_mutex);
        rx_stats.netSendFailures++;
        MUTEX_EXIT(&rx_stats_mutex);
        for (i = 0; i < len; i++) {
            p->retryTime = p->timeSent;	/* resend it very soon */
            clock_Addmsec(&(p->retryTime),
                          10 + (((afs_uint32) p->backoff) << 8));
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
        /* Linux is nice -- it can tell us right away that we cannot
         * reach this recipient by returning an ENETUNREACH error
         * code. So, when this happens let's "down" the host NOW so
         * we don't sit around waiting for this host to timeout later.
        if (call && code == -ENETUNREACH)
            call->lastReceiveTime = 0;
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    if (!istack && waslocked)
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
/* Send a "special" packet to the peer connection. If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only. Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer. Nbytes is the length of the data
 * portion of the packet. If data is non-null, nbytes of data are
 * copied into the packet. Type is the type of the packet, as defined
 * in rx.h. Bug: there's a lot of duplication between this and other
 * routines. This needs to be cleaned up. */
rxi_SendSpecial(register struct rx_call *call,
                register struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    int savelen = 0, saven = 0;
    int channel, callNumber;
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
            osi_Panic("rxi_SendSpecial failure");
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
        rx_packetwrite(p, 0, nbytes, data);
    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1;	/* so condition fails because i == niovecs */
        nbytes -= p->wirevec[i].iov_len;
        rxi_Send(call, p, istack);
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {		/* means we truncated the packet above. We probably don't */
        /* really need to do this, but it seems safer this way, given that */
        /* sneaky optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
    if (!optionalPacket)
    return optionalPacket;
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
rxi_EncodePacketHeader(register struct rx_packet *p)
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
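/* For reference (an editorial summary, not part of the original source),
 * the seven 32-bit words written above form the on-the-wire rx header,
 * in network byte order:
 *
 *   word 0: epoch
 *   word 1: cid (connection id | channel)
 *   word 2: callNumber
 *   word 3: seq
 *   word 4: serial
 *   word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6: spare<<16 | serviceId (the top half doubles as the security
 *           checksum; see the note in rxi_DecodePacketHeader below)
 *
 * Seven words make RX_HEADER_SIZE = 28 bytes.
 */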
/* Decode the packet's header (from net byte order to a struct header) */
rxi_DecodePacketHeader(register struct rx_packet *p)
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    p->header.epoch = ntohl(*buf);
    p->header.cid = ntohl(*buf);
    p->header.callNumber = ntohl(*buf);
    p->header.seq = ntohl(*buf);
    p->header.serial = ntohl(*buf);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
rxi_PrepareSendPacket(register struct rx_call *call,
                      register struct rx_packet *p, register int last)
    register struct rx_connection *conn = call->conn;
    ssize_t len;		/* len must be a signed type; it can go negative */
    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
        p->header.flags |= RX_LAST_PACKET;
    clock_Zero(&p->retryTime);	/* Never yet transmitted */
    clock_Zero(&p->firstSent);	/* Never yet transmitted */
    p->header.serial = 0;	/* Another way of saying never transmitted... */
    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;
    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
            osi_Panic("PrepareSendPacket 1\n");	/* MTUXXX */
        /* Free any extra elements in the wirevec */
        for (j = MAX(2, i); j < p->niovecs; j++) {
            rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
        p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
rxi_AdjustIfMTU(int mtu)
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
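/* A worked example (not part of the original source), assuming the usual
 * constants RX_HEADER_SIZE = 28, RX_JUMBOBUFFERSIZE = 1412 and
 * RX_JUMBOHEADERSIZE = 4: adjMTU works out to 28 + 1412 + 4 = 1444.
 * For an Ethernet interface MTU of 1500, only 56 bytes remain once adjMTU
 * is subtracted (that decrement sits in an elided line), too few for
 * another (1412 + 4)-byte fragment, so frags = 0 and the adjusted MTU is
 * 1444: exactly one header plus one padded jumbo buffer.
 */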
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
rxi_AdjustDgramPackets(int frags, int mtu)
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
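/* A worked example under stated assumptions (not part of the original
 * source): with RX_HEADER_SIZE = 28, RX_JUMBOBUFFERSIZE = 1412,
 * RX_JUMBOHEADERSIZE = 4, and, hypothetically, UDP_HDR_SIZE = 8 with no
 * capping by RX_MAX_PACKET_SIZE, frags = 4 and mtu = 1444 give
 * maxMTU = 4 * (1444 + 8) - 8 = 5800. Subtracting the first and last
 * packets' share, 28 + 2 * 1412 + 4 = 2856, leaves 2944, and
 * 2944 / (1412 + 4) = 2 middle packets, so the function returns 2 + 2 = 4.
 */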