2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "afs/param.h"
14 #include <afs/param.h>
22 #include "afs/sysincludes.h"
23 #include "afsincludes.h"
24 #include "rx/rx_kcommon.h"
25 #include "rx/rx_clock.h"
26 #include "rx/rx_queue.h"
27 #include "rx/rx_packet.h"
28 #else /* defined(UKERNEL) */
29 #ifdef RX_KERNEL_TRACE
30 #include "../rx/rx_kcommon.h"
33 #ifndef AFS_LINUX20_ENV
36 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
37 #include "afs/sysincludes.h"
39 #if defined(AFS_OBSD_ENV)
43 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
44 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
45 #include "sys/mount.h" /* it gets pulled in by something later anyway */
49 #include "netinet/in.h"
50 #include "afs/afs_osi.h"
51 #include "rx_kmutex.h"
52 #include "rx/rx_clock.h"
53 #include "rx/rx_queue.h"
55 #include <sys/sysmacros.h>
57 #include "rx/rx_packet.h"
58 #endif /* defined(UKERNEL) */
59 #include "rx/rx_globals.h"
61 #include "sys/types.h"
64 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
68 #include <sys/socket.h>
69 #include <netinet/in.h>
70 #endif /* AFS_NT40_ENV */
71 #include "rx_xmit_nt.h"
74 #include <sys/socket.h>
75 #include <netinet/in.h>
81 #include <sys/sysmacros.h>
83 #include "rx_packet.h"
84 #include "rx_globals.h"
100 /* rxdb_fileID is used to identify the lock location, along with line#. */
101 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
102 #endif /* RX_LOCKS_DB */
103 struct rx_packet *rx_mallocedP = 0;
105 extern char cml_version_number[];
106 extern int (*rx_almostSent) ();
108 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
109 afs_int32 ahost, short aport,
112 /* some rules about packets:
113 * 1. When a packet is allocated, the final iov_buf contains room for
114 * a security trailer, but iov_len masks that fact. If the security
115 * package wants to add the trailer, it may do so, and then extend
116 * iov_len appropriately. For this reason, packet's niovecs and
117 * iov_len fields should be accurate before calling PreparePacket.
121 * all packet buffers (iov_base) are integral multiples of
123 * offset is an integral multiple of the word size.
126 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
130 for (l = 0, i = 1; i < packet->niovecs; i++) {
131 if (l + packet->wirevec[i].iov_len > offset) {
133 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
136 l += packet->wirevec[i].iov_len;
143 * all packet buffers (iov_base) are integral multiples of the word size.
144 * offset is an integral multiple of the word size.
147 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
151 for (l = 0, i = 1; i < packet->niovecs; i++) {
152 if (l + packet->wirevec[i].iov_len > offset) {
153 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
154 (offset - l))) = data;
157 l += packet->wirevec[i].iov_len;
164 * all packet buffers (iov_base) are integral multiples of the
166 * offset is an integral multiple of the word size.
168 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
171 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
174 unsigned int i, j, l, r;
175 for (l = 0, i = 1; i < packet->niovecs; i++) {
176 if (l + packet->wirevec[i].iov_len > offset) {
179 l += packet->wirevec[i].iov_len;
182 /* i is the iovec which contains the first little bit of data in which we
183 * are interested. l is the total length of everything prior to this iovec.
184 * j is the number of bytes we can safely copy out of this iovec.
187 while ((resid > 0) && (i < packet->niovecs)) {
188 j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
189 memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
191 l += packet->wirevec[i].iov_len;
195 return (resid ? (r - resid) : r);
200 * all packet buffers (iov_base) are integral multiples of the
202 * offset is an integral multiple of the word size.
205 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
210 for (l = 0, i = 1; i < packet->niovecs; i++) {
211 if (l + packet->wirevec[i].iov_len > offset) {
214 l += packet->wirevec[i].iov_len;
217 /* i is the iovec which contains the first little bit of data in which we
218 * are interested. l is the total length of everything prior to this iovec.
219 * j is the number of bytes we can safely copy into this iovec.
222 while ((resid > 0) && (i < RX_MAXWVECS)) {
223 if (i >= packet->niovecs)
224 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
227 b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
228 j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
231 l += packet->wirevec[i].iov_len;
235 return (resid ? (r - resid) : r);
238 static struct rx_packet *
245 MUTEX_ENTER(&rx_freePktQ_lock);
248 if (rxi_OverQuota(class)) {
250 rxi_NeedMorePackets = TRUE;
251 MUTEX_ENTER(&rx_stats_mutex);
253 case RX_PACKET_CLASS_RECEIVE:
254 rx_stats.receivePktAllocFailures++;
256 case RX_PACKET_CLASS_SEND:
257 rx_stats.sendPktAllocFailures++;
259 case RX_PACKET_CLASS_SPECIAL:
260 rx_stats.specialPktAllocFailures++;
262 case RX_PACKET_CLASS_RECV_CBUF:
263 rx_stats.receiveCbufPktAllocFailures++;
265 case RX_PACKET_CLASS_SEND_CBUF:
266 rx_stats.sendCbufPktAllocFailures++;
269 MUTEX_EXIT(&rx_stats_mutex);
273 if (queue_IsEmpty(&rx_freePacketQueue)) {
275 rxi_NeedMorePackets = TRUE;
279 if (queue_IsEmpty(&rx_freePacketQueue)) {
280 rxi_MorePacketsNoLock(rx_initSendWindow);
285 c = queue_First(&rx_freePacketQueue, rx_packet);
287 if (!(c->flags & RX_PKTFLAG_FREE))
288 osi_Panic("rxi_AllocPacket: packet not free\n");
289 c->flags = 0; /* clear RX_PKTFLAG_FREE, initialize the rest */
295 MUTEX_EXIT(&rx_freePktQ_lock);
302 * Free a packet currently used as a continuation buffer
305 rxi_freeCBuf(struct rx_packet *c)
310 MUTEX_ENTER(&rx_freePktQ_lock);
312 rxi_FreePacketNoLock(c);
313 /* Wakeup anyone waiting for packets */
316 MUTEX_EXIT(&rx_freePktQ_lock);
320 /* this one is kind of awful.
321 * In rxkad, the packet has been all shortened, and everything, ready for
322 * sending. All of a sudden, we discover we need some of that space back.
323 * This isn't terribly general, because it knows that the packets are only
324 * rounded up to the EBS (userdata + security header).
327 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
331 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
332 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
333 p->wirevec[i].iov_len += nb;
337 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
338 p->wirevec[i].iov_len += nb;
346 /* get sufficient space to store nb bytes of data (or more), and hook
347 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
348 * returns the number of bytes >0 which it failed to come up with.
349 * Don't need to worry about locking on packet, since only
350 * one thread can manipulate one at a time. Locking on continuation
351 * packets is handled by allocCBuf */
352 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
354 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
358 for (i = p->niovecs; nb > 0 && i < RX_MAXWVECS; i++) {
359 register struct rx_packet *cb;
360 if ((cb = allocCBuf(class))) {
361 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
362 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
363 nb -= RX_CBUFFERSIZE;
364 p->length += RX_CBUFFERSIZE;
373 /* Add more packet buffers */
375 rxi_MorePackets(int apackets)
377 struct rx_packet *p, *e;
381 getme = apackets * sizeof(struct rx_packet);
382 p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
384 PIN(p, getme); /* XXXXX */
385 memset((char *)p, 0, getme);
388 MUTEX_ENTER(&rx_freePktQ_lock);
390 for (e = p + apackets; p < e; p++) {
391 p->wirevec[0].iov_base = (char *)(p->wirehead);
392 p->wirevec[0].iov_len = RX_HEADER_SIZE;
393 p->wirevec[1].iov_base = (char *)(p->localdata);
394 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
395 p->flags |= RX_PKTFLAG_FREE;
398 queue_Append(&rx_freePacketQueue, p);
400 rx_nFreePackets += apackets;
401 rxi_NeedMorePackets = FALSE;
405 MUTEX_EXIT(&rx_freePktQ_lock);
410 /* Add more packet buffers */
412 rxi_MorePacketsNoLock(int apackets)
414 struct rx_packet *p, *e;
417 /* allocate enough packets that 1/4 of the packets will be able
418 * to hold maximal amounts of data */
419 apackets += (apackets / 4)
420 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
421 getme = apackets * sizeof(struct rx_packet);
422 p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
424 memset((char *)p, 0, getme);
426 for (e = p + apackets; p < e; p++) {
427 p->wirevec[0].iov_base = (char *)(p->wirehead);
428 p->wirevec[0].iov_len = RX_HEADER_SIZE;
429 p->wirevec[1].iov_base = (char *)(p->localdata);
430 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
431 p->flags |= RX_PKTFLAG_FREE;
434 queue_Append(&rx_freePacketQueue, p);
436 rx_nFreePackets += apackets;
437 rxi_NeedMorePackets = FALSE;
443 rxi_FreeAllPackets(void)
445 /* must be called at proper interrupt level, etcetera */
446 /* MTUXXX need to free all Packets */
447 osi_Free(rx_mallocedP,
448 (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
449 UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
452 /* Allocate more packets iff we need more continuation buffers */
453 /* In kernel, can't page in memory with interrupts disabled, so we
454 * don't use the event mechanism. */
456 rx_CheckPackets(void)
458 if (rxi_NeedMorePackets) {
459 rxi_MorePackets(rx_initSendWindow);
463 /* In the packet freeing routine below, the assumption is that
464 we want all of the packets to be used equally frequently, so that we
465 don't get packet buffers paging out. It would be just as valid to
466 assume that we DO want them to page out if not many are being used.
467 In any event, we assume the former, and append the packets to the end
469 /* This explanation is bogus. The free list doesn't remain in any kind of
470 useful order for long: the packets in use get pretty much randomly scattered
471 across all the pages. In order to permit unused {packets,bufs} to page out, they
472 must be stored so that packets which are adjacent in memory are adjacent in the
473 free list. An array springs rapidly to mind.
476 /* Actually free the packet p. */
478 rxi_FreePacketNoLock(struct rx_packet *p)
480 dpf(("Free %x\n", (int)p));
482 if (p->flags & RX_PKTFLAG_FREE)
483 osi_Panic("rxi_FreePacketNoLock: packet already free\n");
485 p->flags |= RX_PKTFLAG_FREE;
486 queue_Append(&rx_freePacketQueue, p);
490 rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
492 struct iovec *iov, *end;
494 if (first != 1) /* MTUXXX */
495 osi_Panic("FreeDataBufs 1: first must be 1");
496 iov = &p->wirevec[1];
497 end = iov + (p->niovecs - 1);
498 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
499 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
500 for (iov++; iov < end; iov++) {
502 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
503 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
511 int rxi_nBadIovecs = 0;
513 /* rxi_RestoreDataBufs
515 * Restore the correct sizes to the iovecs. Called when reusing a packet
516 * for reading off the wire.
519 rxi_RestoreDataBufs(struct rx_packet *p)
522 struct iovec *iov = &p->wirevec[2];
524 p->wirevec[0].iov_base = (char *)(p->wirehead);
525 p->wirevec[0].iov_len = RX_HEADER_SIZE;
526 p->wirevec[1].iov_base = (char *)(p->localdata);
527 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
529 for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
530 if (!iov->iov_base) {
535 iov->iov_len = RX_CBUFFERSIZE;
540 rxi_TrimDataBufs(struct rx_packet *p, int first)
543 struct iovec *iov, *end;
547 osi_Panic("TrimDataBufs 1: first must be 1");
549 /* Skip over continuation buffers containing message data */
550 iov = &p->wirevec[2];
551 end = iov + (p->niovecs - 2);
552 length = p->length - p->wirevec[1].iov_len;
553 for (; iov < end && length > 0; iov++) {
555 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
556 length -= iov->iov_len;
559 /* iov now points to the first empty data buffer. */
564 MUTEX_ENTER(&rx_freePktQ_lock);
566 for (; iov < end; iov++) {
568 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
569 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
574 MUTEX_EXIT(&rx_freePktQ_lock);
580 /* Free the packet p. P is assumed not to be on any queue, i.e.
581 * remove it yourself first if you call this routine. */
583 rxi_FreePacket(struct rx_packet *p)
588 MUTEX_ENTER(&rx_freePktQ_lock);
590 rxi_FreeDataBufsNoLock(p, 1);
591 rxi_FreePacketNoLock(p);
592 /* Wakeup anyone waiting for packets */
595 MUTEX_EXIT(&rx_freePktQ_lock);
600 /* rxi_AllocPacket sets up p->length so it reflects the number of
601 * bytes in the packet at this point, **not including** the header.
602 * The header is absolutely necessary, besides, this is the way the
603 * length field is usually used */
605 rxi_AllocPacketNoLock(int class)
607 register struct rx_packet *p;
610 if (rxi_OverQuota(class)) {
611 rxi_NeedMorePackets = TRUE;
612 MUTEX_ENTER(&rx_stats_mutex);
614 case RX_PACKET_CLASS_RECEIVE:
615 rx_stats.receivePktAllocFailures++;
617 case RX_PACKET_CLASS_SEND:
618 rx_stats.sendPktAllocFailures++;
620 case RX_PACKET_CLASS_SPECIAL:
621 rx_stats.specialPktAllocFailures++;
623 case RX_PACKET_CLASS_RECV_CBUF:
624 rx_stats.receiveCbufPktAllocFailures++;
626 case RX_PACKET_CLASS_SEND_CBUF:
627 rx_stats.sendCbufPktAllocFailures++;
630 MUTEX_EXIT(&rx_stats_mutex);
631 return (struct rx_packet *)0;
635 MUTEX_ENTER(&rx_stats_mutex);
636 rx_stats.packetRequests++;
637 MUTEX_EXIT(&rx_stats_mutex);
640 if (queue_IsEmpty(&rx_freePacketQueue))
641 osi_Panic("rxi_AllocPacket error");
643 if (queue_IsEmpty(&rx_freePacketQueue))
644 rxi_MorePacketsNoLock(rx_initSendWindow);
648 p = queue_First(&rx_freePacketQueue, rx_packet);
649 if (!(p->flags & RX_PKTFLAG_FREE))
650 osi_Panic("rxi_AllocPacket: packet not free\n");
652 dpf(("Alloc %x, class %d\n", (int)p, class));
655 p->flags = 0; /* clear RX_PKTFLAG_FREE, initialize the rest */
658 /* have to do this here because rx_FlushWrite fiddles with the iovs in
659 * order to truncate outbound packets. In the near future, may need
660 * to allocate bufs from a static pool here, and/or in AllocSendPacket
662 p->wirevec[0].iov_base = (char *)(p->wirehead);
663 p->wirevec[0].iov_len = RX_HEADER_SIZE;
664 p->wirevec[1].iov_base = (char *)(p->localdata);
665 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
667 p->length = RX_FIRSTBUFFERSIZE;
672 rxi_AllocPacket(int class)
674 register struct rx_packet *p;
676 MUTEX_ENTER(&rx_freePktQ_lock);
677 p = rxi_AllocPacketNoLock(class);
678 MUTEX_EXIT(&rx_freePktQ_lock);
682 /* This guy comes up with as many buffers as it {takes,can get} given
683 * the MTU for this call. It also sets the packet length before
684 * returning. caution: this is often called at NETPRI
685 * Called with call locked.
688 rxi_AllocSendPacket(register struct rx_call *call, int want)
690 register struct rx_packet *p = (struct rx_packet *)0;
692 register unsigned delta;
695 mud = call->MTU - RX_HEADER_SIZE;
697 rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
698 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
700 while (!(call->error)) {
701 MUTEX_ENTER(&rx_freePktQ_lock);
702 /* if an error occurred, or we get the packet we want, we're done */
703 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
704 MUTEX_EXIT(&rx_freePktQ_lock);
707 want = MIN(want, mud);
709 if ((unsigned)want > p->length)
710 (void)rxi_AllocDataBuf(p, (want - p->length),
711 RX_PACKET_CLASS_SEND_CBUF);
713 if ((unsigned)p->length > mud)
716 if (delta >= p->length) {
725 /* no error occurred, and we didn't get a packet, so we sleep.
726 * At this point, we assume that packets will be returned
727 * sooner or later, as packets are acknowledged, and so we
730 call->flags |= RX_CALL_WAIT_PACKETS;
731 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
732 MUTEX_EXIT(&call->lock);
733 rx_waitingForPackets = 1;
735 #ifdef RX_ENABLE_LOCKS
736 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
738 osi_rxSleep(&rx_waitingForPackets);
740 MUTEX_EXIT(&rx_freePktQ_lock);
741 MUTEX_ENTER(&call->lock);
742 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
743 call->flags &= ~RX_CALL_WAIT_PACKETS;
752 /* count the number of used FDs */
754 CountFDs(register int amax)
757 register int i, code;
761 for (i = 0; i < amax; i++) {
762 code = fstat(i, &tstat);
771 #define CountFDs(amax) amax
775 #if !defined(KERNEL) || defined(UKERNEL)
777 /* This function reads a single packet from the interface into the
778 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
779 * (host,port) of the sender are stored in the supplied variables, and
780 * the data length of the packet is stored in the packet structure.
781 * The header is decoded. */
783 rxi_ReadPacket(int socket, register struct rx_packet *p, afs_uint32 * host,
786 struct sockaddr_in from;
789 register afs_int32 tlen, savelen;
791 rx_computelen(p, tlen);
792 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
794 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
795 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
796 * it once in order to avoid races. */
799 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
807 /* Extend the last iovec for padding, it's just to make sure that the
808 * read doesn't return more data than we expect, and is done to get around
809 * our problems caused by the lack of a length field in the rx header.
810 * Use the extra buffer that follows the localdata in each packet
812 savelen = p->wirevec[p->niovecs - 1].iov_len;
813 p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
815 memset((char *)&msg, 0, sizeof(msg));
816 msg.msg_name = (char *)&from;
817 msg.msg_namelen = sizeof(struct sockaddr_in);
818 msg.msg_iov = p->wirevec;
819 msg.msg_iovlen = p->niovecs;
820 nbytes = rxi_Recvmsg(socket, &msg, 0);
822 /* restore the vec to its correct state */
823 p->wirevec[p->niovecs - 1].iov_len = savelen;
825 p->length = (nbytes - RX_HEADER_SIZE);
826 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
828 rxi_MorePackets(rx_initSendWindow);
830 else if (nbytes < 0 && errno == EWOULDBLOCK) {
831 MUTEX_ENTER(&rx_stats_mutex);
832 rx_stats.noPacketOnRead++;
833 MUTEX_EXIT(&rx_stats_mutex);
837 MUTEX_ENTER(&rx_stats_mutex);
838 rx_stats.bogusPacketOnRead++;
839 rx_stats.bogusHost = from.sin_addr.s_addr;
840 MUTEX_EXIT(&rx_stats_mutex);
841 dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
842 from.sin_port, nbytes));
846 /* Extract packet header. */
847 rxi_DecodePacketHeader(p);
849 *host = from.sin_addr.s_addr;
850 *port = from.sin_port;
851 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
852 struct rx_peer *peer;
853 MUTEX_ENTER(&rx_stats_mutex);
854 rx_stats.packetsRead[p->header.type - 1]++;
855 MUTEX_EXIT(&rx_stats_mutex);
857 * Try to look up this peer structure. If it doesn't exist,
858 * don't create a new one -
859 * we don't keep count of the bytes sent/received if a peer
860 * structure doesn't already exist.
862 * The peer/connection cleanup code assumes that there is 1 peer
863 * per connection. If we actually created a peer structure here
864 * and this packet was an rxdebug packet, the peer structure would
865 * never be cleaned up.
867 peer = rxi_FindPeer(*host, *port, 0, 0);
869 MUTEX_ENTER(&peer->peer_lock);
870 hadd32(peer->bytesReceived, p->length);
871 MUTEX_EXIT(&peer->peer_lock);
875 /* Free any empty packet buffers at the end of this packet */
876 rxi_TrimDataBufs(p, 1);
882 #endif /* !KERNEL || UKERNEL */
884 /* This function splits off the first packet in a jumbo packet.
885 * As of AFS 3.5, jumbograms contain more than one fixed size
886 * packet, and the RX_JUMBO_PACKET flag is set in all but the
887 * last packet header. All packets (except the last) are padded to
888 * fall on RX_CBUFFERSIZE boundaries.
889 * HACK: We store the length of the first n-1 packets in the
890 * last two pad bytes. */
893 rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
896 struct rx_packet *np;
897 struct rx_jumboHeader *jp;
903 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
904 * bytes in length. All but the first packet are preceded by
905 * an abbreviated four byte header. The length of the last packet
906 * is calculated from the size of the jumbogram. */
907 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
909 if ((int)p->length < length) {
910 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
913 niov = p->niovecs - 2;
915 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
918 iov = &p->wirevec[2];
919 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
921 /* Get a pointer to the abbreviated packet header */
922 jp = (struct rx_jumboHeader *)
923 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
925 /* Set up the iovecs for the next packet */
926 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
927 np->wirevec[0].iov_len = sizeof(struct rx_header);
928 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
929 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
930 np->niovecs = niov + 1;
931 for (i = 2, iov++; i <= niov; i++, iov++) {
932 np->wirevec[i] = *iov;
934 np->length = p->length - length;
935 p->length = RX_JUMBOBUFFERSIZE;
938 /* Convert the jumbo packet header to host byte order */
939 temp = ntohl(*(afs_uint32 *) jp);
940 jp->flags = (u_char) (temp >> 24);
941 jp->cksum = (u_short) (temp);
943 /* Fill in the packet header */
944 np->header = p->header;
945 np->header.serial = p->header.serial + 1;
946 np->header.seq = p->header.seq + 1;
947 np->header.flags = jp->flags;
948 np->header.spare = jp->cksum;
954 /* Send a udp datagram */
956 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
957 int length, int istack)
961 memset(&msg, 0, sizeof(msg));
963 msg.msg_iovlen = nvecs;
965 msg.msg_namelen = sizeof(struct sockaddr_in);
967 rxi_Sendmsg(socket, &msg, 0);
971 #elif !defined(UKERNEL)
973 * message receipt is done in rxk_input or rx_put.
976 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
978 * Copy an mblock to the contiguous area pointed to by cp.
979 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
980 * but it doesn't really.
981 * Returns the number of bytes not transferred.
982 * The message is NOT changed.
985 cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
989 for (; mp && len > 0; mp = mp->b_cont) {
990 if (mp->b_datap->db_type != M_DATA) {
993 n = MIN(len, (mp->b_wptr - mp->b_rptr));
994 memcpy(cp, (char *)mp->b_rptr, n);
1002 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1003 * but it doesn't really.
1004 * This sucks, anyway, do it like m_cpy.... below
1007 cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
1010 register int m, n, o, t, i;
1012 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1013 if (mp->b_datap->db_type != M_DATA) {
1016 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1022 t = iovs[i].iov_len;
1025 memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1035 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1036 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1038 #if !defined(AFS_LINUX20_ENV)
1040 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1043 unsigned int l1, l2, i, t;
1045 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1046 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
1049 if (m->m_len <= off) {
1059 p1 = mtod(m, caddr_t) + off;
1060 l1 = m->m_len - off;
1062 p2 = iovs[0].iov_base;
1063 l2 = iovs[0].iov_len;
1066 t = MIN(l1, MIN(l2, (unsigned int)len));
1077 p1 = mtod(m, caddr_t);
1083 p2 = iovs[i].iov_base;
1084 l2 = iovs[i].iov_len;
1092 #endif /* AFS_SUN5_ENV */
1094 #if !defined(AFS_LINUX20_ENV)
1096 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1097 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1103 struct rx_packet *phandle;
1104 int hdr_len, data_len;
1109 m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1116 #endif /*KERNEL && !UKERNEL */
1119 /* send a response to a debug packet */
1122 rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
1123 afs_int32 ahost, short aport, int istack)
1125 struct rx_debugIn tin;
1127 struct rx_serverQueueEntry *np, *nqe;
1130 * Only respond to client-initiated Rx debug packets,
1131 * and clear the client flag in the response.
1133 if (ap->header.flags & RX_CLIENT_INITIATED) {
1134 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1135 rxi_EncodePacketHeader(ap);
1140 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1141 /* all done with packet, now set length to the truth, so we can
1142 * reuse this packet */
1143 rx_computelen(ap, ap->length);
1145 tin.type = ntohl(tin.type);
1146 tin.index = ntohl(tin.index);
1148 case RX_DEBUGI_GETSTATS:{
1149 struct rx_debugStats tstat;
1151 /* get basic stats */
1152 memset((char *)&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1153 tstat.version = RX_DEBUGI_VERSION;
1154 #ifndef RX_ENABLE_LOCKS
1155 tstat.waitingForPackets = rx_waitingForPackets;
1157 MUTEX_ENTER(&rx_serverPool_lock);
1158 tstat.nFreePackets = htonl(rx_nFreePackets);
1159 tstat.callsExecuted = htonl(rxi_nCalls);
1160 tstat.packetReclaims = htonl(rx_packetReclaims);
1161 tstat.usedFDs = CountFDs(64);
1162 tstat.nWaiting = htonl(rx_nWaiting);
1163 queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
1165 MUTEX_EXIT(&rx_serverPool_lock);
1166 tstat.idleThreads = htonl(tstat.idleThreads);
1167 tl = sizeof(struct rx_debugStats) - ap->length;
1169 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1172 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1174 ap->length = sizeof(struct rx_debugStats);
1175 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1176 rx_computelen(ap, ap->length);
1181 case RX_DEBUGI_GETALLCONN:
1182 case RX_DEBUGI_GETCONN:{
1184 register struct rx_connection *tc;
1185 struct rx_call *tcall;
1186 struct rx_debugConn tconn;
1187 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1190 tl = sizeof(struct rx_debugConn) - ap->length;
1192 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1196 memset((char *)&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1197 /* get N'th (maybe) "interesting" connection info */
1198 for (i = 0; i < rx_hashTableSize; i++) {
1199 #if !defined(KERNEL)
1200 /* the time complexity of the algorithm used here
1201 * exponentially increases with the number of connections.
1203 #ifdef AFS_PTHREAD_ENV
1209 MUTEX_ENTER(&rx_connHashTable_lock);
1210 /* We might be slightly out of step since we are not
1211 * locking each call, but this is only debugging output.
1213 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
1214 if ((all || rxi_IsConnInteresting(tc))
1215 && tin.index-- <= 0) {
1216 tconn.host = tc->peer->host;
1217 tconn.port = tc->peer->port;
1218 tconn.cid = htonl(tc->cid);
1219 tconn.epoch = htonl(tc->epoch);
1220 tconn.serial = htonl(tc->serial);
1221 for (j = 0; j < RX_MAXCALLS; j++) {
1222 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1223 if ((tcall = tc->call[j])) {
1224 tconn.callState[j] = tcall->state;
1225 tconn.callMode[j] = tcall->mode;
1226 tconn.callFlags[j] = tcall->flags;
1227 if (queue_IsNotEmpty(&tcall->rq))
1228 tconn.callOther[j] |= RX_OTHER_IN;
1229 if (queue_IsNotEmpty(&tcall->tq))
1230 tconn.callOther[j] |= RX_OTHER_OUT;
1232 tconn.callState[j] = RX_STATE_NOTINIT;
1235 tconn.natMTU = htonl(tc->peer->natMTU);
1236 tconn.error = htonl(tc->error);
1237 tconn.flags = tc->flags;
1238 tconn.type = tc->type;
1239 tconn.securityIndex = tc->securityIndex;
1240 if (tc->securityObject) {
1241 RXS_GetStats(tc->securityObject, tc,
1243 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1244 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1247 DOHTONL(packetsReceived);
1248 DOHTONL(packetsSent);
1249 DOHTONL(bytesReceived);
1253 sizeof(tconn.secStats.spares) /
1258 sizeof(tconn.secStats.sparel) /
1259 sizeof(afs_int32); i++)
1263 MUTEX_EXIT(&rx_connHashTable_lock);
1264 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1267 ap->length = sizeof(struct rx_debugConn);
1268 rxi_SendDebugPacket(ap, asocket, ahost, aport,
1274 MUTEX_EXIT(&rx_connHashTable_lock);
1276 /* if we make it here, there are no more interesting connections */
1277 tconn.cid = htonl(0xffffffff); /* means end */
1278 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1281 ap->length = sizeof(struct rx_debugConn);
1282 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1288 * Pass back all the peer structures we have available
1291 case RX_DEBUGI_GETPEER:{
1293 register struct rx_peer *tp;
1294 struct rx_debugPeer tpeer;
1297 tl = sizeof(struct rx_debugPeer) - ap->length;
1299 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1303 memset((char *)&tpeer, 0, sizeof(tpeer));
1304 for (i = 0; i < rx_hashTableSize; i++) {
1305 #if !defined(KERNEL)
1306 /* the time complexity of the algorithm used here
1307 * exponentially increases with the number of peers.
1309 * Yielding after processing each hash table entry
1310 * and dropping rx_peerHashTable_lock
1311 * also increases the risk that we will miss a new
1312 * entry - but we are willing to live with this
1313 * limitation since this is meant for debugging only
1315 #ifdef AFS_PTHREAD_ENV
1321 MUTEX_ENTER(&rx_peerHashTable_lock);
1322 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1323 if (tin.index-- <= 0) {
1324 tpeer.host = tp->host;
1325 tpeer.port = tp->port;
1326 tpeer.ifMTU = htons(tp->ifMTU);
1327 tpeer.idleWhen = htonl(tp->idleWhen);
1328 tpeer.refCount = htons(tp->refCount);
1329 tpeer.burstSize = tp->burstSize;
1330 tpeer.burst = tp->burst;
1331 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1332 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1333 tpeer.rtt = htonl(tp->rtt);
1334 tpeer.rtt_dev = htonl(tp->rtt_dev);
1335 tpeer.timeout.sec = htonl(tp->timeout.sec);
1336 tpeer.timeout.usec = htonl(tp->timeout.usec);
1337 tpeer.nSent = htonl(tp->nSent);
1338 tpeer.reSends = htonl(tp->reSends);
1339 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1340 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1341 tpeer.rateFlag = htonl(tp->rateFlag);
1342 tpeer.natMTU = htons(tp->natMTU);
1343 tpeer.maxMTU = htons(tp->maxMTU);
1344 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1345 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1346 tpeer.MTU = htons(tp->MTU);
1347 tpeer.cwind = htons(tp->cwind);
1348 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1349 tpeer.congestSeq = htons(tp->congestSeq);
1350 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1351 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1352 tpeer.bytesReceived.high =
1353 htonl(tp->bytesReceived.high);
1354 tpeer.bytesReceived.low =
1355 htonl(tp->bytesReceived.low);
1357 MUTEX_EXIT(&rx_peerHashTable_lock);
1358 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1361 ap->length = sizeof(struct rx_debugPeer);
1362 rxi_SendDebugPacket(ap, asocket, ahost, aport,
1368 MUTEX_EXIT(&rx_peerHashTable_lock);
1370 /* if we make it here, there are no more peers to report */
1371 tpeer.host = htonl(0xffffffff); /* means end */
1372 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
1375 ap->length = sizeof(struct rx_debugPeer);
1376 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1381 case RX_DEBUGI_RXSTATS:{
1385 tl = sizeof(rx_stats) - ap->length;
1387 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1391 /* Since its all int32s convert to network order with a loop. */
1392 MUTEX_ENTER(&rx_stats_mutex);
1393 s = (afs_int32 *) & rx_stats;
1394 for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
1395 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
1398 ap->length = sizeof(rx_stats);
1399 MUTEX_EXIT(&rx_stats_mutex);
1400 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1406 /* error response packet */
1407 tin.type = htonl(RX_DEBUGI_BADTYPE);
1408 tin.index = tin.type;
1409 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1411 ap->length = sizeof(struct rx_debugIn);
1412 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/*
 * rxi_ReceiveVersionPacket -- reply to a version query.
 *
 * When the incoming packet has RX_CLIENT_INITIATED set, that flag is
 * cleared, the header is re-encoded, and a zero-padded copy of
 * cml_version_number (skipping its first 4 characters) is written at
 * offset 0 of the packet data.  The packet is then handed to
 * rxi_SendDebugPacket() for transmission to (ahost, aport).
 *
 * ap      - received packet; reused in place as the reply
 * asocket - socket passed on to rxi_SendDebugPacket()
 * ahost   - peer address passed on to rxi_SendDebugPacket()
 * aport   - peer port passed on to rxi_SendDebugPacket()
 * istack  - passed on unchanged to rxi_SendDebugPacket()
 */
1420 rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
1421 afs_int32 ahost, short aport, int istack)
1426 * Only respond to client-initiated version requests, and
1427 * clear that flag in the response.
1429 if (ap->header.flags & RX_CLIENT_INITIATED) {
1432 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1433 rxi_EncodePacketHeader(ap);
/* buf is zeroed first and strncpy() is limited to sizeof(buf) - 1 bytes,
 * so the final byte of buf always stays NUL. */
1434 memset(buf, 0, sizeof(buf));
1435 strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
/* NOTE(review): 65 is a literal byte count; the declaration of buf is not
 * shown here -- confirm buf holds at least 65 bytes. */
1436 rx_packetwrite(ap, 0, 65, buf);
1439 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1447 /* send a debug packet back to the sender */
/*
 * apacket - packet to transmit; its wirevec is trimmed for the send and
 *           restored before returning
 * asocket - socket handed to osi_NetSend()
 * ahost   - stored as-is into sin_addr.s_addr
 * aport   - stored as-is into sin_port
 * istack  - passed through to osi_NetSend()
 *
 * ahost and aport are not byte-swapped here, so presumably callers supply
 * them already in network byte order -- TODO confirm.
 * The number of bytes handed to osi_NetSend() is apacket->length plus
 * RX_HEADER_SIZE, and its result is discarded.
 */
1449 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1450 afs_int32 ahost, short aport, afs_int32 istack)
1452 struct sockaddr_in taddr;
1458 int waslocked = ISAFS_GLOCK();
/* NOTE(review): no zeroing of taddr is visible before these assignments,
 * whereas rxi_SendPacket() does memset() its sockaddr; confirm the
 * remaining bytes of taddr do not matter to osi_NetSend(). */
1461 taddr.sin_family = AF_INET;
1462 taddr.sin_port = aport;
1463 taddr.sin_addr.s_addr = ahost;
1464 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
1465 taddr.sin_len = sizeof(struct sockaddr_in);
1468 /* We need to trim the niovecs. */
/* Data iovecs start at index 1; nbytes counts the payload bytes that
 * still have to be covered by the iovecs not yet visited. */
1469 nbytes = apacket->length;
1470 for (i = 1; i < apacket->niovecs; i++) {
/* The rest of the payload fits in this iovec: remember the original
 * length and iovec count, then shrink both to end the packet here. */
1471 if (nbytes <= apacket->wirevec[i].iov_len) {
1472 savelen = apacket->wirevec[i].iov_len;
1473 saven = apacket->niovecs;
1474 apacket->wirevec[i].iov_len = nbytes;
1475 apacket->niovecs = i + 1; /* so condition fails because i == niovecs */
1477 nbytes -= apacket->wirevec[i].iov_len;
1481 #ifdef RX_KERNEL_TRACE
1482 if (ICL_SETACTIVE(afs_iclSetp)) {
1485 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
1486 "before osi_NetSend()");
1494 /* debug packets are not reliably delivered, hence the cast below. */
1495 (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1496 apacket->length + RX_HEADER_SIZE, istack);
1498 #ifdef RX_KERNEL_TRACE
1499 if (ICL_SETACTIVE(afs_iclSetp)) {
1501 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
1502 "after osi_NetSend()");
/* The trimming loop left i one past the shortened iovec (it exits with
 * i == niovecs), which is why the restore below indexes i - 1. */
1512 if (saven) { /* means we truncated the packet above. */
1513 apacket->wirevec[i - 1].iov_len = savelen;
1514 apacket->niovecs = saven;
/*
 * rxi_SendPacket -- stamp, encode and transmit a single packet.
 *
 * call   - call the packet belongs to; it is tested for null before use,
 *          and rxi_SendSpecial() passes a null call
 * conn   - connection supplying the peer address and the serial counter
 * p      - packet to send; header.serial is always rewritten, firstSerial
 *          is set only while it is still zero, and retryTime is rewritten
 *          when osi_NetSend() reports failure
 * istack - passed through to osi_NetSend()
 *
 * Locks taken, one at a time: conn->conn_data_lock around the serial
 * increment, rx_stats_mutex around the rx_stats updates, and
 * peer->peer_lock around the bytesSent update.
 *
 * NOTE(review): rx_packetTypes[] and rx_stats.packetsSent[] are indexed
 * with p->header.type - 1, so a type of 0 would index before the array;
 * confirm no caller sends such a packet.
 * NOTE(review): the dpf() call passes (int)p, which drops bits wherever a
 * pointer is wider than int.
 */
1519 /* Send the packet to appropriate destination for the specified
1520 * call. The header is first encoded and placed in the packet.
1523 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
1524 struct rx_packet *p, int istack)
1530 struct sockaddr_in addr;
1531 register struct rx_peer *peer = conn->peer;
1534 char deliveryType = 'S';
1536 /* The address we're sending the packet to */
1537 memset(&addr, 0, sizeof(addr));
1538 addr.sin_family = AF_INET;
1539 addr.sin_port = peer->port;
1540 addr.sin_addr.s_addr = peer->host;
1542 /* This stuff should be revamped, I think, so that most, if not
1543 * all, of the header stuff is always added here. We could
1544 * probably do away with the encode/decode routines. XXXXX */
1546 /* Stamp each packet with a unique serial number. The serial
1547 * number is maintained on a connection basis because some types
1548 * of security may be based on the serial number of the packet,
1549 * and security is handled on a per authenticated-connection
1551 /* Pre-increment, to guarantee no zero serial number; a zero
1552 * serial number means the packet was never sent. */
1553 MUTEX_ENTER(&conn->conn_data_lock);
1554 p->header.serial = ++conn->serial;
1555 MUTEX_EXIT(&conn->conn_data_lock);
1556 /* This is so we can adjust retransmit time-outs better in the face of
1557 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1559 if (p->firstSerial == 0) {
1560 p->firstSerial = p->header.serial;
1563 /* If an output tracer function is defined, call it with the packet and
1564 * network address. Note this function may modify its arguments. */
1565 if (rx_almostSent) {
1566 int drop = (*rx_almostSent) (p, &addr);
1567 /* drop packet if return value is non-zero? */
1569 deliveryType = 'D'; /* Drop the packet */
1573 /* Get network byte order header */
1574 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1575 * touch ALL the fields */
1577 /* Send the packet out on the same socket that related packets are being
1581 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
1584 /* Possibly drop this packet, for testing purposes */
1585 if ((deliveryType == 'D')
1586 || ((rx_intentionallyDroppedPacketsPer100 > 0)
1587 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1588 deliveryType = 'D'; /* Drop the packet */
1590 deliveryType = 'S'; /* Send the packet */
1591 #endif /* RXDEBUG */
1593 /* Loop until the packet is sent. We'd prefer just to use a
1594 * blocking socket, but unfortunately the interface doesn't
1595 * allow us to have the socket block in send mode, and not
1596 * block in receive mode */
1599 waslocked = ISAFS_GLOCK();
1600 #ifdef RX_KERNEL_TRACE
1601 if (ICL_SETACTIVE(afs_iclSetp)) {
1604 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
1605 "before osi_NetSend()");
1614 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1615 p->length + RX_HEADER_SIZE, istack)) != 0) {
1616 /* send failed, so let's hurry up the resend, eh? */
1617 MUTEX_ENTER(&rx_stats_mutex);
1618 rx_stats.netSendFailures++;
1619 MUTEX_EXIT(&rx_stats_mutex);
1620 p->retryTime = p->timeSent; /* resend it very soon */
/* Retry delay is 10 plus 256 per unit of p->backoff, added through
 * clock_Addmsec() (presumably milliseconds -- confirm). */
1621 clock_Addmsec(&(p->retryTime),
1622 10 + (((afs_uint32) p->backoff) << 8));
1624 #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
1625 /* Linux is nice -- it can tell us right away that we cannot
1626 * reach this recipient by returning an ENETUNREACH error
1627 * code. So, when this happens let's "down" the host NOW so
1628 * we don't sit around waiting for this host to timeout later.
1630 if (call && code == -ENETUNREACH)
1631 call->lastReceiveTime = 0;
1635 #ifdef RX_KERNEL_TRACE
1636 if (ICL_SETACTIVE(afs_iclSetp)) {
1638 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
1639 "after osi_NetSend()");
1651 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (int)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
1653 MUTEX_ENTER(&rx_stats_mutex);
1654 rx_stats.packetsSent[p->header.type - 1]++;
1655 MUTEX_EXIT(&rx_stats_mutex);
1656 MUTEX_ENTER(&peer->peer_lock);
1657 hadd32(peer->bytesSent, p->length);
1658 MUTEX_EXIT(&peer->peer_lock);
/*
 * rxi_SendPacketList -- transmit len packets as one datagram (jumbogram).
 *
 * call   - call the packets belong to; it is tested for null before use
 * conn   - connection supplying the peer address and the serial counter
 * list   - array of len packets; the wire header of list[0] leads the
 *          datagram
 * len    - number of packets; the function panics when len + 1 exceeds
 *          RX_MAXIOVECS
 * istack - passed to osi_NetSend() and tested together with the global
 *          lock state around the send
 *
 * A local wirevec is assembled: slot 0 is the wire header of list[0] and
 * slot i + 1 points at the localdata of packet i.  Packets flagged
 * RX_JUMBO_PACKET must be exactly RX_JUMBOBUFFERSIZE long and contribute
 * RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE bytes; otherwise p->length bytes
 * are contributed.  A packet with more than two iovecs causes a panic.
 *
 * len serial numbers are reserved from conn->serial in one step under
 * conn_data_lock and handed out to the packets in list order.
 */
1661 /* Send a list of packets to appropriate destination for the specified
1662 * connection. The headers are first encoded and placed in the packets.
1665 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
1666 struct rx_packet **list, int len, int istack)
1668 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1671 struct sockaddr_in addr;
1672 register struct rx_peer *peer = conn->peer;
1674 struct rx_packet *p = NULL;
1675 struct iovec wirevec[RX_MAXIOVECS];
1676 int i, length, code;
1679 struct rx_jumboHeader *jp;
1681 char deliveryType = 'S';
1683 /* The address we're sending the packet to */
/* NOTE(review): unlike rxi_SendPacket(), addr is not zeroed before its
 * fields are assigned; confirm the unset bytes do not matter to
 * osi_NetSend(). */
1684 addr.sin_family = AF_INET;
1685 addr.sin_port = peer->port;
1686 addr.sin_addr.s_addr = peer->host;
/* One iovec is needed for the shared header plus one per packet. */
1688 if (len + 1 > RX_MAXIOVECS) {
1689 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1693 * Stamp the packets in this jumbogram with consecutive serial numbers
/* Reserve len serials at once; serial keeps the old counter value and is
 * pre-incremented for each packet further down. */
1695 MUTEX_ENTER(&conn->conn_data_lock);
1696 serial = conn->serial;
1697 conn->serial += len;
1698 MUTEX_EXIT(&conn->conn_data_lock);
1701 /* This stuff should be revamped, I think, so that most, if not
1702 * all, of the header stuff is always added here. We could
1703 * probably do away with the encode/decode routines. XXXXX */
/* Slot 0 of the local wirevec is the wire header of the first packet;
 * length accumulates the total number of bytes to send. */
1706 length = RX_HEADER_SIZE;
1707 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1708 wirevec[0].iov_len = RX_HEADER_SIZE;
1709 for (i = 0; i < len; i++) {
1712 /* The whole 3.5 jumbogram scheme relies on packets fitting
1713 * in a single packet buffer. */
1714 if (p->niovecs > 2) {
1715 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1718 /* Set the RX_JUMBO_PACKET flags in all but the last packets
1721 if (p->length != RX_JUMBOBUFFERSIZE) {
1722 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1724 p->header.flags |= RX_JUMBO_PACKET;
1725 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1726 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1728 wirevec[i + 1].iov_len = p->length;
1729 length += p->length;
1731 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
1733 /* Convert jumbo packet header to network byte order */
1734 temp = (afs_uint32) (p->header.flags) << 24;
1735 temp |= (afs_uint32) (p->header.spare);
1736 *(afs_uint32 *) jp = htonl(temp);
1738 jp = (struct rx_jumboHeader *)
1739 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1741 /* Stamp each packet with a unique serial number. The serial
1742 * number is maintained on a connection basis because some types
1743 * of security may be based on the serial number of the packet,
1744 * and security is handled on a per authenticated-connection
1746 /* Pre-increment, to guarantee no zero serial number; a zero
1747 * serial number means the packet was never sent. */
1748 p->header.serial = ++serial;
1749 /* This is so we can adjust retransmit time-outs better in the face of
1750 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1752 if (p->firstSerial == 0) {
1753 p->firstSerial = p->header.serial;
1756 /* If an output tracer function is defined, call it with the packet and
1757 * network address. Note this function may modify its arguments. */
1758 if (rx_almostSent) {
1759 int drop = (*rx_almostSent) (p, &addr);
1760 /* drop packet if return value is non-zero? */
1762 deliveryType = 'D'; /* Drop the packet */
1766 /* Get network byte order header */
1767 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1768 * touch ALL the fields */
1771 /* Send the packet out on the same socket that related packets are being
1775 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
1778 /* Possibly drop this packet, for testing purposes */
1779 if ((deliveryType == 'D')
1780 || ((rx_intentionallyDroppedPacketsPer100 > 0)
1781 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1782 deliveryType = 'D'; /* Drop the packet */
1784 deliveryType = 'S'; /* Send the packet */
1785 #endif /* RXDEBUG */
1787 /* Loop until the packet is sent. We'd prefer just to use a
1788 * blocking socket, but unfortunately the interface doesn't
1789 * allow us to have the socket block in send mode, and not
1790 * block in receive mode */
1792 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1793 waslocked = ISAFS_GLOCK();
1794 if (!istack && waslocked)
/* The iovec count is len + 1: the shared header slot plus one slot per
 * packet; length is the byte total accumulated above. */
1798 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
1800 /* send failed, so let's hurry up the resend, eh? */
1801 MUTEX_ENTER(&rx_stats_mutex);
1802 rx_stats.netSendFailures++;
1803 MUTEX_EXIT(&rx_stats_mutex);
1804 for (i = 0; i < len; i++) {
1806 p->retryTime = p->timeSent; /* resend it very soon */
1807 clock_Addmsec(&(p->retryTime),
1808 10 + (((afs_uint32) p->backoff) << 8));
1810 #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
1811 /* Linux is nice -- it can tell us right away that we cannot
1812 * reach this recipient by returning an ENETUNREACH error
1813 * code. So, when this happens let's "down" the host NOW so
1814 * we don't sit around waiting for this host to timeout later.
1816 if (call && code == -ENETUNREACH)
1817 call->lastReceiveTime = 0;
1820 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1821 if (!istack && waslocked)
1830 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1831 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1],
1832 peer->host, peer->port, p->header.serial, p->header.epoch,
1833 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1834 (int)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
1837 MUTEX_ENTER(&rx_stats_mutex);
1838 rx_stats.packetsSent[p->header.type - 1]++;
1839 MUTEX_EXIT(&rx_stats_mutex);
1840 MUTEX_ENTER(&peer->peer_lock);
1842 hadd32(peer->bytesSent, p->length);
1843 MUTEX_EXIT(&peer->peer_lock);
1847 /* Send a "special" packet to the peer connection. If call is
1848 * specified, then the packet is directed to a specific call channel
1849 * associated with the connection, otherwise it is directed to the
1850 * connection only. Uses optionalPacket if it is supplied, rather than
1851 * allocating a new packet buffer. Nbytes is the length of the data
1852 * portion of the packet. If data is non-null, nbytes of data are
1853 * copied into the packet. Type is the type of the packet, as defined
1854 * in rx.h. Bug: there's a lot of duplication between this and other
1855 * routines. This needs to be cleaned up. */
/*
 * call           - optional call; supplies channel and callNumber
 * conn           - supplies serviceId, securityIndex, cid, epoch and type
 * optionalPacket - caller-supplied packet; this pointer is also the value
 *                  returned
 * type           - stored in header.type; RX_PACKET_TYPE_BUSY gets special
 *                  call-number handling
 * data, nbytes   - payload written at offset 0 of the packet
 * istack         - forwarded to rxi_Send() or rxi_SendPacket()
 *
 * header.flags starts at 0 and gains RX_CLIENT_INITIATED on client
 * connections.  The wirevec is trimmed to nbytes for the send and its
 * saved length is written back afterwards.  A packet allocated here comes
 * from RX_PACKET_CLASS_SPECIAL.
 */
1857 rxi_SendSpecial(register struct rx_call *call,
1858 register struct rx_connection *conn,
1859 struct rx_packet *optionalPacket, int type, char *data,
1860 int nbytes, int istack)
1862 /* Some of the following stuff should be common code for all
1863 * packet sends (it's repeated elsewhere) */
1864 register struct rx_packet *p;
1866 int savelen = 0, saven = 0;
1867 int channel, callNumber;
1869 channel = call->channel;
1870 callNumber = *call->callNumber;
1871 /* BUSY packets refer to the next call on this connection */
1872 if (type == RX_PACKET_TYPE_BUSY) {
1881 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1883 osi_Panic("rxi_SendSpecial failure");
/* The channel number is OR-ed into the connection id to form the cid. */
1890 p->header.serviceId = conn->serviceId;
1891 p->header.securityIndex = conn->securityIndex;
1892 p->header.cid = (conn->cid | channel);
1893 p->header.callNumber = callNumber;
1895 p->header.epoch = conn->epoch;
1896 p->header.type = type;
1897 p->header.flags = 0;
1898 if (conn->type == RX_CLIENT_CONNECTION)
1899 p->header.flags |= RX_CLIENT_INITIATED;
1901 rx_packetwrite(p, 0, nbytes, data);
/* Trim the data iovecs (index 1 and up) so that together they cover
 * exactly nbytes; nbytes is counted down as iovecs are passed. */
1903 for (i = 1; i < p->niovecs; i++) {
1904 if (nbytes <= p->wirevec[i].iov_len) {
1905 savelen = p->wirevec[i].iov_len;
1907 p->wirevec[i].iov_len = nbytes;
1908 p->niovecs = i + 1; /* so condition fails because i == niovecs */
1910 nbytes -= p->wirevec[i].iov_len;
1914 rxi_Send(call, p, istack);
1916 rxi_SendPacket((struct rx_call *)0, conn, p, istack);
1917 if (saven) { /* means we truncated the packet above. We probably don't */
1918 /* really need to do this, but it seems safer this way, given that */
1919 /* sneaky optionalPacket... */
/* The trimming loop exits one index past the shortened iovec, hence
 * i - 1 here. */
1920 p->wirevec[i - 1].iov_len = savelen;
1923 if (!optionalPacket)
1925 return optionalPacket;
1929 /* Encode the packet's header (from the struct header in the packet to
1930 * the net byte order representation in the wire representation of the
1931 * packet, which is what is actually sent out on the wire) */
/*
 * p - packet whose p->header is serialized into p->wirevec[0].iov_base.
 *
 * RX_HEADER_SIZE bytes are zeroed, then seven 32-bit words are stored,
 * each converted with htonl():
 *   word 0  epoch
 *   word 1  cid
 *   word 2  callNumber
 *   word 3  seq
 *   word 4  serial
 *   word 5  type << 24 | flags << 16 | userStatus << 8 | securityIndex
 *   word 6  spare << 16 | (serviceId & 0xffff)
 */
1933 rxi_EncodePacketHeader(register struct rx_packet *p)
1935 register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
1937 memset((char *)buf, 0, RX_HEADER_SIZE);
1938 *buf++ = htonl(p->header.epoch);
1939 *buf++ = htonl(p->header.cid);
1940 *buf++ = htonl(p->header.callNumber);
1941 *buf++ = htonl(p->header.seq);
1942 *buf++ = htonl(p->header.serial);
/* Only type and flags are cast to afs_uint32 before shifting; userStatus
 * and securityIndex are shifted and OR-ed in as they are. */
1943 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
1944 | (((afs_uint32) p->header.flags) << 16)
1945 | (p->header.userStatus << 8) | p->header.securityIndex);
1946 /* Note: top 16 bits of this next word were reserved */
1947 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
1950 /* Decode the packet's header (from net byte order to a struct header) */
/*
 * p - packet whose wire header at p->wirevec[0].iov_base is read as 32-bit
 *     words, converted with ntohl(), and stored into p->header.
 *
 * The fields are read in the order rxi_EncodePacketHeader() writes them:
 * epoch, cid, callNumber, seq, serial, then one word split into
 * type / flags / userStatus / securityIndex, then one word split into
 * spare (upper 16 bits) and serviceId (lower 16 bits).
 */
1952 rxi_DecodePacketHeader(register struct rx_packet *p)
1954 register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
1957 p->header.epoch = ntohl(*buf);
1959 p->header.cid = ntohl(*buf);
1961 p->header.callNumber = ntohl(*buf);
1963 p->header.seq = ntohl(*buf);
1965 p->header.serial = ntohl(*buf);
/* temp holds one host-order word; each shift lines the wanted byte up at
 * the bottom and the assignment keeps only what fits in the field. */
1971 /* C will truncate byte fields to bytes for me */
1972 p->header.type = temp >> 24;
1973 p->header.flags = temp >> 16;
1974 p->header.userStatus = temp >> 8;
1975 p->header.securityIndex = temp >> 0;
/* Last word: low 16 bits are serviceId, high 16 bits go to spare. */
1980 p->header.serviceId = (temp & 0xffff);
1981 p->header.spare = temp >> 16;
1982 /* Note: top 16 bits of this last word are the security checksum */
/*
 * rxi_PrepareSendPacket -- fill in the header of a data packet, reset its
 * transmission state, size its wirevec to the payload, and hand it to the
 * security object of the connection.
 *
 * call - supplies conn, channel, callNumber and the sequence number
 *        (call->tnext is post-incremented here)
 * p    - packet to prepare; RX_PKTFLAG_ACKED is cleared, the type becomes
 *        RX_PACKET_TYPE_DATA, and retryTime, firstSent and header.serial
 *        are zeroed to mark it as never transmitted
 * last - NOTE(review): presumably non-zero marks the final packet of the
 *        call, which then gets RX_LAST_PACKET; the test itself is not
 *        shown here -- confirm
 */
1986 rxi_PrepareSendPacket(register struct rx_call *call,
1987 register struct rx_packet *p, register int last)
1989 register struct rx_connection *conn = call->conn;
1991 ssize_t len; /* len must be a signed type; it can go negative */
1993 p->flags &= ~RX_PKTFLAG_ACKED;
1994 p->header.cid = (conn->cid | call->channel);
1995 p->header.serviceId = conn->serviceId;
1996 p->header.securityIndex = conn->securityIndex;
1997 p->header.callNumber = *call->callNumber;
1998 p->header.seq = call->tnext++;
1999 p->header.epoch = conn->epoch;
2000 p->header.type = RX_PACKET_TYPE_DATA;
2001 p->header.flags = 0;
2002 p->header.spare = 0;
2003 if (conn->type == RX_CLIENT_CONNECTION)
2004 p->header.flags |= RX_CLIENT_INITIATED;
2007 p->header.flags |= RX_LAST_PACKET;
2009 clock_Zero(&p->retryTime); /* Never yet transmitted */
2010 clock_Zero(&p->firstSent); /* Never yet transmitted */
2011 p->header.serial = 0; /* Another way of saying never transmitted... */
2014 /* Now that we're sure this is the last data on the call, make sure
2015 * that the "length" and the sum of the iov_lens matches. */
/* len is the number of bytes the data iovecs (index 1 and up) have to
 * cover: the payload plus the security header of the connection. */
2016 len = p->length + call->conn->securityHeaderSize;
/* Subtract iovec lengths until len is used up; a negative len afterwards
 * is the surplus in the last iovec visited. */
2018 for (i = 1; i < p->niovecs && len > 0; i++) {
2019 len -= p->wirevec[i].iov_len;
2022 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2024 /* Free any extra elements in the wirevec */
/* Freeing starts no lower than index 2, so wirevec[0] and wirevec[1] are
 * never released here. */
2025 for (j = MAX(2, i); j < p->niovecs; j++) {
2026 rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
/* i - 1 is the last iovec the sizing loop visited; len is presumably
 * zero or negative here, so this shortens that iovec by the surplus. */
2029 p->wirevec[i - 1].iov_len += len;
2031 RXS_PreparePacket(conn->securityObject, call, p);
2034 /* Given an interface MTU size, calculate an adjusted MTU size that
2035 * will make efficient use of the RX buffers when the peer is sending
2036 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
/*
 * mtu - interface MTU to adjust.
 *
 * adjMTU is the size of one Rx header plus one jumbo buffer and one jumbo
 * header.  Values of mtu at or below adjMTU take the early branch; for
 * larger values the result is adjMTU plus a whole number (frags) of
 * RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE units.
 */
2038 rxi_AdjustIfMTU(int mtu)
2043 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2044 if (mtu <= adjMTU) {
/* Integer division: any partial unit is discarded. */
2051 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2052 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2055 /* Given an interface MTU size, and the peer's advertised max receive
2056 * size, calculate an adjusted maxMTU size that makes efficient use
2057 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
/*
 * mtu        - interface MTU; multiplied by rxi_nSendFrags
 * peerMaxMTU - upper bound applied to that product
 *
 * Returns the smaller of the two values after it has been passed through
 * rxi_AdjustIfMTU().
 */
2059 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2061 int maxMTU = mtu * rxi_nSendFrags;
2062 maxMTU = MIN(maxMTU, peerMaxMTU);
2063 return rxi_AdjustIfMTU(maxMTU);
2066 /* Given a packet size, figure out how many datagram packets will fit.
2067 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2068 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2069 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
/*
 * frags - number of fragments the datagram may be split into
 * mtu   - size of each fragment
 *
 * An mtu that, together with IPv6_FRAG_HDR_SIZE, is smaller than one jumbo
 * buffer plus the Rx header takes the early branch.
 */
2071 rxi_AdjustDgramPackets(int frags, int mtu)
2074 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
/* Byte budget: frags fragments of mtu + UDP_HDR_SIZE each, less one
 * UDP_HDR_SIZE, capped at RX_MAX_PACKET_SIZE. */
2077 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2078 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2079 /* subtract the size of the first and last packets */
2080 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
/* The 2 stands for the first and last packets removed from the budget
 * above; the quotient counts the middle packets that still fit. */
2084 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));