2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
11 #include "../afs/param.h"
12 #include <afsconfig.h>
14 #include "../afs/sysincludes.h"
15 #include "../afs/afsincludes.h"
16 #include "../rx/rx_kcommon.h"
17 #include "../rx/rx_clock.h"
18 #include "../rx/rx_queue.h"
19 #include "../rx/rx_packet.h"
20 #else /* defined(UKERNEL) */
21 #include "../h/types.h"
22 #ifndef AFS_LINUX20_ENV
23 #include "../h/systm.h"
25 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
26 #include "../afs/sysincludes.h"
28 #include "../h/socket.h"
29 #include "../netinet/in.h"
30 #include "../afs/afs_osi.h"
31 #include "../rx/rx_kmutex.h"
32 #include "../rx/rx_clock.h"
33 #include "../rx/rx_queue.h"
35 #include <sys/sysmacros.h>
37 #include "../rx/rx_packet.h"
38 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
39 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
40 #include "../sys/mount.h" /* it gets pulled in by something later anyway */
42 #include "../h/mbuf.h"
44 #endif /* defined(UKERNEL) */
45 #include "../rx/rx_globals.h"
47 #include <afs/param.h>
48 #include <afsconfig.h>
49 #include "sys/types.h"
52 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
56 #include <sys/socket.h>
57 #include <netinet/in.h>
58 #endif /* AFS_NT40_ENV */
59 #include "rx_xmit_nt.h"
62 #include <sys/socket.h>
63 #include <netinet/in.h>
69 #include <sys/sysmacros.h>
71 #include "rx_packet.h"
72 #include "rx_globals.h"
74 #include "rx_internal.h"
88 /* rxdb_fileID is used to identify the lock location, along with line#. */
89 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
90 #endif /* RX_LOCKS_DB */
91 struct rx_packet *rx_mallocedP = 0;
93 extern char cml_version_number[];
94 extern int (*rx_almostSent)();
96 void rxi_FreePacketNoLock(struct rx_packet *p);
97 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
98 afs_int32 ahost, short aport, afs_int32 istack);
100 extern char cml_version_number[];
101 extern int (*rx_almostSent)();
102 /* some rules about packets:
103 * 1. When a packet is allocated, the final iov_buf contains room for
104 * a security trailer, but iov_len masks that fact. If the security
105 * package wants to add the trailer, it may do so, and then extend
106 * iov_len appropriately. For this reason, packet's niovecs and
107 * iov_len fields should be accurate before calling PreparePacket.
111 * all packet buffers (iov_base) are integral multiples of
113 * offset is an integral multiple of the word size.
115 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
119 for (l=0, i=1; i< packet->niovecs ; i++ ) {
120 if (l + packet->wirevec[i].iov_len > offset) {
121 return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
123 l += packet->wirevec[i].iov_len;
130 * all packet buffers (iov_base) are integral multiples of the word size.
131 * offset is an integral multiple of the word size.
133 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
137 for (l=0, i=1; i< packet->niovecs ; i++ ) {
138 if (l + packet->wirevec[i].iov_len > offset) {
139 *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
143 l += packet->wirevec[i].iov_len;
150 * all packet buffers (iov_base) are integral multiples of the
152 * offset is an integral multiple of the word size.
154 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
156 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
157 int resid, char *out)
159 unsigned int i, j, l, r;
160 for (l=0, i=1; i< packet->niovecs ; i++ ) {
161 if (l + packet->wirevec[i].iov_len > offset) {
164 l += packet->wirevec[i].iov_len;
167 /* i is the iovec which contains the first little bit of data in which we
168 * are interested. l is the total length of everything prior to this iovec.
169 * j is the number of bytes we can safely copy out of this iovec.
172 while ((resid > 0) && (i < packet->niovecs)) {
173 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
174 bcopy ((char *)(packet->wirevec[i].iov_base) + (offset - l), out, j);
176 l += packet->wirevec[i].iov_len;
180 return (resid ? (r - resid) : r);
185 * all packet buffers (iov_base) are integral multiples of the
187 * offset is an integral multiple of the word size.
189 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
195 for (l=0, i=1; i < packet->niovecs; i++ ) {
196 if (l + packet->wirevec[i].iov_len > offset) {
199 l += packet->wirevec[i].iov_len;
202 /* i is the iovec which contains the first little bit of data in which we
203 * are interested. l is the total length of everything prior to this iovec.
204 * j is the number of bytes we can safely copy out of this iovec.
207 while ((resid > 0) && (i < RX_MAXWVECS)) {
208 if (i >= packet->niovecs)
209 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
212 b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
213 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
216 l += packet->wirevec[i].iov_len;
220 return (resid ? (r - resid) : r);
/*
 * allocCBuf -- grab one packet off the global free queue for use as a
 * continuation buffer, bumping the per-class alloc-failure statistic
 * and returning early when the class is over quota.
 *
 * NOTE(review): the embedded original line numbers skip values here, so
 * lines (switch/brace/#ifdef scaffolding, early returns, queue_Remove of
 * the chosen packet) were dropped from this listing.  Code is left
 * byte-identical; verify against the complete source.
 */
223 static struct rx_packet * allocCBuf(int class)
227 extern void rxi_MorePacketsNoLock();
232 MUTEX_ENTER(&rx_freePktQ_lock);
/* Over quota for this class: record the failure and flag that more
 * packets are needed.  (The switch header itself was dropped from the
 * listing; the cases below clearly select a rx_stats counter.) */
235 if (rxi_OverQuota(class)) {
237 rxi_NeedMorePackets = TRUE;
238 MUTEX_ENTER(&rx_stats_mutex);
240 case RX_PACKET_CLASS_RECEIVE:
241 rx_stats.receivePktAllocFailures++;
243 case RX_PACKET_CLASS_SEND:
244 rx_stats.sendPktAllocFailures++;
246 case RX_PACKET_CLASS_SPECIAL:
247 rx_stats.specialPktAllocFailures++;
249 case RX_PACKET_CLASS_RECV_CBUF:
250 rx_stats.receiveCbufPktAllocFailures++;
252 case RX_PACKET_CLASS_SEND_CBUF:
253 rx_stats.sendCbufPktAllocFailures++;
256 MUTEX_EXIT(&rx_stats_mutex);
/* Queue empty: remember we need more; a second (presumably #ifdef'd)
 * branch grows the pool in place -- TODO confirm which build does which. */
260 if (queue_IsEmpty(&rx_freePacketQueue)) {
262 rxi_NeedMorePackets = TRUE;
266 if (queue_IsEmpty(&rx_freePacketQueue)) {
267 rxi_MorePacketsNoLock(rx_initSendWindow);
/* Take the head of the free queue; a free packet must still carry the
 * RX_FREE_PACKET marker or the pool has been corrupted. */
272 c = queue_First(&rx_freePacketQueue, rx_packet);
274 if (c->header.flags != RX_FREE_PACKET)
275 osi_Panic("rxi_AllocPacket: packet not free\n");
281 MUTEX_EXIT(&rx_freePktQ_lock);
288 * Free a packet currently used as a continuation buffer
/*
 * rxi_freeCBuf -- return a packet used as a continuation buffer to the
 * free queue, under rx_freePktQ_lock, then wake any packet waiters.
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip); code left byte-identical -- verify against the full source.
 */
290 void rxi_freeCBuf(struct rx_packet *c)
292 extern void rxi_PacketsUnWait();
296 MUTEX_ENTER(&rx_freePktQ_lock);
298 rxi_FreePacketNoLock(c);
299 /* Wakeup anyone waiting for packets */
302 MUTEX_EXIT(&rx_freePktQ_lock);
306 /* this one is kind of awful.
307 * In rxkad, the packet has been all shortened, and everything, ready for
308 * sending. All of a sudden, we discover we need some of that space back.
309 * This isn't terribly general, because it knows that the packets are only
310 * rounded up to the EBS (userdata + security header).
/*
 * rxi_RoundUpPacket -- grow the last iovec of a shortened packet by
 * <nb> bytes, exploiting the rounding slack left after the packet was
 * trimmed (see the explanation above: buffers are only rounded up to
 * the EBS).  The first branch handles the case where the last vec is
 * the packet's own localdata (capacity RX_FIRSTBUFFERSIZE); the second
 * handles a continuation buffer (capacity RX_CBUFFERSIZE).
 *
 * NOTE(review): lines were dropped from this listing (declaration of i,
 * the else arm's header, returns); code left byte-identical -- verify
 * against the full source.
 */
312 int rxi_RoundUpPacket(p, nb)
313 struct rx_packet * p;
318 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
319 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
320 p->wirevec[i].iov_len += nb;
325 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
326 p->wirevec[i].iov_len += nb;
333 /* get sufficient space to store nb bytes of data (or more), and hook
334 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
335 * returns the number of bytes >0 which it failed to come up with.
336 * Don't need to worry about locking on packet, since only
337 * one thread can manipulate one at a time. Locking on continuation
338 * packets is handled by allocCBuf */
339 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
/*
 * rxi_AllocDataBuf -- hook enough continuation buffers onto packet <p>
 * to hold at least <nb> more bytes (see the contract comment above:
 * returns <=0 on success, otherwise the byte count it failed to get).
 * Each allocCBuf() success appends one RX_CBUFFERSIZE vec and grows
 * p->length accordingly.
 *
 * NOTE(review): trailing lines (niovecs update, return of nb) were
 * dropped from this listing; code left byte-identical -- verify against
 * the full source.
 */
340 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
344 for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
345 register struct rx_packet *cb;
346 if ((cb = allocCBuf(class))) {
347 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
348 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
349 nb -= RX_CBUFFERSIZE;
350 p->length += RX_CBUFFERSIZE;
359 /* Add more packet buffers */
/*
 * rxi_MorePackets -- allocate <apackets> more rx_packet structures in
 * one osi_Alloc block, initialize each one's first two wire vecs
 * (header + localdata) and free-marker, and append them all to the
 * global free queue under rx_freePktQ_lock.
 *
 * NOTE(review): rx_mallocedP is overwritten with the newest block each
 * call, so only the latest allocation is reachable from it (see
 * rxi_FreeAllPackets below) -- presumably intentional in this vintage;
 * verify against the full source.  Lines were also dropped from this
 * listing (embedded numbers skip).
 */
360 void rxi_MorePackets(int apackets)
362 extern void rxi_PacketsUnWait();
363 struct rx_packet *p, *e;
367 getme = apackets * sizeof(struct rx_packet);
368 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
370 PIN(p, getme); /* XXXXX */
371 bzero((char *)p, getme);
374 MUTEX_ENTER(&rx_freePktQ_lock);
/* vec 0 = wire header, vec 1 = first data buffer, for every new packet. */
376 for (e = p + apackets; p<e; p++) {
377 p->wirevec[0].iov_base = (char *) (p->wirehead);
378 p->wirevec[0].iov_len = RX_HEADER_SIZE;
379 p->wirevec[1].iov_base = (char *) (p->localdata);
380 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
381 p->header.flags = RX_FREE_PACKET;
384 queue_Append(&rx_freePacketQueue, p);
386 rx_nFreePackets += apackets;
387 rxi_NeedMorePackets = FALSE;
391 MUTEX_EXIT(&rx_freePktQ_lock);
396 /* Add more packet buffers */
/*
 * rxi_MorePacketsNoLock -- like rxi_MorePackets but assumes the caller
 * already holds rx_freePktQ_lock.  Additionally over-allocates: one
 * extra set of continuation buffers for every 4 packets, sized so 1/4
 * of the packets can hold maximal (jumbo) amounts of data.
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip); code left byte-identical -- verify against the full source.
 */
397 void rxi_MorePacketsNoLock(int apackets)
399 extern void rxi_PacketsUnWait();
400 struct rx_packet *p, *e;
403 /* allocate enough packets that 1/4 of the packets will be able
404 * to hold maximal amounts of data */
405 apackets += (apackets/4)
406 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
407 getme = apackets * sizeof(struct rx_packet);
408 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
410 bzero((char *)p, getme);
/* vec 0 = wire header, vec 1 = first data buffer, for every new packet. */
412 for (e = p + apackets; p<e; p++) {
413 p->wirevec[0].iov_base = (char *) (p->wirehead);
414 p->wirevec[0].iov_len = RX_HEADER_SIZE;
415 p->wirevec[1].iov_base = (char *) (p->localdata);
416 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
417 p->header.flags = RX_FREE_PACKET;
420 queue_Append(&rx_freePacketQueue, p);
422 rx_nFreePackets += apackets;
423 rxi_NeedMorePackets = FALSE;
/*
 * rxi_FreeAllPackets -- release the packet pool.  As the MTUXXX note
 * says, this only frees the block rx_mallocedP currently points at
 * (the most recent allocation), and sizes it from rx_maxReceiveWindow
 * rather than the actual allocation size -- a known limitation, not a
 * complete teardown.
 */
428 void rxi_FreeAllPackets(void)
430 /* must be called at proper interrupt level, etcetera */
431 /* MTUXXX need to free all Packets */
432 osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
433 UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
436 /* Allocate more packets iff we need more continuation buffers */
437 /* In kernel, can't page in memory with interrupts disabled, so we
438 * don't use the event mechanism. */
/*
 * rx_CheckPackets -- grow the packet pool when a prior allocation
 * failure set rxi_NeedMorePackets.  Polled rather than event-driven
 * because (per the comment above) the kernel cannot page in memory
 * with interrupts disabled.
 */
439 void rx_CheckPackets()
441 if (rxi_NeedMorePackets) {
442 rxi_MorePackets(rx_initSendWindow);
446 /* In the packet freeing routine below, the assumption is that
447 we want all of the packets to be used equally frequently, so that we
448 don't get packet buffers paging out. It would be just as valid to
449 assume that we DO want them to page out if not many are being used.
450 In any event, we assume the former, and append the packets to the end
452 /* This explanation is bogus. The free list doesn't remain in any kind of
453 useful order for long: the packets in use get pretty much randomly scattered
454 across all the pages. In order to permit unused {packets,bufs} to page out, they
455 must be stored so that packets which are adjacent in memory are adjacent in the
456 free list. An array springs rapidly to mind.
459 /* Actually free the packet p. */
/*
 * rxi_FreePacketNoLock -- mark packet <p> free and append it to the
 * global free queue.  Caller must hold rx_freePktQ_lock.  Panics on
 * double-free (flags already carry RX_FREE_PACKET).
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip, e.g. the rx_nFreePackets bump); verify against the full source.
 */
460 void rxi_FreePacketNoLock(struct rx_packet *p)
462 dpf(("Free %x\n", p));
464 if (p->header.flags & RX_FREE_PACKET)
465 osi_Panic("rxi_FreePacketNoLock: packet already free\n");
467 p->header.flags = RX_FREE_PACKET;
468 queue_Append(&rx_freePacketQueue, p);
/*
 * rxi_FreeDataBufsNoLock -- return the continuation buffers (vecs 2..
 * niovecs-1) of packet <p> to the free queue.  Caller must hold
 * rx_freePktQ_lock.  <first> must currently be 1 (MTUXXX), and vec 1
 * must be the packet's own localdata; both are sanity-checked.
 *
 * NOTE(review): lines were dropped from this listing (the NULL check
 * guarded by the panic at old line 485, trailing niovecs reset/return);
 * code left byte-identical -- verify against the full source.
 */
471 int rxi_FreeDataBufsNoLock(p, first)
472 struct rx_packet * p;
475 struct iovec *iov, *end;
477 if (first != 1) /* MTUXXX */
478 osi_Panic("FreeDataBufs 1: first must be 1");
479 iov = &p->wirevec[1];
480 end = iov + (p->niovecs-1);
481 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
482 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
/* Each continuation vec points into another packet's localdata;
 * RX_CBUF_TO_PACKET recovers that packet so it can be freed. */
483 for (iov++ ; iov < end ; iov++) {
485 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
486 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
494 int rxi_nBadIovecs = 0;
496 /* rxi_RestoreDataBufs
498 * Restore the correct sizes to the iovecs. Called when reusing a packet
499 * for reading off the wire.
/*
 * rxi_RestoreDataBufs -- reset every iovec of <p> to its full capacity
 * (header, localdata, then RX_CBUFFERSIZE for each continuation vec),
 * so the packet can be reused for reading off the wire.  A NULL
 * continuation vec is counted in rxi_nBadIovecs (the handling lines
 * were dropped from this listing -- verify against the full source).
 */
501 void rxi_RestoreDataBufs(struct rx_packet *p)
504 struct iovec *iov = &p->wirevec[2];
506 p->wirevec[0].iov_base = (char *) (p->wirehead);
507 p->wirevec[0].iov_len = RX_HEADER_SIZE;
508 p->wirevec[1].iov_base = (char *) (p->localdata);
509 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
511 for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
512 if (!iov->iov_base) {
517 iov->iov_len = RX_CBUFFERSIZE;
/*
 * rxi_TrimDataBufs -- free only the continuation buffers of <p> that
 * hold no message data.  First walks past the vecs still covered by
 * p->length, then frees the remainder under rx_freePktQ_lock.  <first>
 * must be 1 (checked by the panic at old line 531).
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip: NULL checks, niovecs adjustment, wakeup); code left
 * byte-identical -- verify against the full source.
 */
521 int rxi_TrimDataBufs(p, first)
522 struct rx_packet * p;
525 extern void rxi_PacketsUnWait();
527 struct iovec *iov, *end;
531 osi_Panic("TrimDataBufs 1: first must be 1");
533 /* Skip over continuation buffers containing message data */
534 iov = &p->wirevec[2];
535 end = iov + (p->niovecs-2);
536 length = p->length - p->wirevec[1].iov_len;
537 for (; iov < end && length > 0 ; iov++) {
539 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
540 length -= iov->iov_len;
543 /* iov now points to the first empty data buffer. */
548 MUTEX_ENTER(&rx_freePktQ_lock);
/* Everything from here to the end of the vec array is empty: give the
 * underlying packets back to the free queue. */
550 for (; iov < end ; iov++) {
552 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
553 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
558 MUTEX_EXIT(&rx_freePktQ_lock);
564 /* Free the packet p. P is assumed not to be on any queue, i.e.
565 * remove it yourself first if you call this routine. */
/*
 * rxi_FreePacket -- free packet <p> (which must not be on any queue):
 * release its continuation buffers, return it to the free queue, and
 * wake any packet waiters, all under rx_freePktQ_lock.
 */
566 void rxi_FreePacket(struct rx_packet *p)
568 extern void rxi_PacketsUnWait();
572 MUTEX_ENTER(&rx_freePktQ_lock);
574 rxi_FreeDataBufsNoLock(p,1);
575 rxi_FreePacketNoLock(p);
576 /* Wakeup anyone waiting for packets */
579 MUTEX_EXIT(&rx_freePktQ_lock);
584 /* rxi_AllocPacket sets up p->length so it reflects the number of
585 * bytes in the packet at this point, **not including** the header.
586 * The header is absolutely necessary, besides, this is the way the
587 * length field is usually used */
/*
 * rxi_AllocPacketNoLock -- allocate one packet from the free queue
 * (caller holds rx_freePktQ_lock).  Over-quota classes fail fast with
 * NULL after bumping the per-class failure counter.  The returned
 * packet's first two vecs are reset to full capacity and p->length is
 * set to RX_FIRSTBUFFERSIZE (data-area size, header excluded -- see
 * the contract comment above).
 *
 * NOTE(review): lines were dropped from this listing (switch header,
 * queue_Remove, niovecs reset, final return); code left byte-identical
 * -- verify against the full source.
 */
588 struct rx_packet *rxi_AllocPacketNoLock(class)
591 register struct rx_packet *p;
594 if (rxi_OverQuota(class)) {
595 rxi_NeedMorePackets = TRUE;
596 MUTEX_ENTER(&rx_stats_mutex);
598 case RX_PACKET_CLASS_RECEIVE:
599 rx_stats.receivePktAllocFailures++;
601 case RX_PACKET_CLASS_SEND:
602 rx_stats.sendPktAllocFailures++;
604 case RX_PACKET_CLASS_SPECIAL:
605 rx_stats.specialPktAllocFailures++;
607 case RX_PACKET_CLASS_RECV_CBUF:
608 rx_stats.receiveCbufPktAllocFailures++;
610 case RX_PACKET_CLASS_SEND_CBUF:
611 rx_stats.sendCbufPktAllocFailures++;
614 MUTEX_EXIT(&rx_stats_mutex);
615 return (struct rx_packet *) 0;
619 MUTEX_ENTER(&rx_stats_mutex);
620 rx_stats.packetRequests++;
621 MUTEX_EXIT(&rx_stats_mutex);
/* Two empty-queue branches survive (presumably one per build flavor --
 * TODO confirm): panic, or grow the pool in place. */
624 if (queue_IsEmpty(&rx_freePacketQueue))
625 osi_Panic("rxi_AllocPacket error");
627 if (queue_IsEmpty(&rx_freePacketQueue))
628 rxi_MorePacketsNoLock(rx_initSendWindow);
632 p = queue_First(&rx_freePacketQueue, rx_packet);
633 if (p->header.flags != RX_FREE_PACKET)
634 osi_Panic("rxi_AllocPacket: packet not free\n");
636 dpf(("Alloc %x, class %d\n", p, class));
641 /* have to do this here because rx_FlushWrite fiddles with the iovs in
642 * order to truncate outbound packets. In the near future, may need
643 * to allocate bufs from a static pool here, and/or in AllocSendPacket
645 p->wirevec[0].iov_base = (char *) (p->wirehead);
646 p->wirevec[0].iov_len = RX_HEADER_SIZE;
647 p->wirevec[1].iov_base = (char *) (p->localdata);
648 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
650 p->length = RX_FIRSTBUFFERSIZE;
/*
 * rxi_AllocPacket -- locking wrapper around rxi_AllocPacketNoLock.
 * (The trailing `return p;` was dropped from this listing.)
 */
654 struct rx_packet *rxi_AllocPacket(class)
657 register struct rx_packet *p;
659 MUTEX_ENTER(&rx_freePktQ_lock);
660 p = rxi_AllocPacketNoLock(class);
661 MUTEX_EXIT(&rx_freePktQ_lock);
665 /* This guy comes up with as many buffers as it {takes,can get} given
666 * the MTU for this call. It also sets the packet length before
667 * returning. caution: this is often called at NETPRI
668 * Called with call locked.
/*
 * rxi_AllocSendPacket -- allocate a send packet sized for this call's
 * MTU (see the header comment above: often called at NETPRI, with the
 * call locked).  Loops until it gets a packet or the call errors:
 * on success it grows/trims the data area toward min(want, mud) and
 * accounts for the security header+trailer (delta); on failure it
 * sleeps on rx_waitingForPackets with the call lock dropped.
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip: break/return paths, length trimming, loop close); code left
 * byte-identical -- verify against the full source.
 */
670 struct rx_packet *rxi_AllocSendPacket(call, want)
671 register struct rx_call *call;
674 register struct rx_packet *p = (struct rx_packet *) 0;
676 register unsigned delta;
/* mud = max user data for this call; delta = security overhead. */
679 mud = call->MTU - RX_HEADER_SIZE;
680 delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
681 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
683 while (!(call->error)) {
684 MUTEX_ENTER(&rx_freePktQ_lock);
685 /* if an error occurred, or we get the packet we want, we're done */
686 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
687 MUTEX_EXIT(&rx_freePktQ_lock);
690 want = MIN(want, mud);
692 if ((unsigned) want > p->length)
693 (void) rxi_AllocDataBuf(p, (want - p->length),
694 RX_PACKET_CLASS_SEND_CBUF);
696 if ((unsigned) p->length > mud)
699 if (delta >= p->length) {
708 /* no error occurred, and we didn't get a packet, so we sleep.
709 * At this point, we assume that packets will be returned
710 * sooner or later, as packets are acknowledged, and so we
713 call->flags |= RX_CALL_WAIT_PACKETS;
714 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
/* Drop the call lock while sleeping so acks can be processed and
 * packets returned; re-take it before clearing the wait flag. */
715 MUTEX_EXIT(&call->lock);
716 rx_waitingForPackets = 1;
718 #ifdef RX_ENABLE_LOCKS
719 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
721 osi_rxSleep(&rx_waitingForPackets);
723 MUTEX_EXIT(&rx_freePktQ_lock);
724 MUTEX_ENTER(&call->lock);
725 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
726 call->flags &= ~RX_CALL_WAIT_PACKETS;
735 /* count the number of used FDs */
/*
 * CountFDs -- count how many of the first <amax> file descriptors are
 * open, by probing each with fstat().  Used for the rxdebug statistics
 * reply.  (Declarations of tstat/count and the return were dropped from
 * this listing -- verify against the full source.)
 */
736 static int CountFDs(amax)
739 register int i, code;
743 for(i=0;i<amax;i++) {
744 code = fstat(i, &tstat);
745 if (code == 0) count++;
752 #define CountFDs(amax) amax
756 #if !defined(KERNEL) || defined(UKERNEL)
758 /* This function reads a single packet from the interface into the
759 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
760 * (host,port) of the sender are stored in the supplied variables, and
761 * the data length of the packet is stored in the packet structure.
762 * The header is decoded. */
/*
 * rxi_ReadPacket -- read one UDP datagram from <socket> into packet
 * <p> via recvmsg on the packet's iovec array; store the sender in
 * (*host, *port), decode the rx header, account statistics, and trim
 * unused continuation buffers.  Returns 0 for a bogus packet (per the
 * header comment above).
 *
 * NOTE(review): many lines were dropped from this listing (embedded
 * numbers skip: error-path braces, returns, the success path); code
 * left byte-identical -- verify against the full source.
 */
763 int rxi_ReadPacket(socket, p, host, port)
765 register struct rx_packet *p;
769 struct sockaddr_in from;
772 register afs_int32 tlen, savelen;
/* Advertise rx_maxJumboRecvSize and make sure the packet has room for
 * it (rxi_AllocDataBuf grows the iovec array as needed). */
774 rx_computelen(p, tlen);
775 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
777 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
778 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
779 * it once in order to avoid races. */
782 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
790 /* Extend the last iovec for padding, it's just to make sure that the
791 * read doesn't return more data than we expect, and is done to get around
792 * our problems caused by the lack of a length field in the rx header.
793 * Use the extra buffer that follows the localdata in each packet
795 savelen = p->wirevec[p->niovecs].iov_len;
796 p->wirevec[p->niovecs].iov_len += RX_EXTRABUFFERSIZE;
798 bzero((char *)&msg, sizeof(msg));
799 msg.msg_name = (char *) &from;
800 msg.msg_namelen = sizeof(struct sockaddr_in);
801 msg.msg_iov = p->wirevec;
802 msg.msg_iovlen = p->niovecs;
803 nbytes = rxi_Recvmsg(socket, &msg, 0);
805 /* restore the vec to its correct state */
806 p->wirevec[p->niovecs].iov_len = savelen;
/* p->length excludes the wire header; the 0x8000 test catches a
 * negative nbytes that wrapped -- TODO confirm intent from full source. */
808 p->length = (nbytes - RX_HEADER_SIZE);
809 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
811 rxi_MorePackets(rx_initSendWindow);
813 else if (nbytes < 0 && errno == EWOULDBLOCK) {
814 MUTEX_ENTER(&rx_stats_mutex);
815 rx_stats.noPacketOnRead++;
816 MUTEX_EXIT(&rx_stats_mutex);
/* Any other short/failed read is recorded as a bogus packet. */
820 MUTEX_ENTER(&rx_stats_mutex);
821 rx_stats.bogusPacketOnRead++;
822 rx_stats.bogusHost = from.sin_addr.s_addr;
823 MUTEX_EXIT(&rx_stats_mutex);
824 dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
825 from.sin_port,nbytes));
830 /* Extract packet header. */
831 rxi_DecodePacketHeader(p);
833 *host = from.sin_addr.s_addr;
834 *port = from.sin_port;
835 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
836 struct rx_peer *peer;
837 MUTEX_ENTER(&rx_stats_mutex);
838 rx_stats.packetsRead[p->header.type-1]++;
839 MUTEX_EXIT(&rx_stats_mutex);
841 * Try to look up this peer structure. If it doesn't exist,
842 * don't create a new one -
843 * we don't keep count of the bytes sent/received if a peer
844 * structure doesn't already exist.
846 * The peer/connection cleanup code assumes that there is 1 peer
847 * per connection. If we actually created a peer structure here
848 * and this packet was an rxdebug packet, the peer structure would
849 * never be cleaned up.
851 peer = rxi_FindPeer(*host, *port, 0, 0);
853 MUTEX_ENTER(&peer->peer_lock);
854 hadd32(peer->bytesReceived, p->length);
855 MUTEX_EXIT(&peer->peer_lock);
859 /* Free any empty packet buffers at the end of this packet */
860 rxi_TrimDataBufs(p, 1);
866 #endif /* !KERNEL || UKERNEL */
868 /* This function splits off the first packet in a jumbo packet.
869 * As of AFS 3.5, jumbograms contain more than one fixed size
870 * packet, and the RX_JUMBO_PACKET flag is set in all but the
871 * last packet header. All packets (except the last) are padded to
872 * fall on RX_CBUFFERSIZE boundaries.
873 * HACK: We store the length of the first n-1 packets in the
874 * last two pad bytes. */
/*
 * rxi_SplitJumboPacket -- split the first fixed-size packet off a
 * jumbogram (see the header comment above).  The first packet keeps
 * RX_JUMBOBUFFERSIZE bytes; the remainder becomes a new packet <np>
 * built from the continuation buffers, with serial/seq advanced by one
 * and flags/spare taken from the abbreviated 4-byte jumbo header.
 *
 * NOTE(review): lines were dropped from this listing (embedded numbers
 * skip: error returns, the trailing return of np); code left
 * byte-identical -- verify against the full source.
 */
876 struct rx_packet *rxi_SplitJumboPacket(p, host, port, first)
877 register struct rx_packet *p;
882 struct rx_packet *np;
883 struct rx_jumboHeader *jp;
889 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
890 * bytes in length. All but the first packet are preceded by
891 * an abbreviated four byte header. The length of the last packet
892 * is calculated from the size of the jumbogram. */
893 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
/* Sanity checks: the jumbogram must be long enough and must carry at
 * least one continuation buffer beyond the two fixed vecs. */
895 if ((int)p->length < length) {
896 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
899 niov = p->niovecs - 2;
901 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
904 iov = &p->wirevec[2];
905 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
907 /* Get a pointer to the abbreviated packet header */
908 jp = (struct rx_jumboHeader *)
909 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
911 /* Set up the iovecs for the next packet */
912 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
913 np->wirevec[0].iov_len = sizeof(struct rx_header);
914 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
915 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
916 np->niovecs = niov+1;
917 for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
918 np->wirevec[i] = *iov;
920 np->length = p->length - length;
921 p->length = RX_JUMBOBUFFERSIZE;
924 /* Convert the jumbo packet header to host byte order */
925 temp = ntohl(*(afs_uint32 *)jp);
926 jp->flags = (u_char)(temp >> 24);
927 jp->cksum = (u_short)(temp);
929 /* Fill in the packet header */
930 np->header = p->header;
931 np->header.serial = p->header.serial + 1;
932 np->header.seq = p->header.seq + 1;
933 np->header.flags = jp->flags;
934 np->header.spare = jp->cksum;
940 /* Send a udp datagram */
/*
 * osi_NetSend -- send a UDP datagram: build a msghdr over the supplied
 * iovec array and hand it to rxi_Sendmsg.  (The msg_name/msg_iov
 * assignments and the return were dropped from this listing -- verify
 * against the full source.)
 */
941 int osi_NetSend(socket, addr, dvec, nvecs, length, istack)
951 memset(&msg, 0, sizeof(msg));
953 msg.msg_iovlen = nvecs;
955 msg.msg_namelen = sizeof(struct sockaddr_in);
957 rxi_Sendmsg(socket, &msg, 0);
961 #elif !defined(UKERNEL)
962 /* osi_NetSend is defined in afs/afs_osinet.c
963 * message receipt is done in rxk_input or rx_put.
968 * Copy an mblock to the contiguous area pointed to by cp.
969 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
970 * but it doesn't really.
971 * Returns the number of bytes not transferred.
972 * The message is NOT changed.
/*
 * cpytoc -- copy the M_DATA contents of an mblk chain into the
 * contiguous buffer <cp> (Solaris streams path).  As noted above, the
 * <off> argument is not actually honored; returns bytes NOT
 * transferred.  The chain itself is not modified.
 *
 * NOTE(review): lines were dropped from this listing (pointer/length
 * advancement, return); code left byte-identical.
 */
974 static int cpytoc(mp, off, len, cp)
976 register int off, len;
981 for (;mp && len > 0; mp = mp->b_cont) {
982 if (mp->b_datap->db_type != M_DATA) {
985 n = MIN(len, (mp->b_wptr - mp->b_rptr));
986 bcopy((char *)mp->b_rptr, cp, n);
994 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
995 * but it doesn't really.
996 * This sucks, anyway, do it like m_cpy.... below
/*
 * cpytoiovec -- copy the M_DATA contents of an mblk chain into an
 * iovec array (Solaris streams path).  As noted above, <off> is not
 * actually honored.  Tracks position with: i = current iovec, o =
 * offset within it, t = its length, m/n = chunk sizes.
 *
 * NOTE(review): many interior lines were dropped from this listing
 * (iovec advancement, bookkeeping, return); code left byte-identical
 * -- verify against the full source.
 */
998 static int cpytoiovec(mp, off, len, iovs, niovs)
1000 int off, len, niovs;
1001 register struct iovec *iovs;
1003 register int m,n,o,t,i;
1005 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1006 if (mp->b_datap->db_type != M_DATA) {
1009 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1015 t = iovs[i].iov_len;
1018 bcopy((char *)mp->b_rptr, iovs[i].iov_base + o, m);
1027 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1028 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1030 #if !defined(AFS_LINUX20_ENV)
/*
 * m_cpytoiovec -- BSD mbuf flavor: skip <off> bytes into mbuf chain
 * <m>, then copy <len> bytes into the iovec array, walking both the
 * mbuf chain (p1/l1) and the iovec array (p2/l2) in lockstep.
 *
 * NOTE(review): many interior lines were dropped from this listing
 * (the copy itself, the advance-to-next-mbuf/iovec bookkeeping, the
 * return); code left byte-identical -- verify against the full source.
 */
1031 static int m_cpytoiovec(m, off, len, iovs, niovs)
1033 int off, len, niovs;
1034 struct iovec iovs[];
1037 unsigned int l1, l2, i, t;
1039 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1040 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
/* Consume <off> bytes worth of leading mbufs. */
1043 if (m->m_len <= off) {
1053 p1 = mtod(m, caddr_t)+off;
1054 l1 = m->m_len - off;
1056 p2 = iovs[0].iov_base;
1057 l2 = iovs[0].iov_len;
/* Copy min(l1, l2, len) at each step, refilling whichever side ran dry. */
1060 t = MIN(l1, MIN(l2, (unsigned int)len));
1069 p1 = mtod(m, caddr_t);
1075 p2 = iovs[i].iov_base;
1076 l2 = iovs[i].iov_len;
1084 #endif /* AFS_SUN5_ENV */
1086 #if !defined(AFS_LINUX20_ENV)
/*
 * rx_mb_to_packet -- copy <data_len> bytes (after skipping <hdr_len>)
 * from mbuf chain <amb> into the packet's wire vector via
 * m_cpytoiovec, then (in lines dropped from this listing) free the
 * chain and return a status -- verify against the full source.
 */
1087 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1094 struct rx_packet *phandle;
1095 int hdr_len, data_len;
1099 code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1105 #endif /*KERNEL && !UKERNEL*/
1108 /* send a response to a debug packet */
/*
 * rxi_ReceiveDebugPacket -- service an rxdebug request packet <ap>:
 * decode the request (type/index), build the corresponding reply in
 * place (basic stats, connection dump, peer dump, raw rx_stats, or a
 * BADTYPE error), and transmit it with rxi_SendDebugPacket.  The reply
 * is written back into <ap> itself, growing it with rxi_AllocDataBuf
 * when the reply structure is larger than the current packet.
 *
 * NOTE(review): this listing dropped many lines (the switch header,
 * braces, #else/#endif arms, break statements, the final return);
 * code is left byte-identical -- verify against the complete source.
 */
1110 struct rx_packet *rxi_ReceiveDebugPacket(ap, asocket, ahost, aport, istack)
1114 register struct rx_packet *ap;
1117 struct rx_debugIn tin;
1119 struct rx_serverQueueEntry *np, *nqe;
1121 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1122 /* all done with packet, now set length to the truth, so we can
1123 * reuse this packet */
1124 rx_computelen(ap, ap->length);
/* Request fields arrive in network order. */
1126 tin.type = ntohl(tin.type);
1127 tin.index = ntohl(tin.index);
1129 case RX_DEBUGI_GETSTATS: {
1130 struct rx_debugStats tstat;
1132 /* get basic stats */
1133 bzero ((char *)&tstat, sizeof(tstat)); /* make sure spares are zero */
1134 tstat.version = RX_DEBUGI_VERSION;
1135 #ifndef RX_ENABLE_LOCKS
1136 tstat.waitingForPackets = rx_waitingForPackets;
1138 tstat.nFreePackets = htonl(rx_nFreePackets);
1139 tstat.callsExecuted = htonl(rxi_nCalls);
1140 tstat.packetReclaims = htonl(rx_packetReclaims);
1141 tstat.usedFDs = CountFDs(64);
1142 tstat.nWaiting = htonl(rx_nWaiting);
1143 queue_Count( &rx_idleServerQueue, np, nqe,
1144 rx_serverQueueEntry, tstat.idleThreads);
1145 tstat.idleThreads = htonl(tstat.idleThreads);
/* Grow the packet if the reply doesn't fit, then send it. */
1146 tl = sizeof(struct rx_debugStats) - ap->length;
1148 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1151 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1152 ap->length = sizeof(struct rx_debugStats);
1153 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1154 rx_computelen(ap, ap->length);
1159 case RX_DEBUGI_GETALLCONN:
1160 case RX_DEBUGI_GETCONN: {
1162 register struct rx_connection *tc;
1163 struct rx_call *tcall;
1164 struct rx_debugConn tconn;
1165 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1168 tl = sizeof(struct rx_debugConn) - ap->length;
1170 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1174 bzero ((char *)&tconn, sizeof(tconn)); /* make sure spares are zero */
1175 /* get N'th (maybe) "interesting" connection info */
1176 for(i=0;i<rx_hashTableSize;i++) {
1177 #if !defined(KERNEL)
1178 /* the time complexity of the algorithm used here
1179 * exponentially increases with the number of connections.
1181 #ifdef AFS_PTHREAD_ENV
1184 (void) IOMGR_Poll();
1187 MUTEX_ENTER(&rx_connHashTable_lock);
1188 /* We might be slightly out of step since we are not
1189 * locking each call, but this is only debugging output.
1191 for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
1192 if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1193 tconn.host = tc->peer->host;
1194 tconn.port = tc->peer->port;
1195 tconn.cid = htonl(tc->cid);
1196 tconn.epoch = htonl(tc->epoch);
1197 tconn.serial = htonl(tc->serial);
1198 for(j=0;j<RX_MAXCALLS;j++) {
1199 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1200 if ((tcall=tc->call[j])) {
1201 tconn.callState[j] = tcall->state;
1202 tconn.callMode[j] = tcall->mode;
1203 tconn.callFlags[j] = tcall->flags;
1204 if (queue_IsNotEmpty(&tcall->rq))
1205 tconn.callOther[j] |= RX_OTHER_IN;
1206 if (queue_IsNotEmpty(&tcall->tq))
1207 tconn.callOther[j] |= RX_OTHER_OUT;
1209 else tconn.callState[j] = RX_STATE_NOTINIT;
1212 tconn.natMTU = htonl(tc->peer->natMTU);
1213 tconn.error = htonl(tc->error);
1214 tconn.flags = tc->flags;
1215 tconn.type = tc->type;
1216 tconn.securityIndex = tc->securityIndex;
1217 if (tc->securityObject) {
1218 RXS_GetStats (tc->securityObject, tc,
/* Byte-swap the security stats in place via these helper macros. */
1220 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1221 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1224 DOHTONL(packetsReceived);
1225 DOHTONL(packetsSent);
1226 DOHTONL(bytesReceived);
1229 i<sizeof(tconn.secStats.spares)/sizeof(short);
1233 i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
/* Found the requested connection: send it and return early. */
1238 MUTEX_EXIT(&rx_connHashTable_lock);
1239 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1241 ap->length = sizeof(struct rx_debugConn);
1242 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1247 MUTEX_EXIT(&rx_connHashTable_lock);
1249 /* if we make it here, there are no interesting packets */
1250 tconn.cid = htonl(0xffffffff); /* means end */
1251 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1253 ap->length = sizeof(struct rx_debugConn);
1254 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1260 * Pass back all the peer structures we have available
1263 case RX_DEBUGI_GETPEER: {
1265 register struct rx_peer *tp;
1266 struct rx_debugPeer tpeer;
1269 tl = sizeof(struct rx_debugPeer) - ap->length;
1271 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1275 bzero ((char *)&tpeer, sizeof(tpeer));
1276 for(i=0;i<rx_hashTableSize;i++) {
1277 #if !defined(KERNEL)
1278 /* the time complexity of the algorithm used here
1279 * exponentially increases with the number of peers.
1281 * Yielding after processing each hash table entry
1282 * and dropping rx_peerHashTable_lock.
1283 * also increases the risk that we will miss a new
1284 * entry - but we are willing to live with this
1285 * limitation since this is meant for debugging only
1287 #ifdef AFS_PTHREAD_ENV
1290 (void) IOMGR_Poll();
1293 MUTEX_ENTER(&rx_peerHashTable_lock);
1294 for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
1295 if (tin.index-- <= 0) {
1296 tpeer.host = tp->host;
1297 tpeer.port = tp->port;
1298 tpeer.ifMTU = htons(tp->ifMTU);
1299 tpeer.idleWhen = htonl(tp->idleWhen);
1300 tpeer.refCount = htons(tp->refCount);
1301 tpeer.burstSize = tp->burstSize;
1302 tpeer.burst = tp->burst;
1303 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1304 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1305 tpeer.rtt = htonl(tp->rtt);
1306 tpeer.rtt_dev = htonl(tp->rtt_dev);
1307 tpeer.timeout.sec = htonl(tp->timeout.sec);
1308 tpeer.timeout.usec = htonl(tp->timeout.usec);
1309 tpeer.nSent = htonl(tp->nSent);
1310 tpeer.reSends = htonl(tp->reSends);
1311 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1312 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1313 tpeer.rateFlag = htonl(tp->rateFlag);
1314 tpeer.natMTU = htons(tp->natMTU);
1315 tpeer.maxMTU = htons(tp->maxMTU);
1316 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1317 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1318 tpeer.MTU = htons(tp->MTU);
1319 tpeer.cwind = htons(tp->cwind);
1320 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1321 tpeer.congestSeq = htons(tp->congestSeq);
1322 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1323 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1324 tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1325 tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
/* Found the requested peer: send it and return early. */
1327 MUTEX_EXIT(&rx_peerHashTable_lock);
1328 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1330 ap->length = sizeof(struct rx_debugPeer);
1331 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1336 MUTEX_EXIT(&rx_peerHashTable_lock);
1338 /* if we make it here, there are no interesting packets */
1339 tpeer.host = htonl(0xffffffff); /* means end */
1340 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1342 ap->length = sizeof(struct rx_debugPeer);
1343 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1348 case RX_DEBUGI_RXSTATS: {
1352 tl = sizeof(rx_stats) - ap->length;
1354 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1358 /* Since it's all int32s convert to network order with a loop. */
1359 MUTEX_ENTER(&rx_stats_mutex);
1360 s = (afs_int32 *)&rx_stats;
1361 for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1362 rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1365 ap->length = sizeof(rx_stats);
1366 MUTEX_EXIT(&rx_stats_mutex);
1367 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1373 /* error response packet */
1374 tin.type = htonl(RX_DEBUGI_BADTYPE);
1375 tin.index = tin.type;
1376 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1378 ap->length = sizeof(struct rx_debugIn);
1379 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* Reply to a version-probe packet: overwrite the packet's payload with the
 * build's version string and bounce it back to the sender via
 * rxi_SendDebugPacket().
 * NOTE(review): this listing is truncated -- the K&R declarations for
 * asocket/ahost/aport/istack, the body braces, and the return statement are
 * not visible here. */
1386 struct rx_packet *rxi_ReceiveVersionPacket(ap, asocket, ahost, aport, istack)
1390 register struct rx_packet *ap;
/* 65 bytes copied starting at cml_version_number+4 -- presumably skips a
 * leading tag in the version string; TODO confirm against the definition of
 * cml_version_number (declared extern in this file's head). */
1394 rx_packetwrite(ap, 0, 65, cml_version_number+4);
1397 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1403 /* send a debug packet back to the sender */
/* Transmit one already-built debug reply on asocket to ahost:aport.  The
 * address/port are assigned straight into the sockaddr, so they are
 * presumably already in network byte order -- TODO confirm at the callers.
 * NOTE(review): this listing is truncated; the declarations of i, savelen,
 * saven and nbytes, plus several braces and a loop break, are not visible. */
1404 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1405 afs_int32 ahost, short aport, afs_int32 istack)
1407 struct sockaddr_in taddr;
1413 int waslocked = ISAFS_GLOCK();
1416 taddr.sin_family = AF_INET;
1417 taddr.sin_port = aport;
1418 taddr.sin_addr.s_addr = ahost;
/* Walk the data iovecs and shrink the one containing the final byte so the
 * iovec chain covers exactly apacket->length bytes; the pre-trim length and
 * iovec count are remembered in savelen/saven and restored below. */
1421 /* We need to trim the niovecs. */
1422 nbytes = apacket->length;
1423 for (i=1; i < apacket->niovecs; i++) {
1424 if (nbytes <= apacket->wirevec[i].iov_len) {
1425 savelen = apacket->wirevec[i].iov_len;
1426 saven = apacket->niovecs;
1427 apacket->wirevec[i].iov_len = nbytes;
1428 apacket->niovecs = i+1; /* so condition fails because i == niovecs */
1430 else nbytes -= apacket->wirevec[i].iov_len;
/* Drop the AFS global lock (if held) around the network send so it is not
 * held across a potentially blocking operation. */
1434 if (waslocked) AFS_GUNLOCK();
1436 /* debug packets are not reliably delivered, hence the cast below. */
1437 (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1438 apacket->length+RX_HEADER_SIZE, istack);
1440 if (waslocked) AFS_GLOCK();
/* Undo the iovec trim so the caller gets its packet back unmodified. */
1443 if (saven) { /* means we truncated the packet above. */
1444 apacket->wirevec[i-1].iov_len = savelen;
1445 apacket->niovecs = saven;
1450 /* Send the packet to appropriate destination for the specified
1451 * connection. The header is first encoded and placed in the packet.
/* Single-packet send path: stamps a fresh serial number, encodes the wire
 * header, optionally drops the packet for testing, sends it, and updates
 * send statistics and the peer's byte counter.
 * NOTE(review): this listing is truncated -- parameter/local declarations
 * (istack, socket, waslocked, i), several braces and #ifdef RXDEBUG lines
 * are not visible here. */
1453 void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
1459 struct sockaddr_in addr;
1460 register struct rx_peer *peer = conn->peer;
1463 char deliveryType = 'S';
1465 /* The address we're sending the packet to */
/* peer->host/port are copied verbatim into the sockaddr, so presumably
 * stored in network byte order -- TODO confirm against struct rx_peer. */
1466 addr.sin_family = AF_INET;
1467 addr.sin_port = peer->port;
1468 addr.sin_addr.s_addr = peer->host;
1470 /* This stuff should be revamped, I think, so that most, if not
1471 * all, of the header stuff is always added here. We could
1472 * probably do away with the encode/decode routines. XXXXX */
1474 /* Stamp each packet with a unique serial number. The serial
1475 * number is maintained on a connection basis because some types
1476 * of security may be based on the serial number of the packet,
1477 * and security is handled on a per authenticated-connection
1479 /* Pre-increment, to guarantee no zero serial number; a zero
1480 * serial number means the packet was never sent. */
1481 MUTEX_ENTER(&conn->conn_data_lock);
1482 p->header.serial = ++conn->serial;
1483 MUTEX_EXIT(&conn->conn_data_lock);
1484 /* This is so we can adjust retransmit time-outs better in the face of
1485 * rapidly changing round-trip times. RTO estimation is not a la Karn.
/* firstSerial records the serial of the packet's first transmission; only
 * set once so retransmissions keep the original value. */
1487 if (p->firstSerial == 0) {
1488 p->firstSerial = p->header.serial;
1492 /* If an output tracer function is defined, call it with the packet and
1493 * network address. Note this function may modify its arguments. */
1494 if (rx_almostSent) {
1495 int drop = (*rx_almostSent) (p, &addr);
1496 /* drop packet if return value is non-zero? */
1497 if (drop) deliveryType = 'D'; /* Drop the packet */
1501 /* Get network byte order header */
1502 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1503 * touch ALL the fields */
1505 /* Send the packet out on the same socket that related packets are being
1507 socket = (conn->type == RX_CLIENT_CONNECTION
1508 ? rx_socket : conn->service->socket);
1511 /* Possibly drop this packet, for testing purposes */
1512 if ((deliveryType == 'D') ||
1513 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1514 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1515 deliveryType = 'D'; /* Drop the packet */
1518 deliveryType = 'S'; /* Send the packet */
1519 #endif /* RXDEBUG */
1521 /* Loop until the packet is sent. We'd prefer just to use a
1522 * blocking socket, but unfortunately the interface doesn't
1523 * allow us to have the socket block in send mode, and not
1524 * block in receive mode */
/* Release the AFS global lock around the (possibly blocking) send. */
1527 waslocked = ISAFS_GLOCK();
1528 if (waslocked) AFS_GUNLOCK();
1530 if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1531 p->length+RX_HEADER_SIZE, istack)){
1532 /* send failed, so let's hurry up the resend, eh? */
1533 MUTEX_ENTER(&rx_stats_mutex);
1534 rx_stats.netSendFailures++;
1535 MUTEX_EXIT(&rx_stats_mutex);
/* On failure, schedule an early retransmit: 10ms plus a backoff term that
 * grows by 256ms per backoff step. */
1536 p->retryTime = p->timeSent; /* resend it very soon */
1537 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1540 if (waslocked) AFS_GLOCK();
1545 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1546 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1547 peer->host, peer->port, p->header.serial, p->header.epoch,
1548 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1549 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
/* Per-type send counter and per-peer byte accounting, each under its own
 * lock. */
1551 MUTEX_ENTER(&rx_stats_mutex);
1552 rx_stats.packetsSent[p->header.type-1]++;
1553 MUTEX_EXIT(&rx_stats_mutex);
1554 MUTEX_ENTER(&peer->peer_lock);
1555 hadd32(peer->bytesSent, p->length);
1556 MUTEX_EXIT(&peer->peer_lock);
1559 /* Send a list of packets to appropriate destination for the specified
1560 * connection. The headers are first encoded and placed in the packets.
/* Jumbogram send path: packs `len` packets into one UDP datagram using a
 * scatter/gather iovec array, stamping consecutive serial numbers and
 * chaining the buffers with rx_jumboHeader trailers.
 * NOTE(review): this listing is truncated -- the declarations of len, i,
 * length, temp, serial, socket and waslocked, plus loop braces and some
 * #ifdef partners, are not visible here. */
1562 void rxi_SendPacketList(struct rx_connection * conn,
1563 struct rx_packet **list,
1567 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1570 struct sockaddr_in addr;
1571 register struct rx_peer *peer = conn->peer;
1573 struct rx_packet *p = NULL;
1574 struct iovec wirevec[RX_MAXIOVECS];
1578 struct rx_jumboHeader *jp;
1580 char deliveryType = 'S';
1582 /* The address we're sending the packet to */
1583 addr.sin_family = AF_INET;
1584 addr.sin_port = peer->port;
1585 addr.sin_addr.s_addr = peer->host;
/* One iovec for the shared wire header plus one per packet must fit. */
1587 if (len+1 > RX_MAXIOVECS) {
1588 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
/* Reserve `len` consecutive serials up front under the connection lock;
 * individual packets are stamped from the local copy below. */
1592 * Stamp the packets in this jumbogram with consecutive serial numbers
1594 MUTEX_ENTER(&conn->conn_data_lock);
1595 serial = conn->serial;
1596 conn->serial += len;
1597 MUTEX_EXIT(&conn->conn_data_lock);
1600 /* This stuff should be revamped, I think, so that most, if not
1601 * all, of the header stuff is always added here. We could
1602 * probably do away with the encode/decode routines. XXXXX */
/* iovec 0 carries the first packet's wire header for the whole datagram. */
1605 length = RX_HEADER_SIZE;
1606 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1607 wirevec[0].iov_len = RX_HEADER_SIZE;
1608 for (i = 0 ; i < len ; i++) {
1611 /* The whole 3.5 jumbogram scheme relies on packets fitting
1612 * in a single packet buffer. */
1613 if (p->niovecs > 2) {
1614 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1617 /* Set the RX_JUMBO_PACKET flags in all but the last packets
/* Non-final packets must be exactly one jumbo buffer long so the receiver
 * can find the next embedded header at a fixed offset. */
1620 if (p->length != RX_JUMBOBUFFERSIZE) {
1621 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1623 p->header.flags |= RX_JUMBO_PACKET;
1624 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1625 wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1627 wirevec[i+1].iov_len = p->length;
1628 length += p->length;
1630 wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
/* jp points into the PREVIOUS packet's trailer; write this packet's
 * flags/spare there, then advance jp to this packet's trailer. */
1632 /* Convert jumbo packet header to network byte order */
1633 temp = (afs_uint32)(p->header.flags) << 24;
1634 temp |= (afs_uint32)(p->header.spare);
1635 *(afs_uint32 *)jp = htonl(temp);
1637 jp = (struct rx_jumboHeader *)
1638 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1640 /* Stamp each packet with a unique serial number. The serial
1641 * number is maintained on a connection basis because some types
1642 * of security may be based on the serial number of the packet,
1643 * and security is handled on a per authenticated-connection
1645 /* Pre-increment, to guarantee no zero serial number; a zero
1646 * serial number means the packet was never sent. */
1647 p->header.serial = ++serial;
1648 /* This is so we can adjust retransmit time-outs better in the face of
1649 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1651 if (p->firstSerial == 0) {
1652 p->firstSerial = p->header.serial;
1656 /* If an output tracer function is defined, call it with the packet and
1657 * network address. Note this function may modify its arguments. */
1658 if (rx_almostSent) {
1659 int drop = (*rx_almostSent) (p, &addr);
1660 /* drop packet if return value is non-zero? */
1661 if (drop) deliveryType = 'D'; /* Drop the packet */
1665 /* Get network byte order header */
1666 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1667 * touch ALL the fields */
1670 /* Send the packet out on the same socket that related packets are being
1672 socket = (conn->type == RX_CLIENT_CONNECTION
1673 ? rx_socket : conn->service->socket);
1676 /* Possibly drop this packet, for testing purposes */
1677 if ((deliveryType == 'D') ||
1678 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1679 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1680 deliveryType = 'D'; /* Drop the packet */
1683 deliveryType = 'S'; /* Send the packet */
1684 #endif /* RXDEBUG */
1686 /* Loop until the packet is sent. We'd prefer just to use a
1687 * blocking socket, but unfortunately the interface doesn't
1688 * allow us to have the socket block in send mode, and not
1689 * block in receive mode */
1691 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1692 waslocked = ISAFS_GLOCK();
1693 if (!istack && waslocked) AFS_GUNLOCK();
/* Whole jumbogram goes out as a single datagram: len+1 iovecs, `length`
 * bytes total. */
1695 if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
1696 /* send failed, so let's hurry up the resend, eh? */
1697 MUTEX_ENTER(&rx_stats_mutex);
1698 rx_stats.netSendFailures++;
1699 MUTEX_EXIT(&rx_stats_mutex);
/* On failure every constituent packet is rescheduled for early resend. */
1700 for (i = 0 ; i < len ; i++) {
1702 p->retryTime = p->timeSent; /* resend it very soon */
1703 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1706 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1707 if (!istack && waslocked) AFS_GLOCK();
1712 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1713 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1714 peer->host, peer->port, p->header.serial, p->header.epoch,
1715 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1716 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1718 MUTEX_ENTER(&rx_stats_mutex);
1719 rx_stats.packetsSent[p->header.type-1]++;
1720 MUTEX_EXIT(&rx_stats_mutex);
1721 MUTEX_ENTER(&peer->peer_lock);
1722 hadd32(peer->bytesSent, p->length);
1723 MUTEX_EXIT(&peer->peer_lock);
1727 /* Send a "special" packet to the peer connection. If call is
1728 * specified, then the packet is directed to a specific call channel
1729 * associated with the connection, otherwise it is directed to the
1730 * connection only. Uses optionalPacket if it is supplied, rather than
1731 * allocating a new packet buffer. Nbytes is the length of the data
1732 * portion of the packet. If data is non-null, nbytes of data are
1733 * copied into the packet. Type is the type of the packet, as defined
1734 * in rx.h. Bug: there's a lot of duplication between this and other
1735 * routines. This needs to be cleaned up. */
/* NOTE(review): this listing is truncated -- the declarations of type, data,
 * nbytes and istack, the BUSY-branch body, the optionalPacket-supplied path,
 * a loop break and several braces are not visible here.  Returns
 * optionalPacket (or NULL when a fresh packet was allocated and freed). */
1737 rxi_SendSpecial(call, conn, optionalPacket, type, data, nbytes, istack)
1738 register struct rx_call *call;
1739 register struct rx_connection *conn;
1740 struct rx_packet *optionalPacket;
1745 /* Some of the following stuff should be common code for all
1746 * packet sends (it's repeated elsewhere) */
1747 register struct rx_packet *p;
1749 int savelen = 0, saven = 0;
1750 int channel, callNumber;
1752 channel = call->channel;
1753 callNumber = *call->callNumber;
1754 /* BUSY packets refer to the next call on this connection */
1755 if (type == RX_PACKET_TYPE_BUSY) {
1764 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1765 if (!p) osi_Panic("rxi_SendSpecial failure");
/* Fill in the header fields that identify the connection/call/packet. */
1772 p->header.serviceId = conn->serviceId;
1773 p->header.securityIndex = conn->securityIndex;
1774 p->header.cid = (conn->cid | channel);
1775 p->header.callNumber = callNumber;
1777 p->header.epoch = conn->epoch;
1778 p->header.type = type;
1779 p->header.flags = 0;
1780 if (conn->type == RX_CLIENT_CONNECTION)
1781 p->header.flags |= RX_CLIENT_INITIATED;
1783 rx_packetwrite(p, 0, nbytes, data);
/* Same iovec-trimming trick as rxi_SendDebugPacket: shrink the final data
 * iovec so the chain covers exactly nbytes, remembering the old values in
 * savelen/saven for restoration after the send. */
1785 for (i=1; i < p->niovecs; i++) {
1786 if (nbytes <= p->wirevec[i].iov_len) {
1787 savelen = p->wirevec[i].iov_len;
1789 p->wirevec[i].iov_len = nbytes;
1790 p->niovecs = i+1; /* so condition fails because i == niovecs */
1792 else nbytes -= p->wirevec[i].iov_len;
/* Call-directed specials go through rxi_Send (call state updated);
 * connection-only specials go straight to rxi_SendPacket. */
1795 if (call) rxi_Send(call, p, istack);
1796 else rxi_SendPacket(conn, p, istack);
1797 if (saven) { /* means we truncated the packet above. We probably don't */
1798 /* really need to do this, but it seems safer this way, given that */
1799 /* sneaky optionalPacket... */
1800 p->wirevec[i-1].iov_len = savelen;
1803 if (!optionalPacket) rxi_FreePacket(p);
1804 return optionalPacket;
1808 /* Encode the packet's header (from the struct header in the packet to
1809 * the net byte order representation in the wire representation of the
1810 * packet, which is what is actually sent out on the wire) */
1811 void rxi_EncodePacketHeader(p)
1812 register struct rx_packet *p;
1814 register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */
1816 bzero((char *)buf, RX_HEADER_SIZE);
1817 *buf++ = htonl(p->header.epoch);
1818 *buf++ = htonl(p->header.cid);
1819 *buf++ = htonl(p->header.callNumber);
1820 *buf++ = htonl(p->header.seq);
1821 *buf++ = htonl(p->header.serial);
1822 *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
1823 | (((afs_uint32)p->header.flags)<<16)
1824 | (p->header.userStatus<<8) | p->header.securityIndex);
1825 /* Note: top 16 bits of this next word were reserved */
1826 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1829 /* Decode the packet's header (from net byte order to a struct header) */
1830 void rxi_DecodePacketHeader(p)
1831 register struct rx_packet *p;
1833 register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base); /* MTUXXX */
1836 p->header.epoch = ntohl(*buf++);
1837 p->header.cid = ntohl(*buf++);
1838 p->header.callNumber = ntohl(*buf++);
1839 p->header.seq = ntohl(*buf++);
1840 p->header.serial = ntohl(*buf++);
1841 temp = ntohl(*buf++);
1842 /* C will truncate byte fields to bytes for me */
1843 p->header.type = temp>>24;
1844 p->header.flags = temp>>16;
1845 p->header.userStatus = temp>>8;
1846 p->header.securityIndex = temp>>0;
1847 temp = ntohl(*buf++);
1848 p->header.serviceId = (temp&0xffff);
1849 p->header.spare = temp>>16;
1850 /* Note: top 16 bits of this last word are the security checksum */
/* Prepare a DATA packet for its first transmission on `call`: fill in the
 * header from the call/connection state, reset the retransmit clocks,
 * reconcile p->length with the iovec lengths, release surplus cluster
 * buffers, and let the security layer finish the packet.
 * NOTE(review): this listing is truncated -- the declarations of i and j,
 * the `last` parameter declaration and its `if (last)` guard, the panic
 * condition, and several braces are not visible here. */
1853 void rxi_PrepareSendPacket(call, p, last)
1854 register struct rx_call *call;
1855 register struct rx_packet *p;
1858 register struct rx_connection *conn = call->conn;
1860 ssize_t len; /* len must be a signed type; it can go negative */
1863 p->header.cid = (conn->cid | call->channel);
1864 p->header.serviceId = conn->serviceId;
1865 p->header.securityIndex = conn->securityIndex;
1866 p->header.callNumber = *call->callNumber;
/* tnext is the next sequence number to hand out on this call. */
1867 p->header.seq = call->tnext++;
1868 p->header.epoch = conn->epoch;
1869 p->header.type = RX_PACKET_TYPE_DATA;
1870 p->header.flags = 0;
1871 p->header.spare = 0;
1872 if (conn->type == RX_CLIENT_CONNECTION)
1873 p->header.flags |= RX_CLIENT_INITIATED;
/* Presumably guarded by `if (last)` on a line not visible here -- marks the
 * final data packet of the call.  TODO confirm. */
1876 p->header.flags |= RX_LAST_PACKET;
1878 clock_Zero(&p->retryTime); /* Never yet transmitted */
1879 clock_Zero(&p->firstSent); /* Never yet transmitted */
1880 p->header.serial = 0; /* Another way of saying never transmitted... */
1883 /* Now that we're sure this is the last data on the call, make sure
1884 * that the "length" and the sum of the iov_lens matches. */
1885 len = p->length + call->conn->securityHeaderSize;
/* Walk the data iovecs subtracting their lengths; after the loop, `len` is
 * the (negative or zero) amount by which the last used iovec overshoots. */
1887 for (i=1; i < p->niovecs && len > 0; i++) {
1888 len -= p->wirevec[i].iov_len;
1891 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1894 /* Free any extra elements in the wirevec */
1895 for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1896 rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
/* Shrink the last used iovec by the overshoot (len <= 0 here). */
1899 p->wirevec[i-1].iov_len += len;
/* Give the security object (e.g. rxkad) a chance to checksum/encrypt. */
1901 RXS_PreparePacket(conn->securityObject, call, p);
1904 /* Given an interface MTU size, calculate an adjusted MTU size that
1905 * will make efficient use of the RX buffers when the peer is sending
1906 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
/* NOTE(review): truncated listing -- the declarations of adjMTU and frags,
 * the body of the mtu <= adjMTU early-return branch, and the statement that
 * reduces mtu by adjMTU before the division are not visible here. */
1907 int rxi_AdjustIfMTU(int mtu)
/* adjMTU = wire header plus one full jumbo fragment (buffer + trailer). */
1912 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1913 if (mtu <= adjMTU) {
/* Round the remaining space down to a whole number of jumbo fragments. */
1920 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1921 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1924 /* Given an interface MTU size, and the peer's advertised max receive
1925 * size, calculate an adjisted maxMTU size that makes efficient use
1926 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
1927 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1929 int maxMTU = mtu * rxi_nSendFrags;
1930 maxMTU = MIN(maxMTU, peerMaxMTU);
1931 return rxi_AdjustIfMTU(maxMTU);
1934 /* Given a packet size, figure out how many datagram packet will fit.
1935 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1936 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1937 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
/* NOTE(review): truncated listing -- the declaration of maxMTU, the body of
 * the small-mtu early-return branch, and the guard before the final return
 * are not visible here. */
1938 int rxi_AdjustDgramPackets(int frags, int mtu)
/* If even one jumbo buffer plus header would not fit alongside an IPv6
 * fragment header, presumably fall back to a single packet -- TODO confirm
 * the missing branch body. */
1941 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
/* Total datagram capacity across `frags` fragments, minus one UDP header
 * (the first fragment's UDP header is counted once). */
1944 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1945 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1946 /* subtract the size of the first and last packets */
1947 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
/* 2 accounts for the first and last buffers; the division counts the
 * middle buffers that fit in the remaining space. */
1951 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));