1 /* Copyright (C) 1998 Transarc Corporation. All rights reserved. */
3 #include "../afs/param.h"
5 #include "../afs/sysincludes.h"
6 #include "../afs/afsincludes.h"
7 #include "../rx/rx_kcommon.h"
8 #include "../rx/rx_clock.h"
9 #include "../rx/rx_queue.h"
10 #include "../rx/rx_packet.h"
11 #else /* defined(UKERNEL) */
12 #include "../h/types.h"
13 #ifndef AFS_LINUX20_ENV
14 #include "../h/systm.h"
16 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
17 #include "../afs/sysincludes.h"
19 #include "../h/socket.h"
20 #include "../netinet/in.h"
21 #include "../afs/afs_osi.h"
22 #include "../rx/rx_kmutex.h"
23 #include "../rx/rx_clock.h"
24 #include "../rx/rx_queue.h"
26 #include <sys/sysmacros.h>
28 #include "../rx/rx_packet.h"
29 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
30 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
31 #include "../sys/mount.h" /* it gets pulled in by something later anyway */
33 #include "../h/mbuf.h"
35 #endif /* defined(UKERNEL) */
36 #include "../rx/rx_globals.h"
38 #include <afs/param.h>
39 #include "sys/types.h"
44 #include "rx_xmit_nt.h"
47 #include <sys/socket.h>
48 #include <netinet/in.h>
54 #include <sys/sysmacros.h>
56 #include "rx_packet.h"
57 #include "rx_globals.h"
59 #include "rx_internal.h"
64 /* rxdb_fileID is used to identify the lock location, along with line#. */
65 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
66 #endif /* RX_LOCKS_DB */
67 struct rx_packet *rx_mallocedP = 0;
69 extern char cml_version_number[];
70 extern int (*rx_almostSent)();
72 void rxi_FreePacketNoLock(struct rx_packet *p);
73 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
74 afs_int32 ahost, short aport, afs_int32 istack);
76 extern char cml_version_number[];
77 extern int (*rx_almostSent)();
78 /* some rules about packets:
79 * 1. When a packet is allocated, the final iov_buf contains room for
80 * a security trailer, but iov_len masks that fact. If the security
81 * package wants to add the trailer, it may do so, and then extend
82 * iov_len appropriately. For this reason, packet's niovecs and
83 * iov_len fields should be accurate before calling PreparePacket.
87 * all packet buffers (iov_base) are integral multiples of
89 * offset is an integral multiple of the word size.
/* Read one afs_int32 from the packet at byte offset <offset> into the user
 * data area (wirevec[1..niovecs-1]); walks the iovec chain to find the vec
 * containing the offset. NOTE(review): source appears sampled -- the
 * declarations of l/i, closing braces and the fall-through return are not
 * visible here. */
91 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
/* l accumulates the byte length of all vecs before vec i; vec 0 (the wire
 * header) is skipped. */
95 for (l=0, i=1; i< packet->niovecs ; i++ ) {
96 if (l + packet->wirevec[i].iov_len > offset) {
/* offset falls inside this vec; fetch the word at (offset - l). */
97 return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
99 l += packet->wirevec[i].iov_len;
106 * all packet buffers (iov_base) are integral multiples of the word size.
107 * offset is an integral multiple of the word size.
/* Store one afs_int32 <data> into the packet at byte offset <offset>,
 * mirroring rx_SlowGetInt32: locate the iovec containing the offset and
 * write in place. NOTE(review): fragmentary source; locals, braces and the
 * trailing return are missing from this view. */
109 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
/* Skip vec 0 (wire header); l = total bytes preceding vec i. */
113 for (l=0, i=1; i< packet->niovecs ; i++ ) {
114 if (l + packet->wirevec[i].iov_len > offset) {
115 *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
119 l += packet->wirevec[i].iov_len;
126 * all packet buffers (iov_base) are integral multiples of the
128 * offset is an integral multiple of the word size.
130 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
/* Copy <resid> bytes starting at byte <offset> of the packet's user data
 * into <out>. Returns the number of bytes actually copied (r - resid when
 * the packet ran out early). NOTE(review): sampled source -- declarations
 * and the copy-loop increments are partly missing. */
132 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
133 int resid, char *out)
135 unsigned int i, j, l, r;
/* First loop: find the iovec containing <offset>; vec 0 (header) skipped. */
136 for (l=0, i=1; i< packet->niovecs ; i++ ) {
137 if (l + packet->wirevec[i].iov_len > offset) {
140 l += packet->wirevec[i].iov_len;
143 /* i is the iovec which contains the first little bit of data in which we
144 * are interested. l is the total length of everything prior to this iovec.
145 * j is the number of bytes we can safely copy out of this iovec.
/* Second loop: copy vec by vec until resid is exhausted or vecs run out. */
148 while ((resid > 0) && (i < packet->niovecs)) {
149 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
150 bcopy ((char *)(packet->wirevec[i].iov_base) + (offset - l), out, j);
152 l += packet->wirevec[i].iov_len;
/* Partial copy returns bytes transferred; full copy returns r (the request). */
156 return (resid ? (r - resid) : r);
161 * all packet buffers (iov_base) are integral multiples of the
163 * offset is an integral multiple of the word size.
/* Write <resid> bytes into the packet starting at byte <offset>, growing the
 * packet with continuation buffers (rxi_AllocDataBuf) when it runs out of
 * iovecs. Returns bytes written. NOTE(review): fragmentary -- locals,
 * break/brace lines and the actual bcopy into b are not visible here. */
165 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
/* Locate the iovec containing <offset>, as in rx_SlowReadPacket. */
171 for (l=0, i=1; i < packet->niovecs; i++ ) {
172 if (l + packet->wirevec[i].iov_len > offset) {
175 l += packet->wirevec[i].iov_len;
178 /* i is the iovec which contains the first little bit of data in which we
179 * are interested. l is the total length of everything prior to this iovec.
180 * j is the number of bytes we can safely copy out of this iovec.
/* Bounded by RX_MAXWVECS (not niovecs) because new vecs may be allocated. */
183 while ((resid > 0) && (i < RX_MAXWVECS)) {
184 if (i >= packet->niovecs)
/* rxi_AllocDataBuf grows niovecs as a side effect; >0 means it failed to
 * get all the space asked for. */
185 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
188 b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
189 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
192 l += packet->wirevec[i].iov_len;
196 return (resid ? (r - resid) : r);
/* Pull one packet off the global free queue for use as a continuation
 * buffer. Returns NULL (presumably -- the return paths are not visible in
 * this sampled view) when over quota or the queue is empty. Takes and
 * releases rx_freePktQ_lock itself. */
199 static struct rx_packet * allocCBuf(int class)
203 extern void rxi_MorePacketsNoLock();
208 MUTEX_ENTER(&rx_freePktQ_lock);
/* Over quota for this allocation class: record the failure per class in
 * the global stats and bail out. */
211 if (rxi_OverQuota(class)) {
213 rxi_NeedMorePackets = TRUE;
214 MUTEX_ENTER(&rx_stats_mutex);
216 case RX_PACKET_CLASS_RECEIVE:
217 rx_stats.receivePktAllocFailures++;
219 case RX_PACKET_CLASS_SEND:
220 rx_stats.sendPktAllocFailures++;
222 case RX_PACKET_CLASS_SPECIAL:
223 rx_stats.specialPktAllocFailures++;
225 case RX_PACKET_CLASS_RECV_CBUF:
226 rx_stats.receiveCbufPktAllocFailures++;
228 case RX_PACKET_CLASS_SEND_CBUF:
229 rx_stats.sendCbufPktAllocFailures++;
232 MUTEX_EXIT(&rx_stats_mutex);
236 if (queue_IsEmpty(&rx_freePacketQueue)) {
238 rxi_NeedMorePackets = TRUE;
/* Second emptiness check: in some build configuration (the #ifdefs are
 * missing from this view) we can replenish in place while holding the lock. */
242 if (queue_IsEmpty(&rx_freePacketQueue)) {
243 rxi_MorePacketsNoLock(rx_initSendWindow);
248 c = queue_First(&rx_freePacketQueue, rx_packet);
/* Sanity: a packet on the free queue must carry the RX_FREE_PACKET flag. */
250 if (c->header.flags != RX_FREE_PACKET)
251 osi_Panic("rxi_AllocPacket: packet not free\n");
255 MUTEX_EXIT(&rx_freePktQ_lock);
262 * Free a packet currently used as a continuation buffer
/* Return a continuation-buffer packet to the free queue, under
 * rx_freePktQ_lock, and (per the comment below) wake any waiters --
 * the actual wakeup call is not visible in this sampled view. */
264 void rxi_freeCBuf(struct rx_packet *c)
266 extern void rxi_PacketsUnWait();
270 MUTEX_ENTER(&rx_freePktQ_lock);
272 rxi_FreePacketNoLock(c);
273 /* Wakeup anyone waiting for packets */
276 MUTEX_EXIT(&rx_freePktQ_lock);
280 /* this one is kind of awful.
281 * In rxkad, the packet has been all shortened, and everything, ready for
282 * sending. All of a sudden, we discover we need some of that space back.
283 * This isn't terribly general, because it knows that the packets are only
284 * rounded up to the EBS (userdata + security header).
/* Reclaim <nb> bytes of trailer space in packet p by extending iov_len of
 * the relevant vec, distinguishing the first (localdata, RX_FIRSTBUFFERSIZE)
 * buffer from continuation buffers (RX_CBUFFERSIZE). K&R-style definition;
 * the nb parameter declaration and loop header are missing from this view. */
286 int rxi_RoundUpPacket(p, nb)
287 struct rx_packet * p;
/* First data buffer: may grow only up to RX_FIRSTBUFFERSIZE total. */
292 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
293 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
294 p->wirevec[i].iov_len += nb;
/* Continuation buffer: bounded by RX_CBUFFERSIZE instead. */
299 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
300 p->wirevec[i].iov_len += nb;
307 /* get sufficient space to store nb bytes of data (or more), and hook
308 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
309 * returns the number of bytes >0 which it failed to come up with.
310 * Don't need to worry about locking on packet, since only
311 * one thread can manipulate one at a time. Locking on continution
312 * packets is handled by allocCBuf */
313 /* MTUXXX don't need to go throught the for loop if we can trust niovecs */
/* Attach enough continuation buffers to packet p to hold at least <nb> more
 * bytes. Per the header comment above: returns <=0 on success, otherwise
 * the number of bytes it could NOT provide. Appends one RX_CBUFFERSIZE vec
 * per allocCBuf success until nb is satisfied or RX_MAXWVECS is reached. */
314 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
318 for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
319 register struct rx_packet *cb;
/* Intentional assignment-in-condition: cb is the freshly allocated
 * continuation packet, or NULL/0 on failure. */
320 if (cb = allocCBuf(class)) {
321 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
322 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
323 nb -= RX_CBUFFERSIZE;
324 p->length += RX_CBUFFERSIZE;
333 /* Add more packet buffers */
/* Allocate <apackets> new rx_packet structures in one osi_Alloc slab,
 * initialize each one's header/data iovecs, and append them all to the
 * global free queue under rx_freePktQ_lock.
 * NOTE(review): rx_mallocedP is overwritten with the newest slab each call,
 * so earlier slabs are no longer reachable through it -- looks intentional
 * per the rxi_FreeAllPackets MTUXXX comment, but verify. */
334 void rxi_MorePackets(int apackets)
336 extern void rxi_PacketsUnWait();
337 struct rx_packet *p, *e;
341 getme = apackets * sizeof(struct rx_packet);
342 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
344 PIN(p, getme); /* XXXXX */
345 bzero((char *)p, getme);
348 MUTEX_ENTER(&rx_freePktQ_lock);
350 for (e = p + apackets; p<e; p++) {
/* Vec 0 is the wire header, vec 1 the first (local) data buffer. */
351 p->wirevec[0].iov_base = (char *) (p->wirehead);
352 p->wirevec[0].iov_len = RX_HEADER_SIZE;
353 p->wirevec[1].iov_base = (char *) (p->localdata);
354 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
/* Mark free so allocCBuf/rxi_AllocPacketNoLock sanity checks pass. */
355 p->header.flags = RX_FREE_PACKET;
358 queue_Append(&rx_freePacketQueue, p);
360 rx_nFreePackets += apackets;
361 rxi_NeedMorePackets = FALSE;
365 MUTEX_EXIT(&rx_freePktQ_lock);
370 /* Add more packet buffers */
/* Like rxi_MorePackets but assumes the caller already holds
 * rx_freePktQ_lock (no MUTEX_ENTER/EXIT here). Also over-allocates so that
 * roughly a quarter of the new packets can carry a maximal jumbogram. */
371 void rxi_MorePacketsNoLock(int apackets)
373 extern void rxi_PacketsUnWait();
374 struct rx_packet *p, *e;
377 /* allocate enough packets that 1/4 of the packets will be able
378 * to hold maximal amounts of data */
379 apackets += (apackets/4)
380 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
381 getme = apackets * sizeof(struct rx_packet);
382 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
384 bzero((char *)p, getme);
386 for (e = p + apackets; p<e; p++) {
/* Same two-vec initialization as rxi_MorePackets: header then localdata. */
387 p->wirevec[0].iov_base = (char *) (p->wirehead);
388 p->wirevec[0].iov_len = RX_HEADER_SIZE;
389 p->wirevec[1].iov_base = (char *) (p->localdata);
390 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
391 p->header.flags = RX_FREE_PACKET;
394 queue_Append(&rx_freePacketQueue, p);
396 rx_nFreePackets += apackets;
397 rxi_NeedMorePackets = FALSE;
/* Release the packet slab pointed to by rx_mallocedP. As the MTUXXX note
 * says, this does NOT free every slab ever allocated, and the size used
 * here ((rx_maxReceiveWindow+2) packets) need not match what was actually
 * allocated by rxi_MorePackets* -- TODO(review): confirm against callers. */
402 void rxi_FreeAllPackets(void)
404 /* must be called at proper interrupt level, etcetera */
405 /* MTUXXX need to free all Packets */
406 osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
407 UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
410 /* Allocate more packets iff we need more continuation buffers */
411 /* In kernel, can't page in memory with interrupts disabled, so we
412 * don't use the event mechanism. */
/* Replenish the free-packet pool if an allocation path flagged
 * rxi_NeedMorePackets. Called instead of using the event mechanism because
 * (per the comment above) kernel code cannot page with interrupts off. */
413 void rx_CheckPackets()
415 if (rxi_NeedMorePackets) {
416 rxi_MorePackets(rx_initSendWindow);
420 /* In the packet freeing routine below, the assumption is that
421 we want all of the packets to be used equally frequently, so that we
422 don't get packet buffers paging out. It would be just as valid to
423 assume that we DO want them to page out if not many are being used.
424 In any event, we assume the former, and append the packets to the end
426 /* This explanation is bogus. The free list doesn't remain in any kind of
427 useful order for afs_int32: the packets in use get pretty much randomly scattered
428 across all the pages. In order to permit unused {packets,bufs} to page out, they
429 must be stored so that packets which are adjacent in memory are adjacent in the
430 free list. An array springs rapidly to mind.
433 /* Actually free the packet p. */
/* Put packet p back on the free queue; caller must hold rx_freePktQ_lock.
 * Panics on double-free (flag already RX_FREE_PACKET). Appends to the END
 * of the queue -- see the long discussion above about keeping all packets
 * equally warm. */
434 void rxi_FreePacketNoLock(struct rx_packet *p)
436 dpf(("Free %x\n", p));
438 if (p->header.flags & RX_FREE_PACKET)
439 osi_Panic("rxi_FreePacketNoLock: packet already free\n");
441 p->header.flags = RX_FREE_PACKET;
442 queue_Append(&rx_freePacketQueue, p);
/* Free the continuation buffers of packet p (vecs 2..niovecs-1), mapping
 * each iov_base back to its owning packet with RX_CBUF_TO_PACKET. <first>
 * must be 1 (MTUXXX placeholder); vec 1 must still be the packet's own
 * localdata. Caller holds rx_freePktQ_lock. K&R definition; the declaration
 * of <first> and the return are not visible in this sampled view. */
445 int rxi_FreeDataBufsNoLock(p, first)
446 struct rx_packet * p;
449 struct iovec *iov, *end;
451 if (first != 1) /* MTUXXX */
452 osi_Panic("FreeDataBufs 1: first must be 1");
453 iov = &p->wirevec[1];
454 end = iov + (p->niovecs-1);
455 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
456 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
/* Vecs 2..niovecs-1 are continuation buffers; NULL bases are a bug. */
457 for (iov++ ; iov < end ; iov++) {
459 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
460 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
468 int rxi_nBadIovecs = 0;
470 /* rxi_RestoreDataBufs
472 * Restore the correct sizes to the iovecs. Called when reusing a packet
473 * for reading off the wire.
/* Reset every iovec of p to its full canonical size (header, localdata,
 * then RX_CBUFFERSIZE per continuation vec) before reusing the packet to
 * read off the wire. The handling of a NULL iov_base (rxi_nBadIovecs path,
 * presumably) is missing from this sampled view. */
475 void rxi_RestoreDataBufs(struct rx_packet *p)
478 struct iovec *iov = &p->wirevec[2];
480 p->wirevec[0].iov_base = (char *) (p->wirehead);
481 p->wirevec[0].iov_len = RX_HEADER_SIZE;
482 p->wirevec[1].iov_base = (char *) (p->localdata);
483 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
485 for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
486 if (!iov->iov_base) {
491 iov->iov_len = RX_CBUFFERSIZE;
/* Free the continuation buffers of p that lie entirely beyond p->length
 * (i.e. hold no message data), returning them to the free queue under
 * rx_freePktQ_lock. <first> must be 1. K&R definition; declarations and
 * the niovecs adjustment are not visible in this sampled view. */
495 int rxi_TrimDataBufs(p, first)
496 struct rx_packet * p;
499 extern void rxi_PacketsUnWait();
501 struct iovec *iov, *end;
505 osi_Panic("TrimDataBufs 1: first must be 1");
507 /* Skip over continuation buffers containing message data */
508 iov = &p->wirevec[2];
509 end = iov + (p->niovecs-2);
/* length = data bytes remaining after the first (localdata) buffer. */
510 length = p->length - p->wirevec[1].iov_len;
511 for (; iov < end && length > 0 ; iov++) {
513 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
514 length -= iov->iov_len;
517 /* iov now points to the first empty data buffer. */
522 MUTEX_ENTER(&rx_freePktQ_lock);
524 for (; iov < end ; iov++) {
526 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
527 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
532 MUTEX_EXIT(&rx_freePktQ_lock);
538 /* Free the packet p. P is assumed not to be on any queue, i.e.
539 * remove it yourself first if you call this routine. */
/* Free packet p and all of its continuation buffers. p must already be off
 * any queue (per the comment above). Takes rx_freePktQ_lock itself; the
 * waiter wakeup implied by the comment is not visible in this view. */
540 void rxi_FreePacket(struct rx_packet *p)
542 extern void rxi_PacketsUnWait();
546 MUTEX_ENTER(&rx_freePktQ_lock);
548 rxi_FreeDataBufsNoLock(p,1);
549 rxi_FreePacketNoLock(p);
550 /* Wakeup anyone waiting for packets */
553 MUTEX_EXIT(&rx_freePktQ_lock);
558 /* rxi_AllocPacket sets up p->length so it reflects the number of
559 * bytes in the packet at this point, **not including** the header.
560 * The header is absolutely necessary, besides, this is the way the
561 * length field is usually used */
/* Allocate one packet from the free queue; caller holds rx_freePktQ_lock.
 * Sets up the header/localdata iovecs and p->length = RX_FIRSTBUFFERSIZE
 * (data bytes only, header excluded -- see comment above). Returns NULL
 * when over quota. K&R definition; the class parameter declaration, switch
 * keyword and breaks are missing from this sampled view. */
562 struct rx_packet *rxi_AllocPacketNoLock(class)
565 register struct rx_packet *p;
/* Over quota: bump the per-class failure counter and fail the allocation.
 * This mirrors the identical bookkeeping in allocCBuf. */
568 if (rxi_OverQuota(class)) {
569 rxi_NeedMorePackets = TRUE;
570 MUTEX_ENTER(&rx_stats_mutex);
572 case RX_PACKET_CLASS_RECEIVE:
573 rx_stats.receivePktAllocFailures++;
575 case RX_PACKET_CLASS_SEND:
576 rx_stats.sendPktAllocFailures++;
578 case RX_PACKET_CLASS_SPECIAL:
579 rx_stats.specialPktAllocFailures++;
581 case RX_PACKET_CLASS_RECV_CBUF:
582 rx_stats.receiveCbufPktAllocFailures++;
584 case RX_PACKET_CLASS_SEND_CBUF:
585 rx_stats.sendCbufPktAllocFailures++;
588 MUTEX_EXIT(&rx_stats_mutex);
589 return (struct rx_packet *) 0;
593 MUTEX_ENTER(&rx_stats_mutex);
594 rx_stats.packetRequests++;
595 MUTEX_EXIT(&rx_stats_mutex);
/* Two variants (presumably #ifdef'd; the directives are not visible):
 * panic on empty queue, or replenish in place via rxi_MorePacketsNoLock. */
598 if (queue_IsEmpty(&rx_freePacketQueue))
599 osi_Panic("rxi_AllocPacket error");
601 if (queue_IsEmpty(&rx_freePacketQueue))
602 rxi_MorePacketsNoLock(rx_initSendWindow);
606 p = queue_First(&rx_freePacketQueue, rx_packet);
607 if (p->header.flags != RX_FREE_PACKET)
608 osi_Panic("rxi_AllocPacket: packet not free\n");
610 dpf(("Alloc %x, class %d\n", p, class));
615 /* have to do this here because rx_FlushWrite fiddles with the iovs in
616 * order to truncate outbound packets. In the near future, may need
617 * to allocate bufs from a static pool here, and/or in AllocSendPacket
619 p->wirevec[0].iov_base = (char *) (p->wirehead);
620 p->wirevec[0].iov_len = RX_HEADER_SIZE;
621 p->wirevec[1].iov_base = (char *) (p->localdata);
622 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
624 p->length = RX_FIRSTBUFFERSIZE;
/* Locking wrapper around rxi_AllocPacketNoLock: acquires rx_freePktQ_lock
 * for the duration of the allocation. Return statement not visible here. */
628 struct rx_packet *rxi_AllocPacket(class)
631 register struct rx_packet *p;
633 MUTEX_ENTER(&rx_freePktQ_lock);
634 p = rxi_AllocPacketNoLock(class);
635 MUTEX_EXIT(&rx_freePktQ_lock);
639 /* This guy comes up with as many buffers as it {takes,can get} given
640 * the MTU for this call. It also sets the packet length before
641 * returning. caution: this is often called at NETPRI
642 * Called with call locked.
/* Allocate a send packet sized for this call's MTU, growing it with
 * continuation buffers up to min(want, mud) and reserving <delta> bytes for
 * the connection's security header+trailer. If no packet is available,
 * sleeps on the free queue (CV_WAIT or osi_rxSleep) with the call lock
 * dropped, and retries until a packet arrives or the call errors.
 * Often called at NETPRI; call lock held on entry (per comment above).
 * K&R definition; several declarations and the loop close are missing from
 * this sampled view. */
644 struct rx_packet *rxi_AllocSendPacket(call, want)
645 register struct rx_call *call;
648 register struct rx_packet *p = (struct rx_packet *) 0;
650 register unsigned delta;
/* mud = max user data per packet; delta = security overhead to reserve. */
653 mud = call->MTU - RX_HEADER_SIZE;
654 delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
655 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
657 while (!(call->error)) {
658 MUTEX_ENTER(&rx_freePktQ_lock);
659 /* if an error occurred, or we get the packet we want, we're done */
660 if (p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND)) {
661 MUTEX_EXIT(&rx_freePktQ_lock);
664 want = MIN(want, mud);
/* Grow the packet toward the caller's requested size; best-effort. */
666 if ((unsigned) want > p->length)
667 (void) rxi_AllocDataBuf(p, (want - p->length),
668 RX_PACKET_CLASS_SEND_CBUF);
670 if ((unsigned) p->length > mud)
/* Packet too small to even hold the security overhead -- handling of this
 * case is not visible in this sampled view. */
673 if (delta >= p->length) {
682 /* no error occurred, and we didn't get a packet, so we sleep.
683 * At this point, we assume that packets will be returned
684 * sooner or later, as packets are acknowledged, and so we
687 call->flags |= RX_CALL_WAIT_PACKETS;
688 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
/* Drop the call lock while sleeping to avoid deadlock with the packet
 * return path; reacquired below. */
689 MUTEX_EXIT(&call->lock);
690 rx_waitingForPackets = 1;
692 #ifdef RX_ENABLE_LOCKS
693 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
695 osi_rxSleep(&rx_waitingForPackets);
697 MUTEX_EXIT(&rx_freePktQ_lock);
698 MUTEX_ENTER(&call->lock);
699 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
700 call->flags &= ~RX_CALL_WAIT_PACKETS;
709 /* count the number of used FDs */
/* Count how many of the first <amax> file descriptors are open, by
 * fstat()ing each and counting successes. Userland-only: the kernel build
 * replaces this with the macro below. K&R definition; the amax declaration
 * and return are missing from this sampled view. */
710 static int CountFDs(amax)
713 register int i, code;
717 for(i=0;i<amax;i++) {
718 code = fstat(i, &tstat);
719 if (code == 0) count++;
726 #define CountFDs(amax) amax
730 #if !defined(KERNEL) || defined(UKERNEL)
732 /* This function reads a single packet from the interface into the
733 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
734 * (host,port) of the sender are stored in the supplied variables, and
735 * the data length of the packet is stored in the packet structure.
736 * The header is decoded. */
/* Read one UDP datagram from <socket> into packet p via recvmsg. Grows the
 * packet's iovecs to the advertised jumbo receive size, pads the last vec
 * by RX_EXTRABUFFERSIZE to absorb oversized datagrams (rx headers carry no
 * length field), validates the byte count, decodes the rx header, updates
 * per-type and per-peer statistics, and trims unused buffers. Returns 0
 * for a bogus packet (per the comment above); the success return and
 * several branches are missing from this sampled view. K&R definition. */
737 int rxi_ReadPacket(socket, p, host, port)
739 register struct rx_packet *p;
743 struct sockaddr_in from;
746 register afs_int32 tlen, savelen;
748 rx_computelen(p, tlen);
749 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
751 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
752 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
753 * it once in order to avoid races. */
/* Grow the packet to the advertised receive size; the computation of the
 * requested tlen delta is partly missing from this view. */
756 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
764 /* Extend the last iovec for padding, it's just to make sure that the
765 * read doesn't return more data than we expect, and is done to get around
766 * our problems caused by the lack of a length field in the rx header.
767 * Use the extra buffer that follows the localdata in each packet
769 savelen = p->wirevec[p->niovecs].iov_len;
770 p->wirevec[p->niovecs].iov_len += RX_EXTRABUFFERSIZE;
772 bzero((char *)&msg, sizeof(msg));
773 msg.msg_name = (char *) &from;
774 msg.msg_namelen = sizeof(struct sockaddr_in);
775 msg.msg_iov = p->wirevec;
776 msg.msg_iovlen = p->niovecs;
777 nbytes = rxi_Recvmsg(socket, &msg, 0);
779 /* restore the vec to its correct state */
780 p->wirevec[p->niovecs].iov_len = savelen;
782 p->length = (nbytes - RX_HEADER_SIZE);
/* nbytes > tlen: datagram overflowed into the pad. length & 0x8000: nbytes
 * was less than RX_HEADER_SIZE, making the unsigned subtraction huge. */
783 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
785 rxi_MorePackets(rx_initSendWindow);
787 else if (nbytes < 0 && errno == EWOULDBLOCK) {
788 MUTEX_ENTER(&rx_stats_mutex);
789 rx_stats.noPacketOnRead++;
790 MUTEX_EXIT(&rx_stats_mutex);
/* Fall-through bogus case: record the offending host for rxdebug. */
794 MUTEX_ENTER(&rx_stats_mutex);
795 rx_stats.bogusPacketOnRead++;
796 rx_stats.bogusHost = from.sin_addr.s_addr;
797 MUTEX_EXIT(&rx_stats_mutex);
798 dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
799 from.sin_port,nbytes));
804 /* Extract packet header. */
805 rxi_DecodePacketHeader(p);
807 *host = from.sin_addr.s_addr;
808 *port = from.sin_port;
809 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
810 struct rx_peer *peer;
811 MUTEX_ENTER(&rx_stats_mutex);
812 rx_stats.packetsRead[p->header.type-1]++;
813 MUTEX_EXIT(&rx_stats_mutex);
815 * Try to look up this peer structure. If it doesn't exist,
816 * don't create a new one -
817 * we don't keep count of the bytes sent/received if a peer
818 * structure doesn't already exist.
820 * The peer/connection cleanup code assumes that there is 1 peer
821 * per connection. If we actually created a peer structure here
822 * and this packet was an rxdebug packet, the peer structure would
823 * never be cleaned up.
825 peer = rxi_FindPeer(*host, *port, 0, 0);
827 MUTEX_ENTER(&peer->peer_lock);
828 hadd32(peer->bytesReceived, p->length);
829 MUTEX_EXIT(&peer->peer_lock);
833 /* Free any empty packet buffers at the end of this packet */
834 rxi_TrimDataBufs(p, 1);
840 #endif /* !KERNEL || UKERNEL */
842 /* This function splits off the first packet in a jumbo packet.
843 * As of AFS 3.5, jumbograms contain more than one fixed size
844 * packet, and the RX_JUMBO_PACKET flag is set in all but the
845 * last packet header. All packets (except the last) are padded to
846 * fall on RX_CBUFFERSIZE boundaries.
847 * HACK: We store the length of the first n-1 packets in the
848 * last two pad bytes. */
/* Split the first fixed-size packet off a jumbogram. p is truncated to
 * RX_JUMBOBUFFERSIZE; the remainder (starting at p's first continuation
 * buffer) becomes packet np with serial/seq advanced by one and
 * flags/spare taken from the abbreviated 4-byte jumbo header stored at the
 * end of p's first data buffer. Length validation, the bogus-packet
 * returns, and the final return-np path are missing from this sampled
 * view. K&R definition. */
850 struct rx_packet *rxi_SplitJumboPacket(p, host, port, first)
851 register struct rx_packet *p;
856 struct rx_packet *np;
857 struct rx_jumboHeader *jp;
863 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
864 * bytes in length. All but the first packet are preceded by
865 * an abbreviated four byte header. The length of the last packet
866 * is calculated from the size of the jumbogram. */
867 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
869 if ((int)p->length < length) {
870 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
/* Need at least one continuation buffer to carve the tail packet from. */
873 niov = p->niovecs - 2;
875 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
878 iov = &p->wirevec[2];
/* np reuses the continuation buffer's owning packet as its storage. */
879 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
881 /* Get a pointer to the abbreviated packet header */
882 jp = (struct rx_jumboHeader *)
883 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
885 /* Set up the iovecs for the next packet */
886 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
887 np->wirevec[0].iov_len = sizeof(struct rx_header);
888 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
889 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
890 np->niovecs = niov+1;
/* Hand p's remaining continuation vecs over to np. */
891 for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
892 np->wirevec[i] = *iov;
894 np->length = p->length - length;
895 p->length = RX_JUMBOBUFFERSIZE;
898 /* Convert the jumbo packet header to host byte order */
899 temp = ntohl(*(afs_uint32 *)jp);
900 jp->flags = (u_char)(temp >> 24);
901 jp->cksum = (u_short)(temp);
903 /* Fill in the packet header */
904 np->header = p->header;
905 np->header.serial = p->header.serial + 1;
906 np->header.seq = p->header.seq + 1;
907 np->header.flags = jp->flags;
908 np->header.spare = jp->cksum;
914 /* Send a udp datagram */
/* Send a UDP datagram described by <dvec>/<nvecs> to <addr> via sendmsg.
 * K&R definition; all parameter declarations, the msg_iov/msg_name
 * assignments and the return are missing from this sampled view, so the
 * error handling cannot be documented from here. */
915 int osi_NetSend(socket, addr, dvec, nvecs, length, istack)
925 memset(&msg, 0, sizeof(msg));
927 msg.msg_iovlen = nvecs;
929 msg.msg_namelen = sizeof(struct sockaddr_in);
931 rxi_Sendmsg(socket, &msg, 0);
935 #elif !defined(UKERNEL)
936 /* osi_NetSend is defined in afs/afs_osinet.c
937 * message receipt is done in rxk_input or rx_put.
942 * Copy an mblock to the contiguous area pointed to by cp.
943 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
944 * but it doesn't really.
945 * Returns the number of bytes not transferred.
946 * The message is NOT changed.
/* STREAMS helper: copy up to <len> bytes from mblk chain <mp> into the flat
 * buffer <cp>, following b_cont links and skipping anything that is not an
 * M_DATA block. Per the header comment: <off> is supposed to be honored but
 * is not, and the chain itself is never modified. Returns bytes NOT
 * transferred. Pointer advances/return are missing from this sampled view. */
948 static int cpytoc(mp, off, len, cp)
950 register int off, len;
955 for (;mp && len > 0; mp = mp->b_cont) {
956 if (mp->b_datap->db_type != M_DATA) {
/* Copy the readable span of this message block: [b_rptr, b_wptr). */
959 n = MIN(len, (mp->b_wptr - mp->b_rptr));
960 bcopy((char *)mp->b_rptr, cp, n);
968 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
969 * but it doesn't really.
970 * This sucks, anyway, do it like m_cpy.... below
/* STREAMS helper: copy up to <len> bytes from mblk chain <mp> into the
 * iovec array <iovs> (niovs entries), splitting across vec boundaries.
 * Like cpytoc, <off> is documented above as not actually honored. Most of
 * the vec-advance bookkeeping (m, o, t, i updates) is missing from this
 * sampled view, so only the copy core is annotated. K&R definition. */
972 static int cpytoiovec(mp, off, len, iovs, niovs)
975 register struct iovec *iovs;
977 register int m,n,o,t,i;
979 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
980 if (mp->b_datap->db_type != M_DATA) {
983 n = MIN(len, (mp->b_wptr - mp->b_rptr));
/* m = bytes copied into the current vec this pass; o = offset within it. */
992 bcopy((char *)mp->b_rptr, iovs[i].iov_base + o, m);
1001 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1002 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1004 #if !defined(AFS_LINUX20_ENV)
/* BSD-mbuf variant (non-Linux builds): copy <len> bytes starting <off>
 * bytes into mbuf chain <m> out to the iovec array. Walks the chain to
 * skip <off>, then alternates advancing the mbuf source (p1/l1) and the
 * iovec destination (p2/l2), copying min(l1, l2, len) each step. The
 * actual bcopy, chain-walk increments and return are missing from this
 * sampled view. K&R definition. */
1005 static int m_cpytoiovec(m, off, len, iovs, niovs)
1007 int off, len, niovs;
1008 struct iovec iovs[];
1011 unsigned int l1, l2, i, t;
1013 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1014 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
/* Skip whole mbufs until <off> lands inside the current one. */
1017 if (m->m_len <= off) {
1027 p1 = mtod(m, caddr_t)+off;
1028 l1 = m->m_len - off;
1030 p2 = iovs[0].iov_base;
1031 l2 = iovs[0].iov_len;
/* t = size of this copy step: bounded by source, destination and len. */
1034 t = MIN(l1, MIN(l2, (unsigned int)len));
/* Source mbuf exhausted: step to the next mbuf in the chain. */
1043 p1 = mtod(m, caddr_t);
/* Destination vec exhausted: step to the next iovec. */
1049 p2 = iovs[i].iov_base;
1050 l2 = iovs[i].iov_len;
1058 #endif /* AFS_SUN5_ENV */
1060 #if !defined(AFS_LINUX20_ENV)
/* Copy a received mbuf/mblk chain into packet phandle's iovecs, skipping
 * <hdr_len> bytes and copying <data_len> bytes, via m_cpytoiovec (which is
 * cpytoiovec on STREAMS platforms per the #defines above). The <free>
 * parameter's declaration/use and the return are missing from this view. */
1061 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1068 struct rx_packet *phandle;
1069 int hdr_len, data_len;
1073 code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1079 #endif /*KERNEL && !UKERNEL*/
1082 /* send a response to a debug packet */
/* Handle an incoming rxdebug request packet <ap>: decode the request type
 * and index, fill <ap> with the requested snapshot (basic stats, connection
 * table, peer table, or raw rx_stats) and echo it back to (ahost, aport)
 * via rxi_SendDebugPacket. Unknown types get an RX_DEBUGI_BADTYPE reply.
 * NOTE(review): the switch keyword, several braces/returns and the error
 * paths are missing from this sampled view; annotations below cover only
 * the visible logic. K&R definition. */
1084 struct rx_packet *rxi_ReceiveDebugPacket(ap, asocket, ahost, aport, istack)
1088 register struct rx_packet *ap;
1091 struct rx_debugIn tin;
1093 struct rx_serverQueueEntry *np, *nqe;
1095 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1096 /* all done with packet, now set length to the truth, so we can
1097 * reuse this packet */
1098 rx_computelen(ap, ap->length);
1100 tin.type = ntohl(tin.type);
1101 tin.index = ntohl(tin.index);
1103 case RX_DEBUGI_GETSTATS: {
1104 struct rx_debugStats tstat;
1106 /* get basic stats */
1107 bzero ((char *)&tstat, sizeof(tstat)); /* make sure spares are zero */
1108 tstat.version = RX_DEBUGI_VERSION;
1109 #ifndef RX_ENABLE_LOCKS
1110 tstat.waitingForPackets = rx_waitingForPackets;
1112 tstat.nFreePackets = htonl(rx_nFreePackets);
1113 tstat.callsExecuted = htonl(rxi_nCalls);
1114 tstat.packetReclaims = htonl(rx_packetReclaims);
1115 tstat.usedFDs = CountFDs(64);
1116 tstat.nWaiting = htonl(rx_nWaiting);
1117 queue_Count( &rx_idleServerQueue, np, nqe,
1118 rx_serverQueueEntry, tstat.idleThreads);
1119 tstat.idleThreads = htonl(tstat.idleThreads);
/* Grow the reply packet if the stats struct is larger than ap's data area. */
1120 tl = sizeof(struct rx_debugStats) - ap->length;
1122 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1125 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1126 ap->length = sizeof(struct rx_debugStats);
1127 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1128 rx_computelen(ap, ap->length);
1133 case RX_DEBUGI_GETALLCONN:
1134 case RX_DEBUGI_GETCONN: {
1136 register struct rx_connection *tc;
1137 struct rx_call *tcall;
1138 struct rx_debugConn tconn;
1139 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1142 tl = sizeof(struct rx_debugConn) - ap->length;
1144 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1148 bzero ((char *)&tconn, sizeof(tconn)); /* make sure spares are zero */
1149 /* get N'th (maybe) "interesting" connection info */
1150 for(i=0;i<rx_hashTableSize;i++) {
1151 #if !defined(KERNEL)
1152 /* the time complexity of the algorithm used here
1153 * exponentially increases with the number of connections.
1155 #ifdef AFS_PTHREAD_ENV
1158 (void) IOMGR_Poll();
1161 MUTEX_ENTER(&rx_connHashTable_lock);
1162 /* We might be slightly out of step since we are not
1163 * locking each call, but this is only debugging output.
/* tin.index counts down: the request asks for the N'th interesting conn. */
1165 for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
1166 if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1167 tconn.host = tc->peer->host;
1168 tconn.port = tc->peer->port;
1169 tconn.cid = htonl(tc->cid);
1170 tconn.epoch = htonl(tc->epoch);
1171 tconn.serial = htonl(tc->serial);
1172 for(j=0;j<RX_MAXCALLS;j++) {
1173 tconn.callNumber[j] = htonl(tc->callNumber[j]);
/* Intentional assignment-in-condition: tcall is the j'th call slot. */
1174 if (tcall=tc->call[j]) {
1175 tconn.callState[j] = tcall->state;
1176 tconn.callMode[j] = tcall->mode;
1177 tconn.callFlags[j] = tcall->flags;
1178 if (queue_IsNotEmpty(&tcall->rq))
1179 tconn.callOther[j] |= RX_OTHER_IN;
1180 if (queue_IsNotEmpty(&tcall->tq))
1181 tconn.callOther[j] |= RX_OTHER_OUT;
1183 else tconn.callState[j] = RX_STATE_NOTINIT;
1186 tconn.natMTU = htonl(tc->peer->natMTU);
1187 tconn.error = htonl(tc->error);
1188 tconn.flags = tc->flags;
1189 tconn.type = tc->type;
1190 tconn.securityIndex = tc->securityIndex;
1191 if (tc->securityObject) {
1192 RXS_GetStats (tc->securityObject, tc,
/* Helpers to flip each secStats field to network byte order in place. */
1194 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1195 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1198 DOHTONL(packetsReceived);
1199 DOHTONL(packetsSent);
1200 DOHTONL(bytesReceived);
1203 i<sizeof(tconn.secStats.spares)/sizeof(short);
1207 i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
1212 MUTEX_EXIT(&rx_connHashTable_lock);
1213 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1215 ap->length = sizeof(struct rx_debugConn);
1216 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1221 MUTEX_EXIT(&rx_connHashTable_lock);
1223 /* if we make it here, there are no interesting packets */
1224 tconn.cid = htonl(0xffffffff); /* means end */
1225 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1227 ap->length = sizeof(struct rx_debugConn);
1228 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1234 * Pass back all the peer structures we have available
1237 case RX_DEBUGI_GETPEER: {
1239 register struct rx_peer *tp;
1240 struct rx_debugPeer tpeer;
1243 tl = sizeof(struct rx_debugPeer) - ap->length;
1245 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1249 bzero ((char *)&tpeer, sizeof(tpeer));
1250 for(i=0;i<rx_hashTableSize;i++) {
1251 #if !defined(KERNEL)
1252 /* the time complexity of the algorithm used here
1253 * exponentially increases with the number of peers.
1255 * Yielding after processing each hash table entry
1256 * and dropping rx_peerHashTable_lock.
1257 * also increases the risk that we will miss a new
1258 * entry - but we are willing to live with this
1259 * limitation since this is meant for debugging only
1261 #ifdef AFS_PTHREAD_ENV
1264 (void) IOMGR_Poll();
1267 MUTEX_ENTER(&rx_peerHashTable_lock);
1268 for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
1269 if (tin.index-- <= 0) {
1270 tpeer.host = tp->host;
1271 tpeer.port = tp->port;
1272 tpeer.ifMTU = htons(tp->ifMTU);
1273 tpeer.idleWhen = htonl(tp->idleWhen);
1274 tpeer.refCount = htons(tp->refCount);
1275 tpeer.burstSize = tp->burstSize;
1276 tpeer.burst = tp->burst;
1277 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1278 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1279 tpeer.rtt = htonl(tp->rtt);
1280 tpeer.rtt_dev = htonl(tp->rtt_dev);
1281 tpeer.timeout.sec = htonl(tp->timeout.sec);
1282 tpeer.timeout.usec = htonl(tp->timeout.usec);
1283 tpeer.nSent = htonl(tp->nSent);
1284 tpeer.reSends = htonl(tp->reSends);
1285 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1286 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1287 tpeer.rateFlag = htonl(tp->rateFlag);
1288 tpeer.natMTU = htons(tp->natMTU);
1289 tpeer.maxMTU = htons(tp->maxMTU);
1290 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1291 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1292 tpeer.MTU = htons(tp->MTU);
1293 tpeer.cwind = htons(tp->cwind);
1294 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1295 tpeer.congestSeq = htons(tp->congestSeq);
1296 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1297 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1298 tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1299 tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
1301 MUTEX_EXIT(&rx_peerHashTable_lock);
1302 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1304 ap->length = sizeof(struct rx_debugPeer);
1305 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1310 MUTEX_EXIT(&rx_peerHashTable_lock);
1312 /* if we make it here, there are no interesting packets */
1313 tpeer.host = htonl(0xffffffff); /* means end */
1314 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1316 ap->length = sizeof(struct rx_debugPeer);
1317 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1322 case RX_DEBUGI_RXSTATS: {
1326 tl = sizeof(rx_stats) - ap->length;
1328 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1332 /* Since its all int32s convert to network order with a loop. */
1333 MUTEX_ENTER(&rx_stats_mutex);
1334 s = (afs_int32 *)&rx_stats;
1335 for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1336 rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1339 ap->length = sizeof(rx_stats);
1340 MUTEX_EXIT(&rx_stats_mutex);
1341 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1347 /* error response packet */
1348 tin.type = htonl(RX_DEBUGI_BADTYPE);
1349 tin.index = tin.type;
1350 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1352 ap->length = sizeof(struct rx_debugIn);
1353 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* Answer a version-query debug packet: overwrite the packet payload with
 * this build's version string (cml_version_number, skipping its first 4
 * bytes) and echo the packet back to the sender via rxi_SendDebugPacket.
 * NOTE(review): several original lines are elided in this extract — the
 * remaining K&R parameter declarations, length setup, and the return
 * statement are not visible here; confirm against the full source. */
1360 struct rx_packet *rxi_ReceiveVersionPacket(ap, asocket, ahost, aport, istack)
1364	register struct rx_packet *ap;
1368     rx_packetwrite(ap, 0, 65, cml_version_number+4);
1371     rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1377 /* send a debug packet back to the sender */
/* Transmit a debug/version reply to (ahost, aport) on asocket.  Before
 * sending, the wirevec iovec list is temporarily truncated so that the
 * iov_len sum matches apacket->length exactly; the clipped iov_len and
 * niovecs are saved and restored afterwards so the caller's packet is
 * left unchanged.  ahost/aport are already in network byte order (they
 * are copied straight from the incoming packet's source address).
 * NOTE(review): this extract elides some original lines (local variable
 * declarations, braces, and #ifdef arms); read alongside full source. */
1378 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1379				afs_int32 ahost, short aport, afs_int32 istack)
1381     struct sockaddr_in taddr;
1387     int waslocked = ISAFS_GLOCK();
1390     taddr.sin_family = AF_INET;
1391     taddr.sin_port = aport;
1392     taddr.sin_addr.s_addr = ahost;
1395     /* We need to trim the niovecs. */
1396     nbytes = apacket->length;
     /* Walk the data iovecs (index 0 is the wire header) until the
      * running byte count is exhausted; clip the last needed iovec. */
1397     for (i=1; i < apacket->niovecs; i++) {
1398	if (nbytes <= apacket->wirevec[i].iov_len) {
1399	    savelen = apacket->wirevec[i].iov_len;
1400	    saven = apacket->niovecs;
1401	    apacket->wirevec[i].iov_len = nbytes;
1402	    apacket->niovecs = i+1;   /* so condition fails because i == niovecs */
1404	else nbytes -= apacket->wirevec[i].iov_len;
     /* Drop the AFS global lock around the (possibly blocking) send. */
1408     if (waslocked) AFS_GUNLOCK();
1410     /* debug packets are not reliably delivered, hence the cast below. */
1411     (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1412		       apacket->length+RX_HEADER_SIZE, istack);
1414     if (waslocked) AFS_GLOCK();
     /* Undo the truncation: saven != 0 only if the loop above clipped an
      * iovec (i was left one past the clipped entry). */
1417     if (saven) {  /* means we truncated the packet above. */
1418	apacket->wirevec[i-1].iov_len = savelen;
1419	apacket->niovecs = saven;
1424 /* Send the packet to appropriate destination for the specified
1425  * connection.  The header is first encoded and placed in the packet.
/* Stamps the packet with the connection's next serial number, encodes the
 * header into wire byte order, and transmits it on the connection's
 * socket.  On send failure the packet's retry time is pulled forward so
 * the retransmit logic resends it soon.  Updates rx_stats and the peer's
 * bytesSent counters.
 * NOTE(review): this extract elides some original lines (parameter list
 * tail, #ifdef RXDEBUG arms, braces); confirm against full source. */
1427 void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
1431     struct sockaddr_in addr;
1432     register struct rx_peer *peer = conn->peer;
1435     char deliveryType = 'S';
1437     /* The address we're sending the packet to */
1438     addr.sin_family = AF_INET;
1439     addr.sin_port = peer->port;
1440     addr.sin_addr.s_addr = peer->host;
1442     /* This stuff should be revamped, I think, so that most, if not
1443      * all, of the header stuff is always added here.  We could
1444      * probably do away with the encode/decode routines. XXXXX */
1446     /* Stamp each packet with a unique serial number.  The serial
1447      * number is maintained on a connection basis because some types
1448      * of security may be based on the serial number of the packet,
1449      * and security is handled on a per authenticated-connection
1451     /* Pre-increment, to guarantee no zero serial number; a zero
1452      * serial number means the packet was never sent. */
1453     MUTEX_ENTER(&conn->conn_data_lock);
1454     p->header.serial = ++conn->serial;
1455     MUTEX_EXIT(&conn->conn_data_lock);
1456     /* This is so we can adjust retransmit time-outs better in the face of
1457      * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     /* Remember the serial of the first transmission of this packet. */
1459     if (p->firstSerial == 0) {
1460	p->firstSerial = p->header.serial;
1464     /* If an output tracer function is defined, call it with the packet and
1465      * network address.  Note this function may modify its arguments. */
1466     if (rx_almostSent) {
1467	int drop = (*rx_almostSent) (p, &addr);
1468	/* drop packet if return value is non-zero? */
1469	if (drop) deliveryType = 'D'; /* Drop the packet */
1473     /* Get network byte order header */
1474     rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
1475				 * touch ALL the fields */
1477     /* Send the packet out on the same socket that related packets are being
1479     socket = (conn->type == RX_CLIENT_CONNECTION
1480	      ? rx_socket : conn->service->socket);
1483     /* Possibly drop this packet, for testing purposes */
     /* RXDEBUG-only fault injection: randomly mark packets as dropped. */
1484     if ((deliveryType == 'D') ||
1485	((rx_intentionallyDroppedPacketsPer100 > 0) &&
1486	 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1487	deliveryType = 'D';		/* Drop the packet */
1490	deliveryType = 'S';		/* Send the packet */
1491 #endif /* RXDEBUG */
1493     /* Loop until the packet is sent.  We'd prefer just to use a
1494      * blocking socket, but unfortunately the interface doesn't
1495      * allow us to have the socket block in send mode, and not
1496      * block in receive mode */
     /* Drop the AFS global lock (if held) around the network send. */
1499       waslocked = ISAFS_GLOCK();
1500       if (waslocked) AFS_GUNLOCK();
1502	if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1503			p->length+RX_HEADER_SIZE, istack)){
1504	  /* send failed, so let's hurry up the resend, eh? */
1505	  MUTEX_ENTER(&rx_stats_mutex);
1506	  rx_stats.netSendFailures++;
1507	  MUTEX_EXIT(&rx_stats_mutex);
	  /* Schedule an early retransmit: now + 10ms + exponential backoff. */
1508	  p->retryTime = p->timeSent;	/* resend it very soon */
1509	  clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1512       if (waslocked) AFS_GLOCK();
1517     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1518        deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1519        peer->host, peer->port, p->header.serial, p->header.epoch,
1520        p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1521        p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
     /* Per-type send counter and per-peer byte accounting. */
1523     MUTEX_ENTER(&rx_stats_mutex);
1524     rx_stats.packetsSent[p->header.type-1]++;
1525     MUTEX_EXIT(&rx_stats_mutex);
1526     MUTEX_ENTER(&peer->peer_lock);
1527     hadd32(peer->bytesSent, p->length);
1528     MUTEX_EXIT(&peer->peer_lock);
1531 /* Send a list of packets to appropriate destination for the specified
1532  * connection.  The headers are first encoded and placed in the packets.
/* Assemble `len` packets into a single AFS 3.5 jumbogram and send it as
 * one datagram: a shared wirevec is built with one wire header (from the
 * first packet) followed by each packet's data buffer, consecutive serial
 * numbers are stamped, and all but the last packet get the
 * RX_JUMBO_PACKET flag plus a jumbo sub-header appended after their
 * RX_JUMBOBUFFERSIZE payload.  On send failure every packet in the list
 * gets an early retry time.
 * NOTE(review): this extract elides some original lines (parameter list
 * tail, loop braces, #ifdef arms); confirm against full source. */
1534 void rxi_SendPacketList(struct rx_connection * conn,
1535			struct rx_packet **list,
1540     struct sockaddr_in addr;
1541     register struct rx_peer *peer = conn->peer;
1543     struct rx_packet *p;
1544     struct iovec wirevec[RX_MAXIOVECS];
1548     struct rx_jumboHeader *jp;
1550     char deliveryType = 'S';
1552     /* The address we're sending the packet to */
1553     addr.sin_family = AF_INET;
1554     addr.sin_port = peer->port;
1555     addr.sin_addr.s_addr = peer->host;
     /* One iovec for the wire header plus one per packet must fit. */
1557     if (len+1 > RX_MAXIOVECS) {
1558	osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1562      * Stamp the packets in this jumbogram with consecutive serial numbers
     /* Reserve a run of `len` serials under the lock; stamp outside it. */
1564     MUTEX_ENTER(&conn->conn_data_lock);
1565     serial = conn->serial;
1566     conn->serial += len;
1567     MUTEX_EXIT(&conn->conn_data_lock);
1570     /* This stuff should be revamped, I think, so that most, if not
1571      * all, of the header stuff is always added here.  We could
1572      * probably do away with the encode/decode routines. XXXXX */
     /* iovec[0] is the wire header taken from the first packet. */
1575     length = RX_HEADER_SIZE;
1576     wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1577     wirevec[0].iov_len = RX_HEADER_SIZE;
1578     for (i = 0 ; i < len ; i++) {
1581	/* The whole 3.5 jumbogram scheme relies on packets fitting
1582	 * in a single packet buffer. */
1583	if (p->niovecs > 2) {
1584	    osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1587	/* Set the RX_JUMBO_PACKET flags in all but the last packets
	/* Non-final packets must be exactly one jumbo buffer long, and
	 * carry the jumbo sub-header after their payload. */
1590	    if (p->length != RX_JUMBOBUFFERSIZE) {
1591		osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1593	    p->header.flags |= RX_JUMBO_PACKET;
1594	    length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1595	    wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1597	    wirevec[i+1].iov_len = p->length;
1598	    length += p->length;
1600	wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
1602	    /* Convert jumbo packet header to network byte order */
	    /* Pack flags (high byte) and spare into the jumbo sub-header.
	     * NOTE(review): jp points into the PREVIOUS packet's buffer;
	     * it is set below for the next iteration. */
1603	    temp = (afs_uint32)(p->header.flags) << 24;
1604	    temp |= (afs_uint32)(p->header.spare);
1605	    *(afs_uint32 *)jp = htonl(temp);
1607	jp = (struct rx_jumboHeader *)
1608	     ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1610	/* Stamp each packet with a unique serial number.  The serial
1611	 * number is maintained on a connection basis because some types
1612	 * of security may be based on the serial number of the packet,
1613	 * and security is handled on a per authenticated-connection
1615	/* Pre-increment, to guarantee no zero serial number; a zero
1616	 * serial number means the packet was never sent. */
1617	p->header.serial = ++serial;
1618	/* This is so we can adjust retransmit time-outs better in the face of
1619	 * rapidly changing round-trip times.  RTO estimation is not a la Karn.
1621	if (p->firstSerial == 0) {
1622	    p->firstSerial = p->header.serial;
1626	/* If an output tracer function is defined, call it with the packet and
1627	 * network address.  Note this function may modify its arguments. */
1628	if (rx_almostSent) {
1629	    int drop = (*rx_almostSent) (p, &addr);
1630	    /* drop packet if return value is non-zero? */
1631	    if (drop) deliveryType = 'D'; /* Drop the packet */
1635	/* Get network byte order header */
1636	rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
1637					 * touch ALL the fields */
1640     /* Send the packet out on the same socket that related packets are being
1642     socket = (conn->type == RX_CLIENT_CONNECTION
1643	      ? rx_socket : conn->service->socket);
1646     /* Possibly drop this packet, for testing purposes */
     /* RXDEBUG-only fault injection: randomly mark packets as dropped. */
1647     if ((deliveryType == 'D') ||
1648	((rx_intentionallyDroppedPacketsPer100 > 0) &&
1649	 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1650	deliveryType = 'D';		/* Drop the packet */
1653	deliveryType = 'S';		/* Send the packet */
1654 #endif /* RXDEBUG */
1656     /* Loop until the packet is sent.  We'd prefer just to use a
1657      * blocking socket, but unfortunately the interface doesn't
1658      * allow us to have the socket block in send mode, and not
1659      * block in receive mode */
1661 #if	defined(AFS_SUN5_ENV) && defined(KERNEL)
     /* Solaris kernel: drop the AFS global lock around the send unless
      * called from the interrupt stack. */
1662     waslocked = ISAFS_GLOCK();
1663     if (!istack && waslocked) AFS_GUNLOCK();
1665     if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
1666       /* send failed, so let's hurry up the resend, eh? */
1667       MUTEX_ENTER(&rx_stats_mutex);
1668       rx_stats.netSendFailures++;
1669       MUTEX_EXIT(&rx_stats_mutex);
       /* Pull every packet's retry time forward: now + 10ms + backoff. */
1670       for (i = 0 ; i < len ; i++) {
1672	 p->retryTime = p->timeSent;	/* resend it very soon */
1673	 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1676 #if	defined(AFS_SUN5_ENV) && defined(KERNEL)
1677     if (!istack && waslocked) AFS_GLOCK();
1682     dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1683        deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1684        peer->host, peer->port, p->header.serial, p->header.epoch,
1685        p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1686        p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
     /* Per-type send counter and per-peer byte accounting.
      * NOTE(review): p here refers to the last packet of the list. */
1688     MUTEX_ENTER(&rx_stats_mutex);
1689     rx_stats.packetsSent[p->header.type-1]++;
1690     MUTEX_EXIT(&rx_stats_mutex);
1691     MUTEX_ENTER(&peer->peer_lock);
1692     hadd32(peer->bytesSent, p->length);
1693     MUTEX_EXIT(&peer->peer_lock);
1697 /* Send a "special" packet to the peer connection.  If call is
1698  * specified, then the packet is directed to a specific call channel
1699  * associated with the connection, otherwise it is directed to the
1700  * connection only. Uses optionalPacket if it is supplied, rather than
1701  * allocating a new packet buffer.  Nbytes is the length of the data
1702  * portion of the packet.  If data is non-null, nbytes of data are
1703  * copied into the packet.  Type is the type of the packet, as defined
1704  * in rx.h.  Bug: there's a lot of duplication between this and other
1705  * routines.  This needs to be cleaned up. */
/* Returns optionalPacket if one was supplied (caller keeps ownership);
 * otherwise the freshly allocated packet is freed here and NULL-ish
 * behavior applies — callers must check the return value.
 * NOTE(review): this extract elides some original lines (remaining K&R
 * parameter declarations, BUSY-handling body, header-setup lines and
 * braces); confirm against full source. */
1707 rxi_SendSpecial(call, conn, optionalPacket, type, data, nbytes, istack)
1708     register struct rx_call *call;
1709     register struct rx_connection *conn;
1710     struct rx_packet *optionalPacket;
1715     /* Some of the following stuff should be common code for all
1716      * packet sends (it's repeated elsewhere) */
1717     register struct rx_packet *p;
1719     int savelen, saven = 0;
1720     int channel, callNumber;
1722	channel = call->channel;
1723	callNumber = *call->callNumber;
1724	/* BUSY packets refer to the next call on this connection */
1725	if (type == RX_PACKET_TYPE_BUSY) {
     /* Allocate a packet only when the caller didn't supply one. */
1734	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1735	if (!p) osi_Panic("rxi_SendSpecial failure");
     /* Fill in header fields from the connection/call. */
1742     p->header.serviceId = conn->serviceId;
1743     p->header.securityIndex = conn->securityIndex;
1744     p->header.cid = (conn->cid | channel);
1745     p->header.callNumber = callNumber;
1747     p->header.epoch = conn->epoch;
1748     p->header.type = type;
1749     p->header.flags = 0;
1750     if (conn->type == RX_CLIENT_CONNECTION)
1751        p->header.flags |= RX_CLIENT_INITIATED;
1753	rx_packetwrite(p, 0, nbytes, data);
     /* Trim the iovec chain so iov_len totals match nbytes; remember the
      * clipped values so a supplied optionalPacket can be restored. */
1755     for (i=1; i < p->niovecs; i++) {
1756	if (nbytes <= p->wirevec[i].iov_len) {
1757	    savelen = p->wirevec[i].iov_len;
1759	    p->wirevec[i].iov_len = nbytes;
1760	    p->niovecs = i+1;   /* so condition fails because i == niovecs */
1762	else nbytes -= p->wirevec[i].iov_len;
1765     if (call) rxi_Send(call, p, istack);
1766     else rxi_SendPacket(conn, p, istack);
1767     if (saven) {  /* means we truncated the packet above.  We probably don't  */
1768	/* really need to do this, but it seems safer this way, given that  */
1769	/* sneaky optionalPacket... */
1770	p->wirevec[i-1].iov_len = savelen;
1773     if (!optionalPacket) rxi_FreePacket(p);
1774     return optionalPacket;
1778 /* Encode the packet's header (from the struct header in the packet to
1779  * the net byte order representation in the wire representation of the
1780  * packet, which is what is actually sent out on the wire) */
/* Serializes p->header into the first iovec as seven big-endian 32-bit
 * words: epoch, cid, callNumber, seq, serial, then type/flags/
 * userStatus/securityIndex packed one byte each, then spare (high 16)
 * with serviceId (low 16).  Inverse of rxi_DecodePacketHeader. */
1781 void rxi_EncodePacketHeader(p)
1782 register struct rx_packet *p;
1784     register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */
1786     bzero((char *)buf, RX_HEADER_SIZE);
1787     *buf++ = htonl(p->header.epoch);
1788     *buf++ = htonl(p->header.cid);
1789     *buf++ = htonl(p->header.callNumber);
1790     *buf++ = htonl(p->header.seq);
1791     *buf++ = htonl(p->header.serial);
     /* Pack four byte-wide fields into one word: type|flags|userStatus|secIndex */
1792     *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
1793		   | (((afs_uint32)p->header.flags)<<16)
1794		   | (p->header.userStatus<<8) | p->header.securityIndex);
1795     /* Note: top 16 bits of this next word were reserved */
1796     *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1799 /* Decode the packet's header (from net byte order to a struct header) */
/* Parses the seven big-endian 32-bit words written by
 * rxi_EncodePacketHeader from the first iovec back into p->header. */
1800 void rxi_DecodePacketHeader(p)
1801 register struct rx_packet *p;
1803     register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base);      /* MTUXXX */
1806     p->header.epoch = ntohl(*buf++);
1807     p->header.cid = ntohl(*buf++);
1808     p->header.callNumber = ntohl(*buf++);
1809     p->header.seq = ntohl(*buf++);
1810     p->header.serial = ntohl(*buf++);
1811     temp = ntohl(*buf++);
1812     /* C will truncate byte fields to bytes for me */
1813     p->header.type = temp>>24;
1814     p->header.flags = temp>>16;
1815     p->header.userStatus = temp>>8;
1816     p->header.securityIndex = temp>>0;
1817     temp = ntohl(*buf++);
1818     p->header.serviceId = (temp&0xffff);
1819     p->header.spare = temp>>16;
1820     /* Note: top 16 bits of this last word are the security checksum */
/* Prepare a DATA packet for (re)transmission on `call`: fill the header
 * from the call/connection, assign the next sequence number, reset the
 * transmit timestamps, reconcile the iovec lengths with p->length (plus
 * the security header), free surplus continuation buffers, and finally
 * let the security object process the packet.
 * NOTE(review): this extract elides some original lines (the `last`
 * parameter declaration, loop braces, and the RX_LAST_PACKET condition);
 * confirm against full source. */
1823 void rxi_PrepareSendPacket(call, p, last)
1824     register struct rx_call *call;
1825     register struct rx_packet *p;
1828     register struct rx_connection *conn = call->conn;
1830     ssize_t len;	/* len must be a signed type; it can go negative */
1833     p->header.cid  = (conn->cid | call->channel);
1834     p->header.serviceId = conn->serviceId;
1835     p->header.securityIndex = conn->securityIndex;
1836     p->header.callNumber = *call->callNumber;
1837     p->header.seq = call->tnext++;
1838     p->header.epoch = conn->epoch;
1839     p->header.type = RX_PACKET_TYPE_DATA;
1840     p->header.flags = 0;
1841     p->header.spare = 0;
1842     if (conn->type == RX_CLIENT_CONNECTION)
1843	p->header.flags |= RX_CLIENT_INITIATED;
1846	p->header.flags |= RX_LAST_PACKET;
1848     clock_Zero(&p->retryTime);	/* Never yet transmitted */
1849     clock_Zero(&p->firstSent);	/* Never yet transmitted */
1850     p->header.serial = 0;	/* Another way of saying never transmitted... */
1853     /* Now that we're sure this is the last data on the call, make sure
1854      * that the "length" and the sum of the iov_lens matches. */
1855     len = p->length + call->conn->securityHeaderSize;
     /* Count down len across the data iovecs to find the last one used. */
1857     for (i=1; i < p->niovecs && len > 0; i++) {
1858	len -= p->wirevec[i].iov_len;
     /* len > 0 here means the iovecs can't hold p->length bytes. */
1861	osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1864	/* Free any extra elements in the wirevec */
	/* Release continuation buffers beyond the last one needed (never
	 * the first data iovec, hence MAX(2,i)). */
1865	for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1866	    rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
     /* len is <= 0 here; shrink the last used iovec to the exact length. */
1869	p->wirevec[i-1].iov_len += len;
1871     RXS_PreparePacket(conn->securityObject, call, p);
1874 /* Given an interface MTU size, calculate an adjusted MTU size that
1875  * will make efficient use of the RX buffers when the peer is sending
1876  * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
/* Rounds mtu down to the header size plus a whole number of jumbo
 * buffer+header units so no partial RX buffer is ever wasted.
 * NOTE(review): the small-MTU early-return branch and the `frags -= 1`
 * style adjustment are elided from this extract; confirm full source. */
1877 int rxi_AdjustIfMTU(int mtu)
1882     adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1883     if (mtu <= adjMTU) {
     /* How many additional whole jumbo units fit beyond the first. */
1890     frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1891     return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1894 /* Given an interface MTU size, and the peer's advertised max receive
1895  * size, calculate an adjisted maxMTU size that makes efficient use
1896  * of our packet buffers when we are sending AFS 3.4a jumbograms. */
/* maxMTU = min(mtu * max send fragments, peer's advertised limit),
 * then rounded to whole RX buffer units via rxi_AdjustIfMTU. */
1897 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1899     int maxMTU = mtu * rxi_nSendFrags;
1900     maxMTU = MIN(maxMTU, peerMaxMTU);
1901     return rxi_AdjustIfMTU(maxMTU);
1904 /* Given a packet size, figure out how many datagram packet will fit.
1905  * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1906  * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1907  * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
/* Computes how many jumbo sub-packets fit in `frags` fragments of `mtu`
 * bytes each (fragmented UDP datagram), capped at RX_MAX_PACKET_SIZE.
 * NOTE(review): the too-small-MTU early return and the negative-maxMTU
 * guard between these lines are elided from this extract. */
1908 int rxi_AdjustDgramPackets(int frags, int mtu)
     /* MTU must at least hold one jumbo buffer plus the RX header. */
1911     if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
     /* Total datagram payload: frags fragments minus all-but-one UDP header. */
1914     maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1915     maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1916     /* subtract the size of the first and last packets */
1917     maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
     /* 2 for the first and last packets, plus whole middle-sized units. */
1921     return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));