2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "../afs/param.h"
14 #include <afs/param.h>
21 #include "../afs/sysincludes.h"
22 #include "../afs/afsincludes.h"
23 #include "../rx/rx_kcommon.h"
24 #include "../rx/rx_clock.h"
25 #include "../rx/rx_queue.h"
26 #include "../rx/rx_packet.h"
27 #else /* defined(UKERNEL) */
28 #include "../h/types.h"
29 #ifndef AFS_LINUX20_ENV
30 #include "../h/systm.h"
32 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
33 #include "../afs/sysincludes.h"
35 #include "../h/socket.h"
36 #include "../netinet/in.h"
37 #include "../afs/afs_osi.h"
38 #include "../rx/rx_kmutex.h"
39 #include "../rx/rx_clock.h"
40 #include "../rx/rx_queue.h"
42 #include <sys/sysmacros.h>
44 #include "../rx/rx_packet.h"
45 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
46 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
47 #include "../sys/mount.h" /* it gets pulled in by something later anyway */
49 #include "../h/mbuf.h"
51 #endif /* defined(UKERNEL) */
52 #include "../rx/rx_globals.h"
54 #include "sys/types.h"
57 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
61 #include <sys/socket.h>
62 #include <netinet/in.h>
63 #endif /* AFS_NT40_ENV */
64 #include "rx_xmit_nt.h"
67 #include <sys/socket.h>
68 #include <netinet/in.h>
74 #include <sys/sysmacros.h>
76 #include "rx_packet.h"
77 #include "rx_globals.h"
79 #include "rx_internal.h"
/* File-scope state and forward declarations for the Rx packet module. */
93 /* rxdb_fileID is used to identify the lock location, along with line#. */
94 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
95 #endif /* RX_LOCKS_DB */
/* Base pointer of the most recent packet array handed out by
 * rxi_MorePackets*; released in rxi_FreeAllPackets.
 * NOTE(review): only the latest allocation is remembered here — earlier
 * arrays are not tracked by this pointer (see MTUXXX in rxi_FreeAllPackets). */
96 struct rx_packet *rx_mallocedP = 0;
98 extern char cml_version_number[];
99 extern int (*rx_almostSent)();
/* Forward declaration: reply transmitter used by the rxdebug handlers below. */
101 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
102 afs_int32 ahost, short aport, afs_int32 istack);
104 /* some rules about packets:
105 * 1. When a packet is allocated, the final iov_buf contains room for
106 * a security trailer, but iov_len masks that fact. If the security
107 * package wants to add the trailer, it may do so, and then extend
108 * iov_len appropriately. For this reason, packet's niovecs and
109 * iov_len fields should be accurate before calling PreparePacket.
113 * all packet buffers (iov_base) are integral multiples of
115 * offset is an integral multiple of the word size.
/* Slow path: read the 32-bit word at byte `offset` of the packet payload by
 * walking wirevec[1..niovecs-1] until the iovec covering `offset` is found.
 * `l` accumulates the byte length of all preceding iovecs.
 * NOTE(review): this excerpt omits interleaved lines (declarations and
 * closing braces are not visible here). */
117 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
121   for (l=0, i=1; i< packet->niovecs ; i++ ) {
122     if (l + packet->wirevec[i].iov_len > offset) {
/* Read in place; relies on word-aligned iov_base/offset (see comment above). */
123       return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
125     l += packet->wirevec[i].iov_len;
132 * all packet buffers (iov_base) are integral multiples of the word size.
133 * offset is an integral multiple of the word size.
/* Slow path: store 32-bit `data` at byte `offset` of the packet payload,
 * locating the target iovec the same way as rx_SlowGetInt32.
 * NOTE(review): excerpt omits lines (the stored rvalue continues on an
 * elided line). */
135 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
139   for (l=0, i=1; i< packet->niovecs ; i++ ) {
140     if (l + packet->wirevec[i].iov_len > offset) {
141       *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
145     l += packet->wirevec[i].iov_len;
152 * all packet buffers (iov_base) are integral multiples of the
154 * offset is an integral multiple of the word size.
156 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
/* Copy `resid` bytes starting at byte `offset` of the packet payload into
 * `out`, spanning iovec boundaries. Returns the number of bytes actually
 * copied (r - resid on short copy; see final return).
 * NOTE(review): excerpt omits lines between the locate loop and copy loop. */
158 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
159                         int resid, char *out)
161   unsigned int i, j, l, r;
/* Locate the iovec containing `offset`. */
162   for (l=0, i=1; i< packet->niovecs ; i++ ) {
163     if (l + packet->wirevec[i].iov_len > offset) {
166     l += packet->wirevec[i].iov_len;
169   /* i is the iovec which contains the first little bit of data in which we
170    * are interested. l is the total length of everything prior to this iovec.
171    * j is the number of bytes we can safely copy out of this iovec.
/* Copy loop: drain each iovec in turn until resid is satisfied. */
174   while ((resid > 0) && (i < packet->niovecs)) {
175     j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
176     bcopy ((char *)(packet->wirevec[i].iov_base) + (offset - l), out, j);
178     l += packet->wirevec[i].iov_len;
182   return (resid ? (r - resid) : r);
187 * all packet buffers (iov_base) are integral multiples of the
189 * offset is an integral multiple of the word size.
/* Copy `resid` bytes from `in` (parameter on an elided line) into the packet
 * payload starting at `offset`, growing the iovec chain on demand via
 * rxi_AllocDataBuf. Returns the number of bytes actually written. */
191 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
/* Locate the iovec containing `offset`. */
197   for (l=0, i=1; i < packet->niovecs; i++ ) {
198     if (l + packet->wirevec[i].iov_len > offset) {
201     l += packet->wirevec[i].iov_len;
204   /* i is the iovec which contains the first little bit of data in which we
205    * are interested. l is the total length of everything prior to this iovec.
206    * j is the number of bytes we can safely copy out of this iovec.
209   while ((resid > 0) && (i < RX_MAXWVECS)) {
/* Ran off the end of the existing iovecs: attach continuation buffers. */
210     if (i >= packet->niovecs)
211       if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
214     b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
215     j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
218     l += packet->wirevec[i].iov_len;
222   return (resid ? (r - resid) : r);
/* Allocate one packet from the global free queue for use as a continuation
 * buffer. Under quota pressure the allocation fails and the per-class
 * failure counter is bumped. Takes rx_freePktQ_lock internally.
 * NOTE(review): excerpt omits the switch head, break statements, and the
 * dequeue/return tail. */
225 static struct rx_packet * allocCBuf(int class)
231     MUTEX_ENTER(&rx_freePktQ_lock);
234     if (rxi_OverQuota(class)) {
236         rxi_NeedMorePackets = TRUE;
/* Record which allocation class hit quota, for rxdebug statistics. */
237         MUTEX_ENTER(&rx_stats_mutex);
239           case RX_PACKET_CLASS_RECEIVE:
240             rx_stats.receivePktAllocFailures++;
242           case RX_PACKET_CLASS_SEND:
243             rx_stats.sendPktAllocFailures++;
245           case RX_PACKET_CLASS_SPECIAL:
246             rx_stats.specialPktAllocFailures++;
248           case RX_PACKET_CLASS_RECV_CBUF:
249             rx_stats.receiveCbufPktAllocFailures++;
251           case RX_PACKET_CLASS_SEND_CBUF:
252             rx_stats.sendCbufPktAllocFailures++;
255         MUTEX_EXIT(&rx_stats_mutex);
259     if (queue_IsEmpty(&rx_freePacketQueue)) {
261         rxi_NeedMorePackets = TRUE;
/* Second emptiness check: grow the pool in place while holding the lock. */
265     if (queue_IsEmpty(&rx_freePacketQueue)) {
266         rxi_MorePacketsNoLock(rx_initSendWindow);
271     c = queue_First(&rx_freePacketQueue, rx_packet);
/* Sanity check: a packet on the free queue must carry the free marker. */
273     if (c->header.flags != RX_FREE_PACKET)
274       osi_Panic("rxi_AllocPacket: packet not free\n");
280     MUTEX_EXIT(&rx_freePktQ_lock);
287 * Free a packet currently used as a continuation buffer
/* Return a continuation-buffer packet `c` to the global free queue,
 * taking rx_freePktQ_lock around the release. */
289 void rxi_freeCBuf(struct rx_packet *c)
294     MUTEX_ENTER(&rx_freePktQ_lock);
296     rxi_FreePacketNoLock(c);
297     /* Wakeup anyone waiting for packets */
300     MUTEX_EXIT(&rx_freePktQ_lock);
304 /* this one is kind of awful.
305 * In rxkad, the packet has been all shortened, and everything, ready for
306 * sending. All of a sudden, we discover we need some of that space back.
307 * This isn't terribly general, because it knows that the packets are only
308 * rounded up to the EBS (userdata + security header).
/* Reclaim `nb` bytes of trailing space in the last iovec of `p`, growing
 * iov_len only when the underlying buffer (first buffer vs. continuation
 * buffer) has room. See the "kind of awful" rationale comment above.
 * NOTE(review): excerpt omits the selection of index `i` and the returns. */
310 int rxi_RoundUpPacket(p, nb)
311   struct rx_packet * p;
/* Last iovec points into the packet's own localdata (first buffer). */
316   if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
317     if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
318       p->wirevec[i].iov_len += nb;
/* Otherwise the last iovec is a continuation buffer. */
323     if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
324       p->wirevec[i].iov_len += nb;
331 /* get sufficient space to store nb bytes of data (or more), and hook
332 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
333 * returns the number of bytes >0 which it failed to come up with.
334 * Don't need to worry about locking on packet, since only
335  * one thread can manipulate one at a time. Locking on continuation
336  * packets is handled by allocCBuf */
337 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
/* Attach continuation buffers to `p` until at least `nb` more bytes of
 * payload space exist or the iovec array is full. Per the contract comment
 * above, returns <=0 on success, otherwise the byte count it failed to get. */
338 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
342   for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
343       register struct rx_packet *cb;
344       if ((cb = allocCBuf(class))) {
/* The continuation packet's localdata area becomes payload space of `p`. */
345           p->wirevec[i].iov_base = (caddr_t) cb->localdata;
346           p->wirevec[i].iov_len = RX_CBUFFERSIZE;
347           nb -= RX_CBUFFERSIZE;
348           p->length += RX_CBUFFERSIZE;
357 /* Add more packet buffers */
/* Grow the global packet pool by `apackets`: allocate and zero one
 * contiguous array, initialize each packet's header/first-buffer iovecs,
 * mark each free, and append all to rx_freePacketQueue under the lock. */
358 void rxi_MorePackets(int apackets)
360   struct rx_packet *p, *e;
364   getme = apackets * sizeof(struct rx_packet);
/* NOTE(review): rx_mallocedP is overwritten here, so only the newest array
 * is remembered for rxi_FreeAllPackets (see MTUXXX there). */
365   p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
367   PIN(p, getme);                /* XXXXX */
368   bzero((char *)p, getme);
371   MUTEX_ENTER(&rx_freePktQ_lock);
373   for (e = p + apackets; p<e; p++) {
/* vec 0 = wire header, vec 1 = first data buffer. */
374     p->wirevec[0].iov_base = (char *) (p->wirehead);
375     p->wirevec[0].iov_len  = RX_HEADER_SIZE;
376     p->wirevec[1].iov_base = (char *) (p->localdata);
377     p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
378     p->header.flags = RX_FREE_PACKET;
381     queue_Append(&rx_freePacketQueue, p);
383   rx_nFreePackets += apackets;
384   rxi_NeedMorePackets = FALSE;
388   MUTEX_EXIT(&rx_freePktQ_lock);
393 /* Add more packet buffers */
/* Same as rxi_MorePackets but the caller already holds rx_freePktQ_lock.
 * Additionally over-allocates so ~1/4 of the new packets could carry a
 * maximal jumbogram's worth of continuation buffers. */
394 void rxi_MorePacketsNoLock(int apackets)
396   struct rx_packet *p, *e;
399   /* allocate enough packets that 1/4 of the packets will be able
400    * to hold maximal amounts of data */
401   apackets += (apackets/4)
402               * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
403   getme = apackets * sizeof(struct rx_packet);
/* NOTE(review): as in rxi_MorePackets, rx_mallocedP only tracks this
 * newest allocation. */
404   p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
406   bzero((char *)p, getme);
408   for (e = p + apackets; p<e; p++) {
/* vec 0 = wire header, vec 1 = first data buffer. */
409     p->wirevec[0].iov_base = (char *) (p->wirehead);
410     p->wirevec[0].iov_len  = RX_HEADER_SIZE;
411     p->wirevec[1].iov_base = (char *) (p->localdata);
412     p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
413     p->header.flags = RX_FREE_PACKET;
416     queue_Append(&rx_freePacketQueue, p);
418   rx_nFreePackets += apackets;
419   rxi_NeedMorePackets = FALSE;
/* Release the packet array pointed to by rx_mallocedP.
 * NOTE(review): the size expression uses rx_maxReceiveWindow+2, not the
 * count actually allocated, and only the latest array is freed — both
 * acknowledged by the MTUXXX marker below. */
424 void rxi_FreeAllPackets(void)
426   /* must be called at proper interrupt level, etcetera */
427   /* MTUXXX need to free all Packets */
428   osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
429   UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
432 /* Allocate more packets iff we need more continuation buffers */
433 /* In kernel, can't page in memory with interrupts disabled, so we
434 * don't use the event mechanism. */
/* Top up the packet pool if an earlier allocation set the
 * rxi_NeedMorePackets flag (used instead of the event mechanism in the
 * kernel — see comment above). */
435 void rx_CheckPackets()
437   if (rxi_NeedMorePackets) {
438     rxi_MorePackets(rx_initSendWindow);
442 /* In the packet freeing routine below, the assumption is that
443 we want all of the packets to be used equally frequently, so that we
444 don't get packet buffers paging out. It would be just as valid to
445 assume that we DO want them to page out if not many are being used.
446 In any event, we assume the former, and append the packets to the end
448 /* This explanation is bogus. The free list doesn't remain in any kind of
449 useful order for long: the packets in use get pretty much randomly scattered
450 across all the pages. In order to permit unused {packets,bufs} to page out, they
451 must be stored so that packets which are adjacent in memory are adjacent in the
452 free list. An array springs rapidly to mind.
455 /* Actually free the packet p. */
/* Mark `p` free and append it to the free queue; caller must hold
 * rx_freePktQ_lock. Panics on double free. */
456 void rxi_FreePacketNoLock(struct rx_packet *p)
458   dpf(("Free %x\n", p));
460   if (p->header.flags & RX_FREE_PACKET)
461     osi_Panic("rxi_FreePacketNoLock: packet already free\n");
463   p->header.flags = RX_FREE_PACKET;
464   queue_Append(&rx_freePacketQueue, p);
/* Return the continuation buffers of `p` (wirevec[2..niovecs-1]) to the
 * free queue; caller must hold rx_freePktQ_lock. `first` must be 1
 * (partial frees not yet supported — see MTUXXX). */
467 int rxi_FreeDataBufsNoLock(p, first)
468   struct rx_packet * p;
471   struct iovec *iov, *end;
473   if (first != 1)   /* MTUXXX */
474     osi_Panic("FreeDataBufs 1: first must be 1");
475   iov = &p->wirevec[1];
476   end = iov + (p->niovecs-1);
/* vec 1 is always the packet's own localdata, never a cbuf — verify. */
477   if (iov->iov_base != (caddr_t) p->localdata)  /* MTUXXX */
478     osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
479   for (iov++ ; iov < end ; iov++) {
481       osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
/* Map the cbuf's data pointer back to its owning rx_packet and free it. */
482     rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
/* Diagnostic counter for malformed iovecs.
 * NOTE(review): the increment site is not visible in this excerpt —
 * presumably bumped in rxi_RestoreDataBufs when iov_base is NULL. */
490 int rxi_nBadIovecs = 0;
492 /* rxi_RestoreDataBufs
494 * Restore the correct sizes to the iovecs. Called when reusing a packet
495 * for reading off the wire.
/* Reset each iovec of `p` to its full default size before the packet is
 * reused for a wire read (per the comment block above). */
497 void rxi_RestoreDataBufs(struct rx_packet *p)
500   struct iovec *iov = &p->wirevec[2];
/* vec 0 = wire header, vec 1 = first data buffer, as set at allocation. */
502   p->wirevec[0].iov_base = (char *) (p->wirehead);
503   p->wirevec[0].iov_len  = RX_HEADER_SIZE;
504   p->wirevec[1].iov_base = (char *) (p->localdata);
505   p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
507   for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
508     if (!iov->iov_base) {
/* Continuation buffers are restored to their full capacity. */
513     iov->iov_len = RX_CBUFFERSIZE;
/* Free any continuation buffers of `p` that hold no message data (those
 * past p->length), returning them to the free pool under the lock.
 * `first` must be 1, as in rxi_FreeDataBufsNoLock. */
517 int rxi_TrimDataBufs(p, first)
518   struct rx_packet * p;
522   struct iovec *iov, *end;
526     osi_Panic("TrimDataBufs 1: first must be 1");
528   /* Skip over continuation buffers containing message data */
529   iov = &p->wirevec[2];
530   end = iov + (p->niovecs-2);
/* Bytes of payload remaining beyond the first data buffer. */
531   length = p->length - p->wirevec[1].iov_len;
532   for (; iov < end && length > 0 ; iov++) {
534       osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
535     length -= iov->iov_len;
538   /* iov now points to the first empty data buffer. */
543     MUTEX_ENTER(&rx_freePktQ_lock);
545     for (; iov < end ; iov++) {
547         osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
/* Return each empty cbuf's owning packet to the free queue. */
548       rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
553     MUTEX_EXIT(&rx_freePktQ_lock);
559 /* Free the packet p. P is assumed not to be on any queue, i.e.
560 * remove it yourself first if you call this routine. */
/* Free packet `p` and all of its continuation buffers. `p` must already be
 * off every queue (per the comment above). Takes rx_freePktQ_lock. */
561 void rxi_FreePacket(struct rx_packet *p)
566   MUTEX_ENTER(&rx_freePktQ_lock);
568   rxi_FreeDataBufsNoLock(p,1);
569   rxi_FreePacketNoLock(p);
570   /* Wakeup anyone waiting for packets */
573   MUTEX_EXIT(&rx_freePktQ_lock);
578 /* rxi_AllocPacket sets up p->length so it reflects the number of
579 * bytes in the packet at this point, **not including** the header.
580 * The header is absolutely necessary, besides, this is the way the
581 * length field is usually used */
/* Allocate one packet from the free queue; caller holds rx_freePktQ_lock.
 * Fails (returns NULL) when the class is over quota, recording the failure
 * in the per-class stats. On success, resets the header/first-buffer
 * iovecs and sets p->length = RX_FIRSTBUFFERSIZE (length excludes the
 * wire header — see comment above).
 * NOTE(review): excerpt omits the switch head, breaks, dequeue, and
 * return statements. */
582 struct rx_packet *rxi_AllocPacketNoLock(class)
585     register struct rx_packet *p;
588     if (rxi_OverQuota(class)) {
589         rxi_NeedMorePackets = TRUE;
/* Record which allocation class hit quota, for rxdebug statistics. */
590         MUTEX_ENTER(&rx_stats_mutex);
592           case RX_PACKET_CLASS_RECEIVE:
593             rx_stats.receivePktAllocFailures++;
595           case RX_PACKET_CLASS_SEND:
596             rx_stats.sendPktAllocFailures++;
598           case RX_PACKET_CLASS_SPECIAL:
599             rx_stats.specialPktAllocFailures++;
601           case RX_PACKET_CLASS_RECV_CBUF:
602             rx_stats.receiveCbufPktAllocFailures++;
604           case RX_PACKET_CLASS_SEND_CBUF:
605             rx_stats.sendCbufPktAllocFailures++;
608         MUTEX_EXIT(&rx_stats_mutex);
609         return (struct rx_packet *) 0;
613     MUTEX_ENTER(&rx_stats_mutex);
614     rx_stats.packetRequests++;
615     MUTEX_EXIT(&rx_stats_mutex);
/* One build variant panics on exhaustion, the other grows the pool. */
618     if (queue_IsEmpty(&rx_freePacketQueue))
619       osi_Panic("rxi_AllocPacket error");
621     if (queue_IsEmpty(&rx_freePacketQueue))
622       rxi_MorePacketsNoLock(rx_initSendWindow);
626     p = queue_First(&rx_freePacketQueue, rx_packet);
627     if (p->header.flags != RX_FREE_PACKET)
628       osi_Panic("rxi_AllocPacket: packet not free\n");
630     dpf(("Alloc %x, class %d\n", p, class));
635     /* have to do this here because rx_FlushWrite fiddles with the iovs in
636      * order to truncate outbound packets.  In the near future, may need 
637      * to allocate bufs from a static pool here, and/or in AllocSendPacket
639     p->wirevec[0].iov_base = (char *) (p->wirehead);
640     p->wirevec[0].iov_len  = RX_HEADER_SIZE;
641     p->wirevec[1].iov_base = (char *) (p->localdata);
642     p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
644     p->length = RX_FIRSTBUFFERSIZE;
/* Locking wrapper around rxi_AllocPacketNoLock. */
648 struct rx_packet *rxi_AllocPacket(class)
651     register struct rx_packet *p;
653     MUTEX_ENTER(&rx_freePktQ_lock);
654     p = rxi_AllocPacketNoLock(class);
655     MUTEX_EXIT(&rx_freePktQ_lock);
659 /* This guy comes up with as many buffers as it {takes,can get} given
660 * the MTU for this call. It also sets the packet length before
661 * returning. caution: this is often called at NETPRI
662 * Called with call locked.
/* Allocate a send packet sized for this call: cap `want` by the call MTU
 * minus the wire header, attach continuation buffers to reach it, and
 * reserve `delta` bytes for the security header+trailer. If the pool is
 * empty, sleep until packets are returned (often called at NETPRI; call
 * lock held — see comment above).
 * NOTE(review): excerpt omits several lines, including the trimming of
 * p->length and the loop's exit paths. */
664 struct rx_packet *rxi_AllocSendPacket(call, want)
665 register struct rx_call *call;
668     register struct rx_packet *p = (struct rx_packet *) 0;
670     register unsigned delta;
/* mud = max user data per packet for this call. */
673     mud = call->MTU - RX_HEADER_SIZE;
674     delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
675             rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
677     while (!(call->error)) {
678         MUTEX_ENTER(&rx_freePktQ_lock);
679         /* if an error occurred, or we get the packet we want, we're done */
680         if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
681             MUTEX_EXIT(&rx_freePktQ_lock);
684             want = MIN(want, mud);
/* Grow the packet with cbufs until it can carry `want` bytes. */
686             if ((unsigned) want > p->length)
687                 (void) rxi_AllocDataBuf(p, (want - p->length),
688                                         RX_PACKET_CLASS_SEND_CBUF);
690             if ((unsigned) p->length > mud)
/* Security overhead must fit inside the packet. */
693             if (delta >= p->length) {
702         /* no error occurred, and we didn't get a packet, so we sleep.
703          * At this point, we assume that packets will be returned
704          * sooner or later, as packets are acknowledged, and so we
707         call->flags |= RX_CALL_WAIT_PACKETS;
708         CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
/* Drop the call lock while blocked so acks can return packets. */
709         MUTEX_EXIT(&call->lock);
710         rx_waitingForPackets = 1;
712 #ifdef  RX_ENABLE_LOCKS
713         CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
715         osi_rxSleep(&rx_waitingForPackets);
717         MUTEX_EXIT(&rx_freePktQ_lock);
718         MUTEX_ENTER(&call->lock);
719         CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
720         call->flags &= ~RX_CALL_WAIT_PACKETS;
729 /* count the number of used FDs */
/* Count how many of the first `amax` file descriptors are open, by
 * fstat()ing each one (user-space build only). */
730 static int CountFDs(amax)
733     register int i, code;
737     for(i=0;i<amax;i++) {
738         code = fstat(i, &tstat);
739         if (code == 0) count++;
746 #define CountFDs(amax) amax
750 #if !defined(KERNEL) || defined(UKERNEL)
752 /* This function reads a single packet from the interface into the
753 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
754 * (host,port) of the sender are stored in the supplied variables, and
755 * the data length of the packet is stored in the packet structure.
756 * The header is decoded. */
/* Read one datagram from `socket` into packet `p` via recvmsg scatter-read,
 * decode its header, record stats, and return sender (host,port) through
 * the out-parameters. Returns 0 for a bogus packet (per comment above).
 * NOTE(review): excerpt omits several lines (locals, branch structure,
 * return statements). */
757 int rxi_ReadPacket(socket, p, host, port)
759   register struct rx_packet *p;
763   struct sockaddr_in from;
766   register afs_int32 tlen, savelen;
768   rx_computelen(p, tlen);
769   rx_SetDataSize(p, tlen);            /* this is the size of the user data area */
771   tlen += RX_HEADER_SIZE;             /* now this is the size of the entire packet */
772   rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
773                                * it once in order to avoid races.  */
/* Grow the packet to the advertised receive size with continuation bufs. */
776       tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
784    /* Extend the last iovec for padding, it's just to make sure that the 
785     * read doesn't return more data than we expect, and is done to get around
786     * our problems caused by the lack of a length field in the rx header.
787     * Use the extra buffer that follows the localdata in each packet
789   savelen = p->wirevec[p->niovecs].iov_len;
790   p->wirevec[p->niovecs].iov_len += RX_EXTRABUFFERSIZE;
792   bzero((char *)&msg, sizeof(msg));
793   msg.msg_name = (char *) &from;
794   msg.msg_namelen = sizeof(struct sockaddr_in);
795   msg.msg_iov = p->wirevec;
796   msg.msg_iovlen = p->niovecs;
797   nbytes = rxi_Recvmsg(socket, &msg, 0);
799  /* restore the vec to its correct state */
800   p->wirevec[p->niovecs].iov_len = savelen;
/* Payload length excludes the wire header; 0x8000 bit set means nbytes
 * was smaller than RX_HEADER_SIZE (negative length). */
802   p->length = (nbytes - RX_HEADER_SIZE);
803   if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
805       rxi_MorePackets(rx_initSendWindow);
807   else if (nbytes < 0 && errno == EWOULDBLOCK) {
808       MUTEX_ENTER(&rx_stats_mutex);
809       rx_stats.noPacketOnRead++;
810       MUTEX_EXIT(&rx_stats_mutex);
814       MUTEX_ENTER(&rx_stats_mutex);
815       rx_stats.bogusPacketOnRead++;
816       rx_stats.bogusHost = from.sin_addr.s_addr;
817       MUTEX_EXIT(&rx_stats_mutex);
818       dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
819            from.sin_port,nbytes));
824     /* Extract packet header. */
825     rxi_DecodePacketHeader(p);
827     *host = from.sin_addr.s_addr;
828     *port = from.sin_port;
829     if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
830         struct rx_peer *peer;
831         MUTEX_ENTER(&rx_stats_mutex);
832         rx_stats.packetsRead[p->header.type-1]++;
833         MUTEX_EXIT(&rx_stats_mutex);
835          * Try to look up this peer structure.  If it doesn't exist,
836          * don't create a new one - 
837          * we don't keep count of the bytes sent/received if a peer 
838          * structure doesn't already exist.
840          * The peer/connection cleanup code assumes that there is 1 peer
841          * per connection.  If we actually created a peer structure here
842          * and this packet was an rxdebug packet, the peer structure would
843          * never be cleaned up.
845         peer = rxi_FindPeer(*host, *port, 0, 0);
847             MUTEX_ENTER(&peer->peer_lock);
848             hadd32(peer->bytesReceived, p->length);
849             MUTEX_EXIT(&peer->peer_lock);
853     /* Free any empty packet buffers at the end of this packet */
854     rxi_TrimDataBufs(p, 1);
860 #endif /* !KERNEL || UKERNEL */
862 /* This function splits off the first packet in a jumbo packet.
863 * As of AFS 3.5, jumbograms contain more than one fixed size
864 * packet, and the RX_JUMBO_PACKET flag is set in all but the
865 * last packet header. All packets (except the last) are padded to
866 * fall on RX_CBUFFERSIZE boundaries.
867 * HACK: We store the length of the first n-1 packets in the
868 * last two pad bytes. */
/* Split the first fixed-size packet off a jumbogram `p`: carve the
 * remainder into a new packet `np` built from p's continuation buffers,
 * decode the abbreviated 4-byte jumbo header, and synthesize np's header
 * from p's (serial/seq + 1; flags/spare from the jumbo header).
 * NOTE(review): excerpt omits several lines (early-return bodies, closing
 * braces, the final return). */
870 struct rx_packet *rxi_SplitJumboPacket(p, host, port, first)
871   register struct rx_packet *p;
876   struct rx_packet *np;
877   struct rx_jumboHeader *jp;
883   /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
884    * bytes in length. All but the first packet are preceded by
885    * an abbreviated four byte header. The length of the last packet
886    * is calculated from the size of the jumbogram. */
887   length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
/* Sanity checks: enough payload for a first packet + jumbo header, and
 * at least one continuation buffer to host the rest. */
889   if ((int)p->length < length) {
890     dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
893   niov = p->niovecs - 2;
895     dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
898   iov = &p->wirevec[2];
/* The first cbuf's owning packet becomes the split-off packet. */
899   np = RX_CBUF_TO_PACKET(iov->iov_base, p);
901   /* Get a pointer to the abbreviated packet header */
902   jp = (struct rx_jumboHeader *)
903        ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
905   /* Set up the iovecs for the next packet */
906   np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
907   np->wirevec[0].iov_len = sizeof(struct rx_header);
908   np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
909   np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
910   np->niovecs = niov+1;
/* Remaining cbufs of p are handed over to np. */
911   for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
912     np->wirevec[i] = *iov;
914   np->length = p->length - length;
915   p->length = RX_JUMBOBUFFERSIZE;
918   /* Convert the jumbo packet header to host byte order */
919   temp = ntohl(*(afs_uint32 *)jp);
920   jp->flags = (u_char)(temp >> 24);
921   jp->cksum = (u_short)(temp);
923   /* Fill in the packet header */
924   np->header = p->header;
925   np->header.serial = p->header.serial + 1;
926   np->header.seq = p->header.seq + 1;
927   np->header.flags = jp->flags;
928   np->header.spare = jp->cksum;
934 /* Send a udp datagram */
/* Send a UDP datagram: build a msghdr over the caller's iovecs and hand it
 * to rxi_Sendmsg (user-space build).
 * NOTE(review): excerpt omits the msg_name/msg_iov assignments and the
 * return; only part of the msghdr setup is visible. */
935 int osi_NetSend(socket, addr, dvec, nvecs, length, istack)
945     memset(&msg, 0, sizeof(msg));
947     msg.msg_iovlen = nvecs;
949     msg.msg_namelen = sizeof(struct sockaddr_in);
951     rxi_Sendmsg(socket, &msg, 0);
955 #elif !defined(UKERNEL)
956 /* osi_NetSend is defined in afs/afs_osinet.c
957 * message receipt is done in rxk_input or rx_put.
962 * Copy an mblock to the contiguous area pointed to by cp.
963 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
964 * but it doesn't really.
965 * Returns the number of bytes not transferred.
966 * The message is NOT changed.
/* STREAMS helper: copy up to `len` bytes from an mblk chain `mp` into the
 * contiguous buffer `cp`. Per the comment above, the `off` skip is not
 * actually implemented; returns the number of bytes NOT transferred. */
968 static int cpytoc(mp, off, len, cp)
970     register int off, len;
975     for (;mp && len > 0; mp = mp->b_cont) {
/* Only M_DATA blocks carry payload; others terminate/skip (elided). */
976         if (mp->b_datap->db_type != M_DATA) {
979         n = MIN(len, (mp->b_wptr - mp->b_rptr));
980         bcopy((char *)mp->b_rptr, cp, n);
988 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
989 * but it doesn't really.
990 * This sucks, anyway, do it like m_cpy.... below
/* STREAMS helper: copy up to `len` bytes from an mblk chain into an iovec
 * array, advancing through both chains. As with cpytoc, `off` is not
 * really honored (see comment above).
 * NOTE(review): excerpt omits the inner advance logic (m/o updates,
 * iovec stepping). */
992 static int cpytoiovec(mp, off, len, iovs, niovs)
995     register struct iovec *iovs;
997     register int m,n,o,t,i;
999     for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1000         if (mp->b_datap->db_type != M_DATA) {
1003         n = MIN(len, (mp->b_wptr - mp->b_rptr));
1009                 t = iovs[i].iov_len;
1012             bcopy((char *)mp->b_rptr, iovs[i].iov_base + o, m);
1021 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1022 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1024 #if !defined(AFS_LINUX20_ENV)
/* BSD-mbuf variant: copy `len` bytes starting `off` bytes into mbuf chain
 * `m` into the iovec array, tracking current positions (p1/l1 within the
 * mbuf, p2/l2 within the iovec).
 * NOTE(review): excerpt omits the copy statement and the chain/iovec
 * advance logic between the visible lines. */
1025 static int m_cpytoiovec(m, off, len, iovs, niovs)
1027   int off, len, niovs;
1028   struct iovec iovs[];
1031   unsigned int l1, l2, i, t;
1033   if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1034     osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
/* Skip whole mbufs until `off` lands inside one. */
1037   if (m->m_len <= off) {
1047   p1 = mtod(m, caddr_t)+off;
1048   l1 = m->m_len - off;
1050   p2 = iovs[0].iov_base;
1051   l2 = iovs[0].iov_len;
/* Copy the smaller of: remaining in mbuf, remaining in iovec, remaining len. */
1054     t = MIN(l1, MIN(l2, (unsigned int)len));
1063       p1 = mtod(m, caddr_t);
1069       p2 = iovs[i].iov_base;
1070       l2 = iovs[i].iov_len;
1078 #endif /* AFS_SUN5_ENV */
1080 #if !defined(AFS_LINUX20_ENV)
/* Copy `data_len` payload bytes (skipping `hdr_len`) from mbuf chain `amb`
 * into the packet's iovec chain via m_cpytoiovec.
 * NOTE(review): the `free` callback parameter's use is on elided lines. */
1081 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1088     struct rx_packet *phandle;
1089     int hdr_len, data_len;
1093   code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1099 #endif /*KERNEL && !UKERNEL*/
1102 /* send a response to a debug packet */
/* Handle an incoming rxdebug request in `ap` and send the reply back to
 * (ahost,aport) via rxi_SendDebugPacket, reusing `ap` as the reply buffer.
 * Dispatches on tin.type: GETSTATS, GETALLCONN/GETCONN, GETPEER, RXSTATS,
 * else a BADTYPE error reply. Every multi-byte reply field is converted to
 * network order with htonl/htons before transmission.
 * NOTE(review): excerpt omits many lines (the switch head, breaks, closing
 * braces, and parts of several statements). */
1104 struct rx_packet *rxi_ReceiveDebugPacket(ap, asocket, ahost, aport, istack)
1108   register struct rx_packet *ap;
1111   struct rx_debugIn tin;
1113   struct rx_serverQueueEntry *np, *nqe;
1115   rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1116   /* all done with packet, now set length to the truth, so we can
1117    * reuse this packet */
1118   rx_computelen(ap, ap->length);
1120   tin.type = ntohl(tin.type);
1121   tin.index = ntohl(tin.index);
/* --- RX_DEBUGI_GETSTATS: basic server statistics --- */
1123     case RX_DEBUGI_GETSTATS: {
1124         struct rx_debugStats tstat;
1126         /* get basic stats */
1127         bzero ((char *)&tstat, sizeof(tstat)); /* make sure spares are zero */
1128         tstat.version = RX_DEBUGI_VERSION;
1129 #ifndef RX_ENABLE_LOCKS
1130         tstat.waitingForPackets = rx_waitingForPackets;
1132         tstat.nFreePackets = htonl(rx_nFreePackets);
1133         tstat.callsExecuted = htonl(rxi_nCalls);
1134         tstat.packetReclaims = htonl(rx_packetReclaims);
1135         tstat.usedFDs = CountFDs(64);
1136         tstat.nWaiting = htonl(rx_nWaiting);
1137         queue_Count( &rx_idleServerQueue, np, nqe, 
1138                      rx_serverQueueEntry, tstat.idleThreads); 
1139         tstat.idleThreads = htonl(tstat.idleThreads);
/* Make sure the reply packet is big enough for the stats struct. */
1140         tl = sizeof(struct rx_debugStats) - ap->length;
1142           tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1145           rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1146           ap->length = sizeof(struct rx_debugStats);
1147           rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1148           rx_computelen(ap, ap->length);
/* --- RX_DEBUGI_GETCONN / GETALLCONN: dump the tin.index'th connection --- */
1153     case RX_DEBUGI_GETALLCONN:
1154     case RX_DEBUGI_GETCONN: {
1156         register struct rx_connection *tc;
1157         struct rx_call *tcall;
1158         struct rx_debugConn tconn;
1159         int all = (tin.type == RX_DEBUGI_GETALLCONN);
1162         tl = sizeof(struct rx_debugConn) - ap->length;
1164           tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1168         bzero ((char *)&tconn, sizeof(tconn)); /* make sure spares are zero */
1169         /* get N'th (maybe) "interesting" connection info */
1170         for(i=0;i<rx_hashTableSize;i++) {
1171 #if !defined(KERNEL)
1172             /* the time complexity of the algorithm used here
1173              * exponentially increases with the number of connections.
1175 #ifdef AFS_PTHREAD_ENV
1178                 (void) IOMGR_Poll();
1181             MUTEX_ENTER(&rx_connHashTable_lock);
1182             /* We might be slightly out of step since we are not 
1183              * locking each call, but this is only debugging output.
1185             for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
1186                 if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1187                     tconn.host = tc->peer->host;
1188                     tconn.port = tc->peer->port;
1189                     tconn.cid = htonl(tc->cid);
1190                     tconn.epoch = htonl(tc->epoch);
1191                     tconn.serial = htonl(tc->serial);
1192                     for(j=0;j<RX_MAXCALLS;j++) {
1193                         tconn.callNumber[j] = htonl(tc->callNumber[j]);
1194                         if ((tcall=tc->call[j])) {
1195                             tconn.callState[j] = tcall->state;
1196                             tconn.callMode[j] = tcall->mode;
1197                             tconn.callFlags[j] = tcall->flags;
1198                             if (queue_IsNotEmpty(&tcall->rq))
1199                                 tconn.callOther[j] |= RX_OTHER_IN;
1200                             if (queue_IsNotEmpty(&tcall->tq))
1201                                 tconn.callOther[j] |= RX_OTHER_OUT;
1203                         else tconn.callState[j] = RX_STATE_NOTINIT;
1206                     tconn.natMTU = htonl(tc->peer->natMTU);
1207                     tconn.error = htonl(tc->error);
1208                     tconn.flags = tc->flags;
1209                     tconn.type = tc->type;
1210                     tconn.securityIndex = tc->securityIndex;
1211                     if (tc->securityObject) {
1212                         RXS_GetStats (tc->securityObject, tc,
/* Byte-swap helpers for the nested security-stats struct. */
1214 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1215 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1218                         DOHTONL(packetsReceived);
1219                         DOHTONL(packetsSent);
1220                         DOHTONL(bytesReceived);
1223                              i<sizeof(tconn.secStats.spares)/sizeof(short);
1227                              i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
1232                     MUTEX_EXIT(&rx_connHashTable_lock);
1233                     rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1235                     ap->length = sizeof(struct rx_debugConn);
1236                     rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1241             MUTEX_EXIT(&rx_connHashTable_lock);
1243         /* if we make it here, there are no interesting packets */
1244         tconn.cid = htonl(0xffffffff); /* means end */
1245         rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1247         ap->length = sizeof(struct rx_debugConn);
1248         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1254          * Pass back all the peer structures we have available
/* --- RX_DEBUGI_GETPEER: dump the tin.index'th peer --- */
1257     case RX_DEBUGI_GETPEER: {
1259         register struct rx_peer *tp;
1260         struct rx_debugPeer tpeer;
1263         tl = sizeof(struct rx_debugPeer) - ap->length;
1265           tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1269         bzero ((char *)&tpeer, sizeof(tpeer));
1270         for(i=0;i<rx_hashTableSize;i++) {
1271 #if !defined(KERNEL)
1272             /* the time complexity of the algorithm used here
1273              * exponentially increases with the number of peers.
1275              * Yielding after processing each hash table entry
1276              * and dropping rx_peerHashTable_lock.
1277              * also increases the risk that we will miss a new
1278              * entry - but we are willing to live with this
1279              * limitation since this is meant for debugging only
1281 #ifdef AFS_PTHREAD_ENV
1284                 (void) IOMGR_Poll();
1287             MUTEX_ENTER(&rx_peerHashTable_lock);
1288             for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
1289                 if (tin.index-- <= 0) {
1290                     tpeer.host = tp->host;
1291                     tpeer.port = tp->port;
1292                     tpeer.ifMTU = htons(tp->ifMTU);
1293                     tpeer.idleWhen = htonl(tp->idleWhen);
1294                     tpeer.refCount = htons(tp->refCount);
1295                     tpeer.burstSize = tp->burstSize;
1296                     tpeer.burst = tp->burst;
1297                     tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1298                     tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1299                     tpeer.rtt = htonl(tp->rtt);
1300                     tpeer.rtt_dev = htonl(tp->rtt_dev);
1301                     tpeer.timeout.sec = htonl(tp->timeout.sec);
1302                     tpeer.timeout.usec = htonl(tp->timeout.usec);
1303                     tpeer.nSent = htonl(tp->nSent);
1304                     tpeer.reSends = htonl(tp->reSends);
1305                     tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1306                     tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1307                     tpeer.rateFlag = htonl(tp->rateFlag);
1308                     tpeer.natMTU = htons(tp->natMTU);
1309                     tpeer.maxMTU = htons(tp->maxMTU);
1310                     tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1311                     tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1312                     tpeer.MTU = htons(tp->MTU);
1313                     tpeer.cwind = htons(tp->cwind);
1314                     tpeer.nDgramPackets = htons(tp->nDgramPackets);
1315                     tpeer.congestSeq = htons(tp->congestSeq);
1316                     tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1317                     tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1318                     tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1319                     tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
1321                     MUTEX_EXIT(&rx_peerHashTable_lock);
1322                     rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1324                     ap->length = sizeof(struct rx_debugPeer);
1325                     rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1330             MUTEX_EXIT(&rx_peerHashTable_lock);
1332         /* if we make it here, there are no interesting packets */
1333         tpeer.host = htonl(0xffffffff); /* means end */
1334         rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1336         ap->length = sizeof(struct rx_debugPeer);
1337         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* --- RX_DEBUGI_RXSTATS: raw rx_stats, word-swapped in place --- */
1342     case RX_DEBUGI_RXSTATS: {
1346         tl = sizeof(rx_stats) - ap->length;
1348           tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1352         /* Since it's all int32s convert to network order with a loop. */
1353         MUTEX_ENTER(&rx_stats_mutex);
1354         s = (afs_int32 *)&rx_stats;
1355         for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1356             rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1359         ap->length = sizeof(rx_stats);
1360         MUTEX_EXIT(&rx_stats_mutex);
1361         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* --- default: echo back a BADTYPE error --- */
1367         /* error response packet */
1368         tin.type = htonl(RX_DEBUGI_BADTYPE);
1369         tin.index = tin.type;
1370         rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1372         ap->length = sizeof(struct rx_debugIn);
1373         rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/*
 * Answer a version-query debug packet: overwrite the packet payload with
 * the build version string and bounce it back to the sender.
 * NOTE(review): several original lines are absent from this extract (the
 * remaining parameter declarations, body braces, and the return of ap),
 * so only the visible statements are documented here.
 */
1380 struct rx_packet *rxi_ReceiveVersionPacket(ap, asocket, ahost, aport, istack)
1384 register struct rx_packet *ap;
/* Copy 65 bytes of the version string, skipping its 4-byte prefix;
 * presumably the prefix is CVS/RCS keyword framing — TODO confirm. */
1388 rx_packetwrite(ap, 0, 65, cml_version_number+4);
1391 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1397 /* send a debug packet back to the sender */
/*
 * Build a sockaddr for (ahost, aport), trim the packet's iovec chain so
 * exactly `length` payload bytes are sent, transmit best-effort, then
 * restore the trimmed iovec. Drops/reacquires the AFS global lock around
 * the network send.
 * NOTE(review): lines missing from this extract include the opening
 * brace, declarations of i/nbytes/savelen/saven, and closing braces of
 * the for loop and function.
 */
1398 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1399 afs_int32 ahost, short aport, afs_int32 istack)
1401 struct sockaddr_in taddr;
1407 int waslocked = ISAFS_GLOCK();
/* ahost/aport are already in network byte order (taken from the wire). */
1410 taddr.sin_family = AF_INET;
1411 taddr.sin_port = aport;
1412 taddr.sin_addr.s_addr = ahost;
1415 /* We need to trim the niovecs. */
/* Walk the payload iovecs; shorten the one where `length` runs out and
 * remember the old length/count so they can be restored after sending. */
1416 nbytes = apacket->length;
1417 for (i=1; i < apacket->niovecs; i++) {
1418 if (nbytes <= apacket->wirevec[i].iov_len) {
1419 savelen = apacket->wirevec[i].iov_len;
1420 saven = apacket->niovecs;
1421 apacket->wirevec[i].iov_len = nbytes;
1422 apacket->niovecs = i+1; /* so condition fails because i == niovecs */
1424 else nbytes -= apacket->wirevec[i].iov_len;
/* Release the AFS global lock across the (possibly blocking) send. */
1428 if (waslocked) AFS_GUNLOCK();
1430 /* debug packets are not reliably delivered, hence the cast below. */
1431 (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1432 apacket->length+RX_HEADER_SIZE, istack);
1434 if (waslocked) AFS_GLOCK();
/* Undo the truncation; i ended one past the trimmed iovec. */
1437 if (saven) { /* means we truncated the packet above. */
1438 apacket->wirevec[i-1].iov_len = savelen;
1439 apacket->niovecs = saven;
1444 /* Send the packet to appropriate destination for the specified
1445 * connection. The header is first encoded and placed in the packet.
/*
 * Stamps the packet with a fresh per-connection serial, encodes the
 * header to network byte order, sends on the connection's socket (client
 * conns use the global rx_socket), and updates send statistics. On send
 * failure the packet's retryTime is advanced so retransmission happens
 * soon.
 * NOTE(review): this extract is missing lines, including the opening
 * brace, declarations of socket/waslocked, several closing braces, and
 * the #ifdef RXDEBUG lines that pair with the visible #endif.
 */
1447 void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
1453 struct sockaddr_in addr;
1454 register struct rx_peer *peer = conn->peer;
/* 'S' = send, 'D' = drop (for the debug trace below). */
1457 char deliveryType = 'S';
1459 /* The address we're sending the packet to */
1460 addr.sin_family = AF_INET;
1461 addr.sin_port = peer->port;
1462 addr.sin_addr.s_addr = peer->host;
1464 /* This stuff should be revamped, I think, so that most, if not
1465 * all, of the header stuff is always added here. We could
1466 * probably do away with the encode/decode routines. XXXXX */
1468 /* Stamp each packet with a unique serial number. The serial
1469 * number is maintained on a connection basis because some types
1470 * of security may be based on the serial number of the packet,
1471 * and security is handled on a per authenticated-connection
1473 /* Pre-increment, to guarantee no zero serial number; a zero
1474 * serial number means the packet was never sent. */
1475 MUTEX_ENTER(&conn->conn_data_lock);
1476 p->header.serial = ++conn->serial;
1477 MUTEX_EXIT(&conn->conn_data_lock);
1478 /* This is so we can adjust retransmit time-outs better in the face of
1479 * rapidly changing round-trip times. RTO estimation is not a la Karn.
/* Remember the serial of the first transmission of this packet. */
1481 if (p->firstSerial == 0) {
1482 p->firstSerial = p->header.serial;
1486 /* If an output tracer function is defined, call it with the packet and
1487 * network address. Note this function may modify its arguments. */
1488 if (rx_almostSent) {
1489 int drop = (*rx_almostSent) (p, &addr);
1490 /* drop packet if return value is non-zero? */
1491 if (drop) deliveryType = 'D'; /* Drop the packet */
1495 /* Get network byte order header */
1496 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1497 * touch ALL the fields */
1499 /* Send the packet out on the same socket that related packets are being
1501 socket = (conn->type == RX_CLIENT_CONNECTION
1502 ? rx_socket : conn->service->socket)
1505 /* Possibly drop this packet, for testing purposes */
1506 if ((deliveryType == 'D') ||
1507 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1508 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1509 deliveryType = 'D'; /* Drop the packet */
1512 deliveryType = 'S'; /* Send the packet */
1513 #endif /* RXDEBUG */
1515 /* Loop until the packet is sent. We'd prefer just to use a
1516 * blocking socket, but unfortunately the interface doesn't
1517 * allow us to have the socket block in send mode, and not
1518 * block in receive mode */
/* Drop the AFS global lock around the actual network send. */
1521 waslocked = ISAFS_GLOCK();
1522 if (waslocked) AFS_GUNLOCK();
1524 if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1525 p->length+RX_HEADER_SIZE, istack)){
1526 /* send failed, so let's hurry up the resend, eh? */
1527 MUTEX_ENTER(&rx_stats_mutex);
1528 rx_stats.netSendFailures++;
1529 MUTEX_EXIT(&rx_stats_mutex);
1530 p->retryTime = p->timeSent; /* resend it very soon */
/* Backoff grows the retry delay: 10ms plus backoff * 256 ms. */
1531 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1534 if (waslocked) AFS_GLOCK();
1539 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1540 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1541 peer->host, peer->port, p->header.serial, p->header.epoch,
1542 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1543 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
/* Account the send in global stats and per-peer byte counters. */
1545 MUTEX_ENTER(&rx_stats_mutex);
1546 rx_stats.packetsSent[p->header.type-1]++;
1547 MUTEX_EXIT(&rx_stats_mutex);
1548 MUTEX_ENTER(&peer->peer_lock);
1549 hadd32(peer->bytesSent, p->length);
1550 MUTEX_EXIT(&peer->peer_lock);
1553 /* Send a list of packets to appropriate destination for the specified
1554 * connection. The headers are first encoded and placed in the packets.
/*
 * Assemble `len` packets into a single AFS 3.5 jumbogram: one shared
 * Rx header iovec plus one data iovec per packet, with a jumbo header
 * between fragments. Serial numbers for the whole gram are reserved in
 * one locked increment. Statistics updates mirror rxi_SendPacket.
 * NOTE(review): this extract is missing lines, including the parameter
 * list tail, declarations of i/len/length/temp/serial/socket/waslocked,
 * loop/if closing braces, and #ifdef lines matching the visible #endif.
 */
1556 void rxi_SendPacketList(struct rx_connection * conn,
1557 struct rx_packet **list,
1561 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1564 struct sockaddr_in addr;
1565 register struct rx_peer *peer = conn->peer;
1567 struct rx_packet *p = NULL;
1568 struct iovec wirevec[RX_MAXIOVECS];
1572 struct rx_jumboHeader *jp;
1574 char deliveryType = 'S';
1576 /* The address we're sending the packet to */
1577 addr.sin_family = AF_INET;
1578 addr.sin_port = peer->port;
1579 addr.sin_addr.s_addr = peer->host;
/* One iovec for the shared header plus one per packet must fit. */
1581 if (len+1 > RX_MAXIOVECS) {
1582 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1586 * Stamp the packets in this jumbogram with consecutive serial numbers
/* Reserve `len` serials in one shot; packets get serial+1..serial+len. */
1588 MUTEX_ENTER(&conn->conn_data_lock);
1589 serial = conn->serial;
1590 conn->serial += len;
1591 MUTEX_EXIT(&conn->conn_data_lock);
1594 /* This stuff should be revamped, I think, so that most, if not
1595 * all, of the header stuff is always added here. We could
1596 * probably do away with the encode/decode routines. XXXXX */
/* iovec 0 is the wire header of the FIRST packet; it fronts the gram. */
1599 length = RX_HEADER_SIZE;
1600 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1601 wirevec[0].iov_len = RX_HEADER_SIZE;
1602 for (i = 0 ; i < len ; i++) {
1605 /* The whole 3.5 jumbogram scheme relies on packets fitting
1606 * in a single packet buffer. */
1607 if (p->niovecs > 2) {
1608 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1611 /* Set the RX_JUMBO_PACKET flags in all but the last packets
/* Non-final fragments must be exactly RX_JUMBOBUFFERSIZE and carry
 * the jumbo flag plus a trailing jumbo header. */
1614 if (p->length != RX_JUMBOBUFFERSIZE) {
1615 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1617 p->header.flags |= RX_JUMBO_PACKET;
1618 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1619 wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1621 wirevec[i+1].iov_len = p->length;
1622 length += p->length;
1624 wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
1626 /* Convert jumbo packet header to network byte order */
/* Jumbo header word = flags byte (<<24) | spare; jp was set while
 * processing the PREVIOUS packet, pointing past its data buffer. */
1627 temp = (afs_uint32)(p->header.flags) << 24;
1628 temp |= (afs_uint32)(p->header.spare);
1629 *(afs_uint32 *)jp = htonl(temp);
1631 jp = (struct rx_jumboHeader *)
1632 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1634 /* Stamp each packet with a unique serial number. The serial
1635 * number is maintained on a connection basis because some types
1636 * of security may be based on the serial number of the packet,
1637 * and security is handled on a per authenticated-connection
1639 /* Pre-increment, to guarantee no zero serial number; a zero
1640 * serial number means the packet was never sent. */
1641 p->header.serial = ++serial;
1642 /* This is so we can adjust retransmit time-outs better in the face of
1643 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1645 if (p->firstSerial == 0) {
1646 p->firstSerial = p->header.serial;
1650 /* If an output tracer function is defined, call it with the packet and
1651 * network address. Note this function may modify its arguments. */
1652 if (rx_almostSent) {
1653 int drop = (*rx_almostSent) (p, &addr);
1654 /* drop packet if return value is non-zero? */
1655 if (drop) deliveryType = 'D'; /* Drop the packet */
1659 /* Get network byte order header */
1660 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1661 * touch ALL the fields */
1664 /* Send the packet out on the same socket that related packets are being
1666 socket = (conn->type == RX_CLIENT_CONNECTION
1667 ? rx_socket : conn->service->socket)
1670 /* Possibly drop this packet, for testing purposes */
1671 if ((deliveryType == 'D') ||
1672 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1673 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1674 deliveryType = 'D'; /* Drop the packet */
1677 deliveryType = 'S'; /* Send the packet */
1678 #endif /* RXDEBUG */
1680 /* Loop until the packet is sent. We'd prefer just to use a
1681 * blocking socket, but unfortunately the interface doesn't
1682 * allow us to have the socket block in send mode, and not
1683 * block in receive mode */
1685 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
/* On Solaris kernel, drop GLOCK only for non-interrupt-stack sends. */
1686 waslocked = ISAFS_GLOCK();
1687 if (!istack && waslocked) AFS_GUNLOCK();
1689 if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
1690 /* send failed, so let's hurry up the resend, eh? */
1691 MUTEX_ENTER(&rx_stats_mutex);
1692 rx_stats.netSendFailures++;
1693 MUTEX_EXIT(&rx_stats_mutex);
/* Mark EVERY packet in the failed jumbogram for early retransmit. */
1694 for (i = 0 ; i < len ; i++) {
1696 p->retryTime = p->timeSent; /* resend it very soon */
1697 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1700 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1701 if (!istack && waslocked) AFS_GLOCK();
1706 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1707 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1708 peer->host, peer->port, p->header.serial, p->header.epoch,
1709 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1710 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1712 MUTEX_ENTER(&rx_stats_mutex);
1713 rx_stats.packetsSent[p->header.type-1]++;
1714 MUTEX_EXIT(&rx_stats_mutex);
1715 MUTEX_ENTER(&peer->peer_lock);
1716 hadd32(peer->bytesSent, p->length);
1717 MUTEX_EXIT(&peer->peer_lock);
1721 /* Send a "special" packet to the peer connection. If call is
1722 * specified, then the packet is directed to a specific call channel
1723 * associated with the connection, otherwise it is directed to the
1724 * connection only. Uses optionalPacket if it is supplied, rather than
1725 * allocating a new packet buffer. Nbytes is the length of the data
1726 * portion of the packet. If data is non-null, nbytes of data are
1727 * copied into the packet. Type is the type of the packet, as defined
1728 * in rx.h. Bug: there's a lot of duplication between this and other
1729 * routines. This needs to be cleaned up. */
/*
 * NOTE(review): lines missing from this extract include the remaining
 * K&R parameter declarations, the BUSY-packet branch body, parts of the
 * header fill (length/seq fields), and several closing braces. Only the
 * visible statements are annotated.
 */
1731 rxi_SendSpecial(call, conn, optionalPacket, type, data, nbytes, istack)
1732 register struct rx_call *call;
1733 register struct rx_connection *conn;
1734 struct rx_packet *optionalPacket;
1739 /* Some of the following stuff should be common code for all
1740 * packet sends (it's repeated elsewhere) */
1741 register struct rx_packet *p;
1743 int savelen = 0, saven = 0;
1744 int channel, callNumber;
1746 channel = call->channel;
1747 callNumber = *call->callNumber;
1748 /* BUSY packets refer to the next call on this connection */
1749 if (type == RX_PACKET_TYPE_BUSY) {
/* Allocate a packet only when the caller did not supply one. */
1758 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1759 if (!p) osi_Panic("rxi_SendSpecial failure");
/* Fill in the Rx header from connection/call identity. */
1766 p->header.serviceId = conn->serviceId;
1767 p->header.securityIndex = conn->securityIndex;
1768 p->header.cid = (conn->cid | channel);
1769 p->header.callNumber = callNumber;
1771 p->header.epoch = conn->epoch;
1772 p->header.type = type;
1773 p->header.flags = 0;
1774 if (conn->type == RX_CLIENT_CONNECTION)
1775 p->header.flags |= RX_CLIENT_INITIATED;
1777 rx_packetwrite(p, 0, nbytes, data);
/* Trim the iovec chain to nbytes, same scheme as rxi_SendDebugPacket:
 * shorten the iovec where the payload ends, saving old values. */
1779 for (i=1; i < p->niovecs; i++) {
1780 if (nbytes <= p->wirevec[i].iov_len) {
1781 savelen = p->wirevec[i].iov_len;
1783 p->wirevec[i].iov_len = nbytes;
1784 p->niovecs = i+1; /* so condition fails because i == niovecs */
1786 else nbytes -= p->wirevec[i].iov_len;
/* Route through the call (ack piggybacking, etc.) when one is given;
 * otherwise send directly on the connection. */
1789 if (call) rxi_Send(call, p, istack);
1790 else rxi_SendPacket(conn, p, istack);
1791 if (saven) { /* means we truncated the packet above. We probably don't */
1792 /* really need to do this, but it seems safer this way, given that */
1793 /* sneaky optionalPacket... */
1794 p->wirevec[i-1].iov_len = savelen;
/* Caller-supplied packets are returned; locally allocated ones freed. */
1797 if (!optionalPacket) rxi_FreePacket(p);
1798 return optionalPacket;
1802 /* Encode the packet's header (from the struct header in the packet to
1803 * the net byte order representation in the wire representation of the
1804 * packet, which is what is actually sent out on the wire) */
1805 void rxi_EncodePacketHeader(p)
1806 register struct rx_packet *p;
1808 register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */
1810 bzero((char *)buf, RX_HEADER_SIZE);
1811 *buf++ = htonl(p->header.epoch);
1812 *buf++ = htonl(p->header.cid);
1813 *buf++ = htonl(p->header.callNumber);
1814 *buf++ = htonl(p->header.seq);
1815 *buf++ = htonl(p->header.serial);
1816 *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
1817 | (((afs_uint32)p->header.flags)<<16)
1818 | (p->header.userStatus<<8) | p->header.securityIndex);
1819 /* Note: top 16 bits of this next word were reserved */
1820 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1823 /* Decode the packet's header (from net byte order to a struct header) */
1824 void rxi_DecodePacketHeader(p)
1825 register struct rx_packet *p;
1827 register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base); /* MTUXXX */
1830 p->header.epoch = ntohl(*buf++);
1831 p->header.cid = ntohl(*buf++);
1832 p->header.callNumber = ntohl(*buf++);
1833 p->header.seq = ntohl(*buf++);
1834 p->header.serial = ntohl(*buf++);
1835 temp = ntohl(*buf++);
1836 /* C will truncate byte fields to bytes for me */
1837 p->header.type = temp>>24;
1838 p->header.flags = temp>>16;
1839 p->header.userStatus = temp>>8;
1840 p->header.securityIndex = temp>>0;
1841 temp = ntohl(*buf++);
1842 p->header.serviceId = (temp&0xffff);
1843 p->header.spare = temp>>16;
1844 /* Note: top 16 bits of this last word are the security checksum */
/*
 * Prepare a DATA packet for transmission on `call`: fill in the header
 * from connection/call state, assign the next sequence number, reset
 * transmit-time bookkeeping, reconcile p->length with the iovec chain
 * (freeing surplus continuation buffers), and let the security object
 * do its per-packet work.
 * NOTE(review): lines missing from this extract include the `last`
 * parameter declaration, declarations of i/j, the condition guarding
 * the RX_LAST_PACKET flag (presumably `if (last)` — confirm), and the
 * braces around the panic and free-loop blocks.
 */
1847 void rxi_PrepareSendPacket(call, p, last)
1848 register struct rx_call *call;
1849 register struct rx_packet *p;
1852 register struct rx_connection *conn = call->conn;
1854 ssize_t len; /* len must be a signed type; it can go negative */
1857 p->header.cid = (conn->cid | call->channel);
1858 p->header.serviceId = conn->serviceId;
1859 p->header.securityIndex = conn->securityIndex;
1860 p->header.callNumber = *call->callNumber;
/* tnext is the next transmit sequence number for this call. */
1861 p->header.seq = call->tnext++;
1862 p->header.epoch = conn->epoch;
1863 p->header.type = RX_PACKET_TYPE_DATA;
1864 p->header.flags = 0;
1865 p->header.spare = 0;
1866 if (conn->type == RX_CLIENT_CONNECTION)
1867 p->header.flags |= RX_CLIENT_INITIATED;
1870 p->header.flags |= RX_LAST_PACKET;
1872 clock_Zero(&p->retryTime); /* Never yet transmitted */
1873 clock_Zero(&p->firstSent); /* Never yet transmitted */
1874 p->header.serial = 0; /* Another way of saying never transmitted... */
1877 /* Now that we're sure this is the last data on the call, make sure
1878 * that the "length" and the sum of the iov_lens matches. */
1879 len = p->length + call->conn->securityHeaderSize;
/* Consume payload iovecs until the logical length is used up; len may
 * go negative when the last iovec overshoots. */
1881 for (i=1; i < p->niovecs && len > 0; i++) {
1882 len -= p->wirevec[i].iov_len;
1885 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1888 /* Free any extra elements in the wirevec */
/* Keep at least iovecs 0 and 1; return the rest to the buffer pool. */
1889 for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1890 rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
/* len <= 0 here; shrink the last used iovec by the overshoot. */
1893 p->wirevec[i-1].iov_len += len;
1895 RXS_PreparePacket(conn->securityObject, call, p);
1898 /* Given an interface MTU size, calculate an adjusted MTU size that
1899 * will make efficient use of the RX buffers when the peer is sending
1900 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
/*
 * NOTE(review): lines missing from this extract include the opening
 * brace, declarations of adjMTU and frags, and the body of the
 * `mtu <= adjMTU` branch (presumably an early `return mtu;` plus the
 * subtraction of adjMTU from mtu before the division — confirm against
 * the full source).
 */
1901 int rxi_AdjustIfMTU(int mtu)
/* adjMTU = one full first fragment: Rx header + jumbo buffer + jumbo
 * header. Anything at or below this needs no adjustment. */
1906 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1907 if (mtu <= adjMTU) {
/* Round the remaining space down to a whole number of jumbo buffers. */
1914 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1915 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1918 /* Given an interface MTU size, and the peer's advertised max receive
1919 * size, calculate an adjisted maxMTU size that makes efficient use
1920 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
1921 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1923 int maxMTU = mtu * rxi_nSendFrags;
1924 maxMTU = MIN(maxMTU, peerMaxMTU);
1925 return rxi_AdjustIfMTU(maxMTU);
1928 /* Given a packet size, figure out how many datagram packet will fit.
1929 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1930 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1931 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
/*
 * NOTE(review): lines missing from this extract include the opening
 * brace, the declaration of maxMTU, the body of the too-small branch
 * (presumably `return 1;` — confirm), and a guard before the final
 * return for the case where maxMTU went non-positive.
 */
1932 int rxi_AdjustDgramPackets(int frags, int mtu)
/* If even one jumbo buffer plus header cannot fit (allowing for an
 * IPv6 fragment header), jumbograms are off the table. */
1935 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
/* Total UDP payload across `frags` datagrams, minus one UDP header
 * (the first datagram's header is not payload), capped at Rx's max. */
1938 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1939 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1940 /* subtract the size of the first and last packets */
1941 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
/* First + last packets account for 2; the remainder holds middles. */
1945 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));