/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */
#include <afsconfig.h>
#ifdef KERNEL
#include "../afs/param.h"
#else
#include <afs/param.h>
#endif

#ifdef KERNEL
#if defined(UKERNEL)
#include "../afs/sysincludes.h"
#include "../afs/afsincludes.h"
#include "../rx/rx_kcommon.h"
#include "../rx/rx_clock.h"
#include "../rx/rx_queue.h"
#include "../rx/rx_packet.h"
#else /* defined(UKERNEL) */
#include "../h/types.h"
#ifndef AFS_LINUX20_ENV
#include "../h/systm.h"
#endif
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "../afs/sysincludes.h"
#endif
#include "../h/socket.h"
#include "../netinet/in.h"
#include "../afs/afs_osi.h"
#include "../rx/rx_kmutex.h"
#include "../rx/rx_clock.h"
#include "../rx/rx_queue.h"
#ifdef AFS_SUN5_ENV
#include <sys/sysmacros.h>
#endif
#include "../rx/rx_packet.h"
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "../sys/mount.h"   /* it gets pulled in by something later anyway */
#endif
#include "../h/mbuf.h"
#endif
#endif /* defined(UKERNEL) */
#include "../rx/rx_globals.h"
#else /* KERNEL */
#include "sys/types.h"
#include <sys/stat.h>
#include <errno.h>
#if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
#ifdef AFS_NT40_ENV
#include <winsock2.h>
#else
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#else
#include <sys/socket.h>
#include <netinet/in.h>
#endif
#ifdef AFS_SUN5_ENV
#include <sys/sysmacros.h>
#endif
#include "rx_packet.h"
#include "rx_globals.h"
#include "rx_internal.h"
#endif /* KERNEL */
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
struct rx_packet *rx_mallocedP = 0;

extern char cml_version_number[];
extern int (*rx_almostSent)();

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport, afs_int32 istack);
/* some rules about packets:
 * 1.  When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */
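
/* Illustrative sketch (mine, not from the original source): how a
 * security package might claim the trailer room described in rule 1.
 * The function name is hypothetical; real packages do this inside
 * their PreparePacket routine.
 */
#if 0 /* example only */
static void example_AddTrailer(struct rx_packet *p, int trailerSize)
{
    struct iovec *last = &p->wirevec[p->niovecs - 1];
    /* the final buffer has slack past iov_len; extend into it */
    last->iov_len += trailerSize;
    p->length += trailerSize;
}
#endif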
/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
  unsigned int i;
  size_t l;

  for (l=0, i=1; i< packet->niovecs ; i++ ) {
    if (l + packet->wirevec[i].iov_len > offset) {
      return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
    } else
      l += packet->wirevec[i].iov_len;
  }

  return 0;
}
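
/* Usage sketch (mine): fetch the second 32-bit word of the user data,
 * wherever it landed in the iovec chain:
 *
 *	afs_int32 w = rx_SlowGetInt32(p, 1 * sizeof(afs_int32));
 *
 * callers normally go through the faster macro in rx_packet.h, which
 * (roughly) only drops into this slow path when the word is not in the
 * first buffer.
 */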
/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
{
  unsigned int i;
  size_t l;

  for (l=0, i=1; i< packet->niovecs ; i++ ) {
    if (l + packet->wirevec[i].iov_len > offset) {
      *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
	  data;
      return 0;
    } else
      l += packet->wirevec[i].iov_len;
  }

  return 0;
}
/*
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
	int resid, char *out)
{
  unsigned int i, j, l, r;

  for (l=0, i=1; i< packet->niovecs ; i++ ) {
    if (l + packet->wirevec[i].iov_len > offset) {
      break;
    } else
      l += packet->wirevec[i].iov_len;
  }

  /* i is the iovec which contains the first little bit of data in which we
   * are interested.  l is the total length of everything prior to this iovec.
   * j is the number of bytes we can safely copy out of this iovec.
   */
  r = resid;
  while ((resid > 0) && (i < packet->niovecs)) {
    j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
    bcopy ((char *)(packet->wirevec[i].iov_base) + (offset - l), out, j);
    resid -= j;
    out += j;
    l += packet->wirevec[i].iov_len;
    offset = l;
    i++;
  }

  return (resid ? (r - resid) : r);
}
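
/* Usage sketch (mine): copy a 16-byte application header out of a
 * received packet, regardless of how many continuation buffers it
 * spans; the return value is the byte count actually copied:
 *
 *	char hdr[16];
 *	afs_int32 n = rx_SlowReadPacket(p, 0, sizeof(hdr), hdr);
 */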
/*
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
 */
afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
	char *in)
{
  int i, j, l, r;
  char * b;

  for (l=0, i=1; i < packet->niovecs; i++ ) {
    if (l + packet->wirevec[i].iov_len > offset) {
      break;
    } else
      l += packet->wirevec[i].iov_len;
  }

  /* i is the iovec which contains the first little bit of data in which we
   * are interested.  l is the total length of everything prior to this iovec.
   * j is the number of bytes we can safely copy out of this iovec.
   */
  r = resid;
  while ((resid > 0) && (i < RX_MAXWVECS)) {
    if (i >= packet->niovecs)
      if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
	break;

    b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
    j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
    bcopy (in, b, j);
    resid -= j;
    in += j;
    l += packet->wirevec[i].iov_len;
    offset = l;
    i++;
  }

  return (resid ? (r - resid) : r);
}
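
/* Usage sketch (mine): the mirror image of the read above; note that
 * unlike the read path this may grow the packet via rxi_AllocDataBuf:
 *
 *	afs_int32 n = rx_SlowWritePacket(p, 0, sizeof(hdr), hdr);
 */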
static struct rx_packet * allocCBuf(int class)
{
  struct rx_packet * c;
  SPLVAR;

  NETPRI;
  MUTEX_ENTER(&rx_freePktQ_lock);

#ifdef KERNEL
  if (rxi_OverQuota(class)) {
    c = NULL;
    rxi_NeedMorePackets = TRUE;
    MUTEX_ENTER(&rx_stats_mutex);
    switch(class) {
    case RX_PACKET_CLASS_RECEIVE:
      rx_stats.receivePktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SEND:
      rx_stats.sendPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SPECIAL:
      rx_stats.specialPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_RECV_CBUF:
      rx_stats.receiveCbufPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SEND_CBUF:
      rx_stats.sendCbufPktAllocFailures++;
      break;
    }
    MUTEX_EXIT(&rx_stats_mutex);
    goto done;
  }

  if (queue_IsEmpty(&rx_freePacketQueue)) {
    c = NULL;
    rxi_NeedMorePackets = TRUE;
    goto done;
  }
#else /* KERNEL */
  if (queue_IsEmpty(&rx_freePacketQueue)) {
    rxi_MorePacketsNoLock(rx_initSendWindow);
  }
#endif /* KERNEL */

  rx_nFreePackets--;
  c = queue_First(&rx_freePacketQueue, rx_packet);
  queue_Remove(c);
  if (c->header.flags != RX_FREE_PACKET)
    osi_Panic("rxi_AllocPacket: packet not free\n");
  c->header.flags = 0;

#ifdef KERNEL
 done:
#endif
  MUTEX_EXIT(&rx_freePktQ_lock);

  USERPRI;
  return c;
}
/*
 * Free a packet currently used as a continuation buffer
 */
void rxi_freeCBuf(struct rx_packet *c)
{
  SPLVAR;

  NETPRI;
  MUTEX_ENTER(&rx_freePktQ_lock);

  rxi_FreePacketNoLock(c);
  /* Wakeup anyone waiting for packets */
  rxi_PacketsUnWait();

  MUTEX_EXIT(&rx_freePktQ_lock);
  USERPRI;
}
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
int rxi_RoundUpPacket(p, nb)
     struct rx_packet * p;
     unsigned int nb;
{
  int i;

  i = p->niovecs - 1;
  if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
    if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
      p->wirevec[i].iov_len += nb;
      return 0;
    }
  }
  else {
    if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
      p->wirevec[i].iov_len += nb;
      return 0;
    }
  }
  return 0;
}
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by allocCBuf */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
  int i;

  for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
    register struct rx_packet *cb;
    if ((cb = allocCBuf(class))) {
      p->wirevec[i].iov_base = (caddr_t) cb->localdata;
      p->wirevec[i].iov_len = RX_CBUFFERSIZE;
      nb -= RX_CBUFFERSIZE;
      p->length += RX_CBUFFERSIZE;
      p->niovecs++;
    }
    else break;
  }

  return nb;
}
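
/* Usage sketch (mine): grow a freshly allocated packet so it can hold
 * 4000 bytes of user data; a positive return is the shortfall:
 *
 *	struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SEND);
 *	int shortfall = rxi_AllocDataBuf(p, 4000 - p->length,
 *					 RX_PACKET_CLASS_SEND_CBUF);
 */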
/* Add more packet buffers */
void rxi_MorePackets(int apackets)
{
  struct rx_packet *p, *e;
  int getme;
  SPLVAR;

  getme = apackets * sizeof(struct rx_packet);
  p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);

  PIN(p, getme);	/* XXXXX */
  bzero((char *)p, getme);

  NETPRI;
  MUTEX_ENTER(&rx_freePktQ_lock);

  for (e = p + apackets; p<e; p++) {
    p->wirevec[0].iov_base = (char *) (p->wirehead);
    p->wirevec[0].iov_len  = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *) (p->localdata);
    p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
    p->header.flags = RX_FREE_PACKET;
    p->niovecs = 2;

    queue_Append(&rx_freePacketQueue, p);
  }
  rx_nFreePackets += apackets;
  rxi_NeedMorePackets = FALSE;
  rxi_PacketsUnWait();

  MUTEX_EXIT(&rx_freePktQ_lock);
  USERPRI;
}
/* Add more packet buffers */
void rxi_MorePacketsNoLock(int apackets)
{
  struct rx_packet *p, *e;
  int getme;

  /* allocate enough packets that 1/4 of the packets will be able
   * to hold maximal amounts of data */
  apackets += (apackets/4)
	      * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
  getme = apackets * sizeof(struct rx_packet);
  p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);

  bzero((char *)p, getme);

  for (e = p + apackets; p<e; p++) {
    p->wirevec[0].iov_base = (char *) (p->wirehead);
    p->wirevec[0].iov_len  = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *) (p->localdata);
    p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
    p->header.flags = RX_FREE_PACKET;
    p->niovecs = 2;

    queue_Append(&rx_freePacketQueue, p);
  }
  rx_nFreePackets += apackets;
  rxi_NeedMorePackets = FALSE;
  rxi_PacketsUnWait();
}
void rxi_FreeAllPackets(void)
{
  /* must be called at proper interrupt level, etcetera */
  /* MTUXXX need to free all Packets */
  osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
  UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
}
/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
void rx_CheckPackets()
{
  if (rxi_NeedMorePackets) {
    rxi_MorePackets(rx_initSendWindow);
  }
}
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
void rxi_FreePacketNoLock(struct rx_packet *p)
{
  dpf(("Free %x\n", p));

  if (p->header.flags & RX_FREE_PACKET)
    osi_Panic("rxi_FreePacketNoLock: packet already free\n");
  rx_nFreePackets++;
  p->header.flags = RX_FREE_PACKET;
  queue_Append(&rx_freePacketQueue, p);
}
int rxi_FreeDataBufsNoLock(p, first)
     struct rx_packet * p;
     int first;
{
  struct iovec *iov, *end;

  if (first != 1)          /* MTUXXX */
    osi_Panic("FreeDataBufs 1: first must be 1");
  iov = &p->wirevec[1];
  end = iov + (p->niovecs-1);
  if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
    osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
  for (iov++ ; iov < end ; iov++) {
    if (!iov->iov_base)
      osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
    rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
  }
  p->length = 0;
  p->niovecs = 0;

  return 0;
}
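
/* Aside (mine, for clarity): a continuation buffer is just the localdata
 * area of another rx_packet, so RX_CBUF_TO_PACKET only has to subtract
 * the offset of localdata within struct rx_packet, roughly
 *
 *	(struct rx_packet *)
 *	    ((char *)buf - ((char *)(p)->localdata - (char *)(p)))
 *
 * the real macro lives in rx_packet.h.
 */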
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs. Called when reusing a packet
 * for reading off the wire.
 */
void rxi_RestoreDataBufs(struct rx_packet *p)
{
  int i;
  struct iovec *iov = &p->wirevec[2];

  p->wirevec[0].iov_base = (char *) (p->wirehead);
  p->wirevec[0].iov_len  = RX_HEADER_SIZE;
  p->wirevec[1].iov_base = (char *) (p->localdata);
  p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;

  for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
    if (!iov->iov_base) {
      rxi_nBadIovecs++;
      p->niovecs = i;
      break;
    }
    iov->iov_len = RX_CBUFFERSIZE;
  }
}
int rxi_TrimDataBufs(p, first)
     struct rx_packet * p;
     int first;
{
  int length;
  struct iovec *iov, *end;
  SPLVAR;

  if (first != 1)
    osi_Panic("TrimDataBufs 1: first must be 1");

  /* Skip over continuation buffers containing message data */
  iov = &p->wirevec[2];
  end = iov + (p->niovecs-2);
  length = p->length - p->wirevec[1].iov_len;
  for (; iov < end && length > 0 ; iov++) {
    if (!iov->iov_base)
      osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
    length -= iov->iov_len;
  }

  /* iov now points to the first empty data buffer. */
  if (iov < end) {
    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end ; iov++) {
      if (!iov->iov_base)
	osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
      rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
      p->niovecs--;
    }
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
  }

  return 0;
}
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
void rxi_FreePacket(struct rx_packet *p)
{
  SPLVAR;

  NETPRI;
  MUTEX_ENTER(&rx_freePktQ_lock);

  rxi_FreeDataBufsNoLock(p,1);
  rxi_FreePacketNoLock(p);
  /* Wakeup anyone waiting for packets */
  rxi_PacketsUnWait();

  MUTEX_EXIT(&rx_freePktQ_lock);
  USERPRI;
}
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
struct rx_packet *rxi_AllocPacketNoLock(class)
     int class;
{
  register struct rx_packet *p;

#ifdef KERNEL
  if (rxi_OverQuota(class)) {
    rxi_NeedMorePackets = TRUE;
    MUTEX_ENTER(&rx_stats_mutex);
    switch(class) {
    case RX_PACKET_CLASS_RECEIVE:
      rx_stats.receivePktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SEND:
      rx_stats.sendPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SPECIAL:
      rx_stats.specialPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_RECV_CBUF:
      rx_stats.receiveCbufPktAllocFailures++;
      break;
    case RX_PACKET_CLASS_SEND_CBUF:
      rx_stats.sendCbufPktAllocFailures++;
      break;
    }
    MUTEX_EXIT(&rx_stats_mutex);
    return (struct rx_packet *) 0;
  }
#endif /* KERNEL */

  MUTEX_ENTER(&rx_stats_mutex);
  rx_stats.packetRequests++;
  MUTEX_EXIT(&rx_stats_mutex);

#ifdef KERNEL
  if (queue_IsEmpty(&rx_freePacketQueue))
    osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
  if (queue_IsEmpty(&rx_freePacketQueue))
    rxi_MorePacketsNoLock(rx_initSendWindow);
#endif /* KERNEL */

  rx_nFreePackets--;
  p = queue_First(&rx_freePacketQueue, rx_packet);
  if (p->header.flags != RX_FREE_PACKET)
    osi_Panic("rxi_AllocPacket: packet not free\n");

  dpf(("Alloc %x, class %d\n", p, class));

  queue_Remove(p);
  p->header.flags = 0;

  /* have to do this here because rx_FlushWrite fiddles with the iovs in
   * order to truncate outbound packets.  In the near future, may need
   * to allocate bufs from a static pool here, and/or in AllocSendPacket
   */
  p->wirevec[0].iov_base = (char *) (p->wirehead);
  p->wirevec[0].iov_len  = RX_HEADER_SIZE;
  p->wirevec[1].iov_base = (char *) (p->localdata);
  p->wirevec[1].iov_len  = RX_FIRSTBUFFERSIZE;
  p->niovecs = 2;
  p->length = RX_FIRSTBUFFERSIZE;
  return p;
}
struct rx_packet *rxi_AllocPacket(class)
     int class;
{
  register struct rx_packet *p;

  MUTEX_ENTER(&rx_freePktQ_lock);
  p = rxi_AllocPacketNoLock(class);
  MUTEX_EXIT(&rx_freePktQ_lock);
  return p;
}
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call. It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
struct rx_packet *rxi_AllocSendPacket(call, want)
register struct rx_call *call;
int want;
{
    register struct rx_packet *p = (struct rx_packet *) 0;
    register int mud;
    register unsigned delta;

    SPLVAR;
    mud = call->MTU - RX_HEADER_SIZE;
    delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	    rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

    while (!(call->error)) {
      MUTEX_ENTER(&rx_freePktQ_lock);
      /* if an error occurred, or we get the packet we want, we're done */
      if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	MUTEX_EXIT(&rx_freePktQ_lock);

	want += delta;
	want = MIN(want, mud);

	if ((unsigned) want > p->length)
	  (void) rxi_AllocDataBuf(p, (want - p->length),
				  RX_PACKET_CLASS_SEND_CBUF);

	if ((unsigned) p->length > mud)
	  p->length = mud;

	if (delta >= p->length) {
	  rxi_FreePacket(p);
	  p = NULL;
	} else {
	  p->length -= delta;
	}
	break;
      }

      /* no error occurred, and we didn't get a packet, so we sleep.
       * At this point, we assume that packets will be returned
       * sooner or later, as packets are acknowledged, and so we
       * just wait.  */
      NETPRI;
      call->flags |= RX_CALL_WAIT_PACKETS;
      CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
      MUTEX_EXIT(&call->lock);
      rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
      CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
      osi_rxSleep(&rx_waitingForPackets);
#endif
      MUTEX_EXIT(&rx_freePktQ_lock);
      MUTEX_ENTER(&call->lock);
      CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
      call->flags &= ~RX_CALL_WAIT_PACKETS;
      USERPRI;
    }

    return p;
}
#ifdef RXDEBUG
/* count the number of used FDs */
static int CountFDs(amax)
register int amax;
{
  struct stat tstat;
  register int i, code;
  register int count;

  count = 0;
  for(i=0;i<amax;i++) {
    code = fstat(i, &tstat);
    if (code == 0) count++;
  }
  return count;
}
#else /* RXDEBUG */
#define CountFDs(amax) amax
#endif /* RXDEBUG */
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
int rxi_ReadPacket(socket, p, host, port)
     osi_socket socket;
     register struct rx_packet *p;
     afs_uint32 *host;
     u_short *port;
{
  struct sockaddr_in from;
  struct msghdr msg;
  int nbytes;
  afs_int32 rlen;
  register afs_int32 tlen, savelen;

  rx_computelen(p, tlen);
  rx_SetDataSize(p, tlen);  /* this is the size of the user data area */

  tlen += RX_HEADER_SIZE;   /* now this is the size of the entire packet */
  rlen = rx_maxJumboRecvSize; /* this is what I am advertising.  Only check
			       * it once in order to avoid races.  */
  tlen = rlen - tlen;
  if (tlen > 0) {
    tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    if (tlen > 0) {
      tlen = rlen - tlen;
    }
    else tlen = rlen;
  }
  else tlen = rlen;

  /* Extend the last iovec for padding, it's just to make sure that the
   * read doesn't return more data than we expect, and is done to get around
   * our problems caused by the lack of a length field in the rx header.
   * Use the extra buffer that follows the localdata in each packet
   * structure. */
  savelen = p->wirevec[p->niovecs].iov_len;
  p->wirevec[p->niovecs].iov_len += RX_EXTRABUFFERSIZE;

  bzero((char *)&msg, sizeof(msg));
  msg.msg_name = (char *) &from;
  msg.msg_namelen = sizeof(struct sockaddr_in);
  msg.msg_iov = p->wirevec;
  msg.msg_iovlen = p->niovecs;
  nbytes = rxi_Recvmsg(socket, &msg, 0);

  /* restore the vec to its correct state */
  p->wirevec[p->niovecs].iov_len = savelen;

  p->length = (nbytes - RX_HEADER_SIZE);
  if ((nbytes > tlen) || (p->length & 0x8000)) {  /* Bogus packet */
    if (nbytes > 0)
      rxi_MorePackets(rx_initSendWindow);
    else if (nbytes < 0 && errno == EWOULDBLOCK) {
      MUTEX_ENTER(&rx_stats_mutex);
      rx_stats.noPacketOnRead++;
      MUTEX_EXIT(&rx_stats_mutex);
    }
    else {
      MUTEX_ENTER(&rx_stats_mutex);
      rx_stats.bogusPacketOnRead++;
      rx_stats.bogusHost = from.sin_addr.s_addr;
      MUTEX_EXIT(&rx_stats_mutex);
      dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
	   from.sin_port,nbytes));
    }
    return 0;
  }
  else {
    /* Extract packet header. */
    rxi_DecodePacketHeader(p);

    *host = from.sin_addr.s_addr;
    *port = from.sin_port;
    if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
      struct rx_peer *peer;
      MUTEX_ENTER(&rx_stats_mutex);
      rx_stats.packetsRead[p->header.type-1]++;
      MUTEX_EXIT(&rx_stats_mutex);
      /*
       * Try to look up this peer structure.  If it doesn't exist,
       * don't create a new one -
       * we don't keep count of the bytes sent/received if a peer
       * structure doesn't already exist.
       *
       * The peer/connection cleanup code assumes that there is 1 peer
       * per connection.  If we actually created a peer structure here
       * and this packet was an rxdebug packet, the peer structure would
       * never be cleaned up.
       */
      peer = rxi_FindPeer(*host, *port, 0, 0);
      if (peer) {
	MUTEX_ENTER(&peer->peer_lock);
	hadd32(peer->bytesReceived, p->length);
	MUTEX_EXIT(&peer->peer_lock);
      }
    }

    /* Free any empty packet buffers at the end of this packet */
    rxi_TrimDataBufs(p, 1);

    return 1;
  }
}
#endif /* !KERNEL || UKERNEL */

/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header. All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
struct rx_packet *rxi_SplitJumboPacket(p, host, port, first)
     register struct rx_packet *p;
     afs_int32 host;
     u_short port;
     int first;
{
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    int niov, i;
    struct iovec *iov;
    int length;
    afs_uint32 temp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length. All but the first packet are preceded by
     * an abbreviated four byte header. The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
	return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
	return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov+1;
    for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
	np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    p->niovecs = 2;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *)jp);
    jp->flags = (u_char)(temp >> 24);
    jp->cksum = (u_short)(temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

    return np;
}
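
/* Aside (mine): on the wire a two-packet AFS 3.5 jumbogram looks like
 *
 *	+-----------+-------------------------+--------+---------------+
 *	| rx header | RX_JUMBOBUFFERSIZE data | 4-byte | remaining data|
 *	| (28 bytes)| for packet 1            | jumbo  | for packet 2  |
 *	|           |                         | header |               |
 *	+-----------+-------------------------+--------+---------------+
 *
 * rxi_SplitJumboPacket() above peels packets off the front one at a time.
 */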
#if !defined(KERNEL) || defined(UKERNEL)
/* Send a udp datagram */
int osi_NetSend(socket, addr, dvec, nvecs, length, istack)
     osi_socket socket;
     struct sockaddr_in * addr;
     struct iovec *dvec;
     int nvecs;
     afs_int32 length;
     int istack;
{
    struct msghdr msg;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = (char *) addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    rxi_Sendmsg(socket, &msg, 0);

    return 0;
}
#elif !defined(UKERNEL)
/* osi_NetSend is defined in afs/afs_osinet.c
 * message receipt is done in rxk_input or rx_put.
 */

#ifdef AFS_SUN5_ENV
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
static int cpytoc(mp, off, len, cp)
     mblk_t *mp;
     register int off, len;
     register char * cp;
{
    register int n;

    for (;mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return len;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	bcopy((char *)mp->b_rptr, cp, n);
	cp += n;
	len -= n;
    }
    return len;
}
/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
static int cpytoiovec(mp, off, len, iovs, niovs)
     mblk_t *mp;
     int off, len, niovs;
     register struct iovec *iovs;
{
    register int m,n,o,t,i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return len;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	len -= n;
	while (n) {
	    if (!t) {
		o = 0;
		i++;
		t = iovs[i].iov_len;
	    }
	    m = MIN(n, t);
	    bcopy((char *)mp->b_rptr, iovs[i].iov_base + o, m);
	    mp->b_rptr += m;
	    o += m;
	    t -= m;
	    n -= m;
	}
    }
    return len;
}

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
#else /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV)
static int m_cpytoiovec(m, off, len, iovs, niovs)
     struct mbuf *m;
     int off, len, niovs;
     struct iovec iovs[];
{
    caddr_t p1, p2;
    unsigned int l1, l2, i, t;

    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");  /* MTUXXX probably don't need this check */

    while (off > 0) {
	if (m->m_len <= off) {
	    off -= m->m_len;
	    m = m->m_next;
	    if (!m)
		osi_Panic("m_cpytoiovec 2");
	} else
	    break;
    }

    p1 = mtod(m, caddr_t)+off;
    l1 = m->m_len - off;
    i = 0;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;

    while (len) {
	t = MIN(l1, MIN(l2, (unsigned int)len));
	bcopy(p1, p2, t);
	p1 += t;
	p2 += t;
	l1 -= t;
	l2 -= t;
	len -= t;
	if (!l1) {
	    m = m->m_next;
	    if (!m)
		break;
	    p1 = mtod(m, caddr_t);
	    l1 = m->m_len;
	}
	if (!l2) {
	    if (++i >= niovs)
		break;
	    p2 = iovs[i].iov_base;
	    l2 = iovs[i].iov_len;
	}
    }

    return len;
}
#endif /* AFS_LINUX20_ENV */
#endif /* AFS_SUN5_ENV */
#if !defined(AFS_LINUX20_ENV)
int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#ifdef AFS_SUN5_ENV
     mblk_t *amb;
#else
     struct mbuf *amb;
#endif
     void (*free)();
     struct rx_packet *phandle;
     int hdr_len, data_len;
{
    register int code;

    code = m_cpytoiovec(amb, hdr_len, data_len,
			phandle->wirevec, phandle->niovecs);
    (*free)(amb);

    return code;
}
#endif /* AFS_LINUX20_ENV */
#endif /* KERNEL && !UKERNEL */
/* send a response to a debug packet */

struct rx_packet *rxi_ReceiveDebugPacket(ap, asocket, ahost, aport, istack)
     osi_socket asocket;
     afs_int32 ahost;
     short aport;
     register struct rx_packet *ap;
     afs_int32 istack;
{
    struct rx_debugIn tin;
    afs_int32 tl;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
    } else {
	return ap;
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch(tin.type) {
    case RX_DEBUGI_GETSTATS: {
	struct rx_debugStats tstat;

	/* get basic stats */
	bzero ((char *)&tstat, sizeof(tstat)); /* make sure spares are zero */
	tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	tstat.waitingForPackets = rx_waitingForPackets;
#endif
	tstat.nFreePackets = htonl(rx_nFreePackets);
	tstat.callsExecuted = htonl(rxi_nCalls);
	tstat.packetReclaims = htonl(rx_packetReclaims);
	tstat.usedFDs = CountFDs(64);
	tstat.nWaiting = htonl(rx_nWaiting);
	queue_Count( &rx_idleServerQueue, np, nqe,
		     rx_serverQueueEntry, tstat.idleThreads);
	tstat.idleThreads = htonl(tstat.idleThreads);
	tl = sizeof(struct rx_debugStats) - ap->length;
	if (tl > 0)
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	if (tl <= 0) {
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
	    ap->length = sizeof(struct rx_debugStats);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    rx_computelen(ap, ap->length);
	}
	break;
    }
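
    /* Illustrative sketch (mine, not from this file): the matching
     * client side of the exchange above is just a struct rx_debugIn in
     * network byte order,
     *
     *	struct rx_debugIn tin;
     *	tin.type  = htonl(RX_DEBUGI_GETSTATS);
     *	tin.index = htonl(0);
     *
     * carried in an rx packet whose header has RX_CLIENT_INITIATED set;
     * the rxdebug utility works this way.
     */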
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN: {
	int i, j;
	register struct rx_connection *tc;
	struct rx_call *tcall;
	struct rx_debugConn tconn;
	int all = (tin.type == RX_DEBUGI_GETALLCONN);

	tl = sizeof(struct rx_debugConn) - ap->length;
	if (tl > 0)
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	if (tl > 0)
	    return ap;

	bzero ((char *)&tconn, sizeof(tconn)); /* make sure spares are zero */
	/* get N'th (maybe) "interesting" connection info */
	for(i=0;i<rx_hashTableSize;i++) {
#if !defined(KERNEL)
	    /* the time complexity of the algorithm used here
	     * exponentially increases with the number of connections.
	     */
#ifdef AFS_PTHREAD_ENV
	    pthread_yield();
#else
	    (void) IOMGR_Poll();
#endif
#endif
	    MUTEX_ENTER(&rx_connHashTable_lock);
	    /* We might be slightly out of step since we are not
	     * locking each call, but this is only debugging output.
	     */
	    for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
		if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
		    tconn.host = tc->peer->host;
		    tconn.port = tc->peer->port;
		    tconn.cid = htonl(tc->cid);
		    tconn.epoch = htonl(tc->epoch);
		    tconn.serial = htonl(tc->serial);
		    for(j=0;j<RX_MAXCALLS;j++) {
			tconn.callNumber[j] = htonl(tc->callNumber[j]);
			if ((tcall=tc->call[j])) {
			    tconn.callState[j] = tcall->state;
			    tconn.callMode[j] = tcall->mode;
			    tconn.callFlags[j] = tcall->flags;
			    if (queue_IsNotEmpty(&tcall->rq))
				tconn.callOther[j] |= RX_OTHER_IN;
			    if (queue_IsNotEmpty(&tcall->tq))
				tconn.callOther[j] |= RX_OTHER_OUT;
			}
			else tconn.callState[j] = RX_STATE_NOTINIT;
		    }

		    tconn.natMTU = htonl(tc->peer->natMTU);
		    tconn.error = htonl(tc->error);
		    tconn.flags = tc->flags;
		    tconn.type = tc->type;
		    tconn.securityIndex = tc->securityIndex;
		    if (tc->securityObject) {
			RXS_GetStats (tc->securityObject, tc,
				      &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			DOHTONL(flags);
			DOHTONL(expires);
			DOHTONL(packetsReceived);
			DOHTONL(packetsSent);
			DOHTONL(bytesReceived);
			DOHTONL(bytesSent);
			for (i=0;
			     i<sizeof(tconn.secStats.spares)/sizeof(short);
			     i++)
			    DOHTONS(spares[i]);
			for (i=0;
			     i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
			     i++)
			    DOHTONL(sparel[i]);
		    }

		    MUTEX_EXIT(&rx_connHashTable_lock);
		    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
		    tl = ap->length;
		    ap->length = sizeof(struct rx_debugConn);
		    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
		    ap->length = tl;
		    return ap;
		}
	    }
	    MUTEX_EXIT(&rx_connHashTable_lock);
	}
	/* if we make it here, there are no interesting packets */
	tconn.cid = htonl(0xffffffff); /* means end */
	rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
	tl = ap->length;
	ap->length = sizeof(struct rx_debugConn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
	break;
    }
	/*
	 * Pass back all the peer structures we have available
	 */
    case RX_DEBUGI_GETPEER: {
	int i;
	register struct rx_peer *tp;
	struct rx_debugPeer tpeer;

	tl = sizeof(struct rx_debugPeer) - ap->length;
	if (tl > 0)
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	if (tl > 0)
	    return ap;

	bzero ((char *)&tpeer, sizeof(tpeer));
	for(i=0;i<rx_hashTableSize;i++) {
#if !defined(KERNEL)
	    /* the time complexity of the algorithm used here
	     * exponentially increases with the number of peers.
	     *
	     * Yielding after processing each hash table entry
	     * and dropping rx_peerHashTable_lock
	     * also increases the risk that we will miss a new
	     * entry - but we are willing to live with this
	     * limitation since this is meant for debugging only
	     */
#ifdef AFS_PTHREAD_ENV
	    pthread_yield();
#else
	    (void) IOMGR_Poll();
#endif
#endif
	    MUTEX_ENTER(&rx_peerHashTable_lock);
	    for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
		if (tin.index-- <= 0) {
		    tpeer.host = tp->host;
		    tpeer.port = tp->port;
		    tpeer.ifMTU = htons(tp->ifMTU);
		    tpeer.idleWhen = htonl(tp->idleWhen);
		    tpeer.refCount = htons(tp->refCount);
		    tpeer.burstSize = tp->burstSize;
		    tpeer.burst = tp->burst;
		    tpeer.burstWait.sec = htonl(tp->burstWait.sec);
		    tpeer.burstWait.usec = htonl(tp->burstWait.usec);
		    tpeer.rtt = htonl(tp->rtt);
		    tpeer.rtt_dev = htonl(tp->rtt_dev);
		    tpeer.timeout.sec = htonl(tp->timeout.sec);
		    tpeer.timeout.usec = htonl(tp->timeout.usec);
		    tpeer.nSent = htonl(tp->nSent);
		    tpeer.reSends = htonl(tp->reSends);
		    tpeer.inPacketSkew = htonl(tp->inPacketSkew);
		    tpeer.outPacketSkew = htonl(tp->outPacketSkew);
		    tpeer.rateFlag = htonl(tp->rateFlag);
		    tpeer.natMTU = htons(tp->natMTU);
		    tpeer.maxMTU = htons(tp->maxMTU);
		    tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
		    tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
		    tpeer.MTU = htons(tp->MTU);
		    tpeer.cwind = htons(tp->cwind);
		    tpeer.nDgramPackets = htons(tp->nDgramPackets);
		    tpeer.congestSeq = htons(tp->congestSeq);
		    tpeer.bytesSent.high = htonl(tp->bytesSent.high);
		    tpeer.bytesSent.low = htonl(tp->bytesSent.low);
		    tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
		    tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);

		    MUTEX_EXIT(&rx_peerHashTable_lock);
		    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
		    tl = ap->length;
		    ap->length = sizeof(struct rx_debugPeer);
		    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
		    ap->length = tl;
		    return ap;
		}
	    }
	    MUTEX_EXIT(&rx_peerHashTable_lock);
	}
	/* if we make it here, there are no interesting packets */
	tpeer.host = htonl(0xffffffff); /* means end */
	rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
	tl = ap->length;
	ap->length = sizeof(struct rx_debugPeer);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
	break;
    }
    case RX_DEBUGI_RXSTATS: {
	int i;
	afs_int32 *s;

	tl = sizeof(rx_stats) - ap->length;
	if (tl > 0)
	    tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	if (tl > 0)
	    return ap;

	/* Since its all int32s convert to network order with a loop. */
	MUTEX_ENTER(&rx_stats_mutex);
	s = (afs_int32 *)&rx_stats;
	for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
	    rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));

	tl = ap->length;
	ap->length = sizeof(rx_stats);
	MUTEX_EXIT(&rx_stats_mutex);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
	break;
    }

    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
	tl = ap->length;
	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
	break;
    }
    return ap;
}
struct rx_packet *rxi_ReceiveVersionPacket(ap, asocket, ahost, aport, istack)
     osi_socket asocket;
     afs_int32 ahost;
     short aport;
     register struct rx_packet *ap;
     afs_int32 istack;
{
    char buf[65];

    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	bzero(buf, sizeof(buf));
	snprintf(buf, sizeof(buf), "%s", cml_version_number+4);
	rx_packetwrite(ap, 0, 65, buf);
	ap->length = 65;
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    }

    return ap;
}
/* send a debug packet back to the sender */
static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport, afs_int32 istack)
{
    struct sockaddr_in taddr;
    int i;
    int nbytes;
    int saven = 0;
    int savelen = 0;
#ifdef KERNEL
    int waslocked = ISAFS_GLOCK();
#endif

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i=1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i+1;   /* so condition fails because i == niovecs */
	}
	else nbytes -= apacket->wirevec[i].iov_len;
    }
#ifdef KERNEL
    if (waslocked) AFS_GUNLOCK();
#endif

    /* debug packets are not reliably delivered, hence the cast below. */
    (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		       apacket->length+RX_HEADER_SIZE, istack);

#ifdef KERNEL
    if (waslocked) AFS_GLOCK();
#endif

    if (saven) {  /* means we truncated the packet above. */
	apacket->wirevec[i-1].iov_len = savelen;
	apacket->niovecs = saven;
    }
}
/* Send the packet to appropriate destination for the specified
 * connection.  The header is first encoded and placed in the packet.
 */
void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
		    int istack)
{
#ifdef KERNEL
    int waslocked;
#endif
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    osi_socket socket;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;
    }

#ifdef RXDEBUG
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	if (drop) deliveryType = 'D';	/* Drop the packet */
    }
#endif

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket = (conn->type == RX_CLIENT_CONNECTION
	      ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D') ||
	((rx_intentionallyDroppedPacketsPer100 > 0) &&
	 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';		/* Drop the packet */
    }
    else {
	deliveryType = 'S';		/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#ifdef KERNEL
	waslocked = ISAFS_GLOCK();
	if (waslocked) AFS_GUNLOCK();
#endif
	if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
			p->length+RX_HEADER_SIZE, istack)){
	    /* send failed, so let's hurry up the resend, eh? */
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.netSendFailures++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    p->retryTime = p->timeSent;	/* resend it very soon */
	    clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
	}
#ifdef KERNEL
	if (waslocked) AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
	 peer->host, peer->port, p->header.serial, p->header.epoch,
	 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type-1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
/* Send a list of packets to appropriate destination for the specified
 * connection.  The headers are first encoded and placed in the packets.
 */
void rxi_SendPacketList(struct rx_connection * conn,
			struct rx_packet **list,
			int len,
			int istack)
{
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    int waslocked;
#endif
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    osi_socket socket;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length;
    afs_uint32 serial;
    afs_uint32 temp;
    struct rx_jumboHeader *jp;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    if (len+1 > RX_MAXIOVECS) {
	osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
    }

    /*
     * Stamp the packets in this jumbogram with consecutive serial numbers
     */
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    MUTEX_EXIT(&conn->conn_data_lock);

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    jp = NULL;
    length = RX_HEADER_SIZE;
    wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0 ; i < len ; i++) {
	p = list[i];

	/* The whole 3.5 jumbogram scheme relies on packets fitting
	 * in a single packet buffer. */
	if (p->niovecs > 2) {
	    osi_Panic("rxi_SendPacketList, niovecs > 2\n");
	}

	/* Set the RX_JUMBO_PACKET flags in all but the last packets
	 * in this packet list. */
	if (i < len-1) {
	    if (p->length != RX_JUMBOBUFFERSIZE) {
		osi_Panic("rxi_SendPacketList, length != jumbo size\n");
	    }
	    p->header.flags |= RX_JUMBO_PACKET;
	    length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	    wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	} else {
	    wirevec[i+1].iov_len = p->length;
	    length += p->length;
	}
	wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
	if (jp != NULL) {
	    /* Convert jumbo packet header to network byte order */
	    temp = (afs_uint32)(p->header.flags) << 24;
	    temp |= (afs_uint32)(p->header.spare);
	    *(afs_uint32 *)jp = htonl(temp);
	}
	jp = (struct rx_jumboHeader *)
	     ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);

	/* Stamp each packet with a unique serial number.  The serial
	 * number is maintained on a connection basis because some types
	 * of security may be based on the serial number of the packet,
	 * and security is handled on a per authenticated-connection
	 * basis. */
	/* Pre-increment, to guarantee no zero serial number; a zero
	 * serial number means the packet was never sent. */
	p->header.serial = ++serial;
	/* This is so we can adjust retransmit time-outs better in the face of
	 * rapidly changing round-trip times.  RTO estimation is not a la Karn.
	 */
	if (p->firstSerial == 0) {
	    p->firstSerial = p->header.serial;
	}

#ifdef RXDEBUG
	/* If an output tracer function is defined, call it with the packet and
	 * network address.  Note this function may modify its arguments. */
	if (rx_almostSent) {
	    int drop = (*rx_almostSent) (p, &addr);
	    /* drop packet if return value is non-zero? */
	    if (drop) deliveryType = 'D';	/* Drop the packet */
	}
#endif

	/* Get network byte order header */
	rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
					 * touch ALL the fields */
    }

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket = (conn->type == RX_CLIENT_CONNECTION
	      ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D') ||
	((rx_intentionallyDroppedPacketsPer100 > 0) &&
	 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';		/* Drop the packet */
    }
    else {
	deliveryType = 'S';		/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	waslocked = ISAFS_GLOCK();
	if (!istack && waslocked) AFS_GUNLOCK();
#endif
	if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
	    /* send failed, so let's hurry up the resend, eh? */
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.netSendFailures++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    for (i = 0 ; i < len ; i++) {
		p = list[i];
		p->retryTime = p->timeSent;	/* resend it very soon */
		clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
	    }
	}
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
	if (!istack && waslocked) AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
	 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
	 peer->host, peer->port, p->header.serial, p->header.epoch,
	 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type-1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only. Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(call, conn, optionalPacket, type, data, nbytes, istack)
     register struct rx_call *call;
     register struct rx_connection *conn;
     struct rx_packet *optionalPacket;
     int type;
     char *data;
     int nbytes, istack;
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
	channel = call->channel;
	callNumber = *call->callNumber;
	/* BUSY packets refer to the next call on this connection */
	if (type == RX_PACKET_TYPE_BUSY) {
	    callNumber++;
	}
    } else {
	channel = 0;
	callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	if (!p) osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
	p->length = nbytes;
    else
	nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
	rx_packetwrite(p, 0, nbytes, data);

    for (i=1; i < p->niovecs; i++) {
	if (nbytes <= p->wirevec[i].iov_len) {
	    savelen = p->wirevec[i].iov_len;
	    saven = p->niovecs;
	    p->wirevec[i].iov_len = nbytes;
	    p->niovecs = i+1;   /* so condition fails because i == niovecs */
	}
	else nbytes -= p->wirevec[i].iov_len;
    }

    if (call) rxi_Send(call, p, istack);
    else rxi_SendPacket(conn, p, istack);
    if (saven) {  /* means we truncated the packet above.  We probably don't  */
	/* really need to do this, but it seems safer this way, given that  */
	/* sneaky optionalPacket... */
	p->wirevec[i-1].iov_len = savelen;
	p->niovecs = saven;
    }
    if (!optionalPacket) rxi_FreePacket(p);
    return optionalPacket;
}
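
/* Usage sketch (mine, not from this file): rx uses this for one-shot
 * control packets; e.g. a call abort looks roughly like
 *
 *	afs_int32 error = htonl(call->error);
 *	rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
 *			(char *)&error, sizeof(error), istack);
 *
 * see the abort-sending routines in rx.c for the real thing.
 */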
/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void rxi_EncodePacketHeader(p)
register struct rx_packet *p;
{
    register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */

    bzero((char *)buf, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
		   | (((afs_uint32)p->header.flags)<<16)
		   | (p->header.userStatus<<8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
}
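
/* Aside (mine): the resulting 28-byte wire header is seven 32-bit
 * words, all in network byte order:
 *
 *	word 0: epoch
 *	word 1: cid (connection id | channel)
 *	word 2: callNumber
 *	word 3: seq
 *	word 4: serial
 *	word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *	word 6: spare<<16 | serviceId  (the top 16 bits double as the
 *	                                security package's header checksum)
 *
 * rxi_DecodePacketHeader() below is the exact inverse.
 */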
/* Decode the packet's header (from net byte order to a struct header) */
void rxi_DecodePacketHeader(p)
register struct rx_packet *p;
{
    register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base);      /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf++);
    p->header.cid = ntohl(*buf++);
    p->header.callNumber = ntohl(*buf++);
    p->header.seq = ntohl(*buf++);
    p->header.serial = ntohl(*buf++);
    temp = ntohl(*buf++);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp>>24;
    p->header.flags = temp>>16;
    p->header.userStatus = temp>>8;
    p->header.securityIndex = temp>>0;
    temp = ntohl(*buf++);
    p->header.serviceId = (temp&0xffff);
    p->header.spare = temp>>16;
    /* Note: top 16 bits of this last word are the security checksum */
}
void rxi_PrepareSendPacket(call, p, last)
     register struct rx_call *call;
     register struct rx_packet *p;
     int last;
{
    register struct rx_connection *conn = call->conn;
    int i, j;
    ssize_t len;	/* len must be a signed type; it can go negative */

    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
	p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime); /* Never yet transmitted */
    clock_Zero(&p->firstSent); /* Never yet transmitted */
    p->header.serial = 0;      /* Another way of saying never transmitted... */
    p->backoff = 0;

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i=1; i < p->niovecs && len > 0; i++) {
	len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
	osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
    }
    else {
	/* Free any extra elements in the wirevec */
	for (j = MAX(2,i) ; j < p->niovecs ; j++) {
	    rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
	}
	p->niovecs = i;
	p->wirevec[i-1].iov_len += len;
    }
    RXS_PreparePacket(conn->securityObject, call, p);
}
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
int rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
	return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
	return adjMTU;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
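
/* Worked example (mine): with the stock constants RX_HEADER_SIZE = 28,
 * RX_JUMBOBUFFERSIZE = 1412 and RX_JUMBOHEADERSIZE = 4, an Ethernet MTU
 * of 1500 gives adjMTU = 28 + 1412 + 4 = 1444; the 56 bytes left over
 * are less than one more 1416-byte jumbo buffer, so frags = 0 and the
 * adjusted MTU is 1444.
 */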
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
	return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
	return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
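
/* Worked example (mine): for frags = 4 and an adjusted mtu of 1444,
 * maxMTU = 4*(1444+28) - 28 = 5860 (UDP_HDR_SIZE being 28 for the
 * IP + UDP headers); subtracting 28 + 2*1412 + 4 leaves 3004, which
 * holds 3004/1416 = 2 more middle packets, so (assuming the value
 * clears the RX_MAX_PACKET_SIZE cap) the jumbogram carries 2 + 2 = 4
 * packets.
 */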