2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "../afs/param.h"
14 #include <afs/param.h>
21 #include "../afs/sysincludes.h"
22 #include "../afs/afsincludes.h"
23 #include "../rx/rx_kcommon.h"
24 #include "../rx/rx_clock.h"
25 #include "../rx/rx_queue.h"
26 #include "../rx/rx_packet.h"
27 #else /* defined(UKERNEL) */
28 #include "../h/types.h"
29 #ifndef AFS_LINUX20_ENV
30 #include "../h/systm.h"
32 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
33 #include "../afs/sysincludes.h"
35 #include "../h/socket.h"
36 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
37 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
38 #include "../sys/mount.h" /* it gets pulled in by something later anyway */
40 #include "../h/mbuf.h"
42 #include "../netinet/in.h"
43 #include "../afs/afs_osi.h"
44 #include "../rx/rx_kmutex.h"
45 #include "../rx/rx_clock.h"
46 #include "../rx/rx_queue.h"
48 #include <sys/sysmacros.h>
50 #include "../rx/rx_packet.h"
51 #endif /* defined(UKERNEL) */
52 #include "../rx/rx_globals.h"
54 #include "sys/types.h"
57 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
61 #include <sys/socket.h>
62 #include <netinet/in.h>
63 #endif /* AFS_NT40_ENV */
64 #include "rx_xmit_nt.h"
67 #include <sys/socket.h>
68 #include <netinet/in.h>
74 #include <sys/sysmacros.h>
76 #include "rx_packet.h"
77 #include "rx_globals.h"
79 #include "rx_internal.h"
93 /* rxdb_fileID is used to identify the lock location, along with line#. */
94 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
95 #endif /* RX_LOCKS_DB */
/* Base of the most recently malloc'ed packet array; assigned by
 * rxi_MorePackets / rxi_MorePacketsNoLock and freed by rxi_FreeAllPackets. */
96 struct rx_packet *rx_mallocedP = 0;
98 extern char cml_version_number[];
99 extern int (*rx_almostSent)();
/* Forward declaration: reply path shared by the debug/version handlers below. */
101 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
102 afs_int32 ahost, short aport, afs_int32 istack);
104 /* some rules about packets:
105 * 1. When a packet is allocated, the final iov_buf contains room for
106 * a security trailer, but iov_len masks that fact. If the security
107 * package wants to add the trailer, it may do so, and then extend
108 * iov_len appropriately. For this reason, packet's niovecs and
109 * iov_len fields should be accurate before calling PreparePacket.
113 * all packet buffers (iov_base) are integral multiples of
115 * offset is an integral multiple of the word size.
117 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
121 for (l=0, i=1; i< packet->niovecs ; i++ ) {
122 if (l + packet->wirevec[i].iov_len > offset) {
123 return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
125 l += packet->wirevec[i].iov_len;
132 * all packet buffers (iov_base) are integral multiples of the word size.
133 * offset is an integral multiple of the word size.
135 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
139 for (l=0, i=1; i< packet->niovecs ; i++ ) {
140 if (l + packet->wirevec[i].iov_len > offset) {
141 *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
145 l += packet->wirevec[i].iov_len;
152 * all packet buffers (iov_base) are integral multiples of the
154 * offset is an integral multiple of the word size.
156 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
158 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
159 int resid, char *out)
161 unsigned int i, j, l, r;
162 for (l=0, i=1; i< packet->niovecs ; i++ ) {
163 if (l + packet->wirevec[i].iov_len > offset) {
166 l += packet->wirevec[i].iov_len;
169 /* i is the iovec which contains the first little bit of data in which we
170 * are interested. l is the total length of everything prior to this iovec.
171 * j is the number of bytes we can safely copy out of this iovec.
174 while ((resid > 0) && (i < packet->niovecs)) {
175 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
176 memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
178 l += packet->wirevec[i].iov_len;
182 return (resid ? (r - resid) : r);
187 * all packet buffers (iov_base) are integral multiples of the
189 * offset is an integral multiple of the word size.
191 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
197 for (l=0, i=1; i < packet->niovecs; i++ ) {
198 if (l + packet->wirevec[i].iov_len > offset) {
201 l += packet->wirevec[i].iov_len;
204 /* i is the iovec which contains the first little bit of data in which we
205 * are interested. l is the total length of everything prior to this iovec.
206 * j is the number of bytes we can safely copy out of this iovec.
209 while ((resid > 0) && (i < RX_MAXWVECS)) {
210 if (i >= packet->niovecs)
211 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
214 b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
215 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
218 l += packet->wirevec[i].iov_len;
222 return (resid ? (r - resid) : r);
/* allocCBuf -- take one packet off rx_freePacketQueue for use as a
 * continuation buffer.  If the class is over quota, bump the per-class
 * alloc-failure statistic and fail; if the queue is empty, either flag
 * rxi_NeedMorePackets or grow the pool via rxi_MorePacketsNoLock
 * (the #ifdef scaffolding choosing between those is elided in this extract).
 * Returns the packet with RX_PKTFLAG_FREE cleared, or NULL on failure. */
225 static struct rx_packet * allocCBuf(int class)
231 MUTEX_ENTER(&rx_freePktQ_lock);
234 if (rxi_OverQuota(class)) {
236 rxi_NeedMorePackets = TRUE;
/* record which allocation class failed, under the stats lock */
237 MUTEX_ENTER(&rx_stats_mutex);
239 case RX_PACKET_CLASS_RECEIVE:
240 rx_stats.receivePktAllocFailures++;
242 case RX_PACKET_CLASS_SEND:
243 rx_stats.sendPktAllocFailures++;
245 case RX_PACKET_CLASS_SPECIAL:
246 rx_stats.specialPktAllocFailures++;
248 case RX_PACKET_CLASS_RECV_CBUF:
249 rx_stats.receiveCbufPktAllocFailures++;
251 case RX_PACKET_CLASS_SEND_CBUF:
252 rx_stats.sendCbufPktAllocFailures++;
255 MUTEX_EXIT(&rx_stats_mutex);
259 if (queue_IsEmpty(&rx_freePacketQueue)) {
261 rxi_NeedMorePackets = TRUE;
265 if (queue_IsEmpty(&rx_freePacketQueue)) {
266 rxi_MorePacketsNoLock(rx_initSendWindow);
271 c = queue_First(&rx_freePacketQueue, rx_packet);
/* sanity: a packet on the free queue must carry the FREE flag */
273 if (!(c->flags & RX_PKTFLAG_FREE))
274 osi_Panic("rxi_AllocPacket: packet not free\n");
275 c->flags = 0; /* clear RX_PKTFLAG_FREE, initialize the rest */
281 MUTEX_EXIT(&rx_freePktQ_lock);
288 * Free a packet currently used as a continuation buffer
/* rxi_freeCBuf -- return a continuation-buffer packet to the free queue,
 * under rx_freePktQ_lock, and (in elided code) wake any waiters. */
290 void rxi_freeCBuf(struct rx_packet *c)
295 MUTEX_ENTER(&rx_freePktQ_lock);
297 rxi_FreePacketNoLock(c);
298 /* Wakeup anyone waiting for packets */
301 MUTEX_EXIT(&rx_freePktQ_lock);
305 /* this one is kind of awful.
306 * In rxkad, the packet has been all shortened, and everything, ready for
307 * sending. All of a sudden, we discover we need some of that space back.
308 * This isn't terribly general, because it knows that the packets are only
309 * rounded up to the EBS (userdata + security header).
/* rxi_RoundUpPacket -- reclaim `nb` bytes of the rounding slack in the last
 * iovec (first-buffer slack if it is the localdata buffer, cbuf slack
 * otherwise) by extending iov_len.  See the caveat above: this relies on
 * packets being rounded up only to the EBS.  Return statements are elided
 * in this extract. */
311 int rxi_RoundUpPacket(p, nb)
312 struct rx_packet * p;
317 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
318 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
319 p->wirevec[i].iov_len += nb;
324 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
325 p->wirevec[i].iov_len += nb;
332 /* get sufficient space to store nb bytes of data (or more), and hook
333 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
334 * returns the number of bytes >0 which it failed to come up with.
335 * Don't need to worry about locking on packet, since only
336 * one thread can manipulate one at a time. Locking on continuation
337 * packets is handled by allocCBuf */
338 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
/* rxi_AllocDataBuf -- append continuation buffers (via allocCBuf) to packet
 * p until at least `nb` more bytes of space exist or RX_MAXWVECS is hit.
 * Each cbuf contributes RX_CBUFFERSIZE bytes and extends p->length.
 * Per the comment above, returns <=0 on full success, else the byte count
 * it failed to obtain (return statement elided in this extract). */
339 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
343 for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
344 register struct rx_packet *cb;
345 if ((cb = allocCBuf(class))) {
/* hook the cbuf packet's localdata area in as the next iovec */
346 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
347 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
348 nb -= RX_CBUFFERSIZE;
349 p->length += RX_CBUFFERSIZE;
358 /* Add more packet buffers */
/* rxi_MorePackets -- allocate `apackets` new zeroed rx_packet structures in
 * one osi_Alloc block, initialize each one's header/data iovecs, mark them
 * free, and append them to rx_freePacketQueue under rx_freePktQ_lock. */
359 void rxi_MorePackets(int apackets)
361 struct rx_packet *p, *e;
365 getme = apackets * sizeof(struct rx_packet);
366 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
368 PIN(p, getme); /* XXXXX */
369 memset((char *)p, 0, getme);
372 MUTEX_ENTER(&rx_freePktQ_lock);
374 for (e = p + apackets; p<e; p++) {
/* iovec 0: wire header; iovec 1: first data buffer */
375 p->wirevec[0].iov_base = (char *) (p->wirehead);
376 p->wirevec[0].iov_len = RX_HEADER_SIZE;
377 p->wirevec[1].iov_base = (char *) (p->localdata);
378 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
379 p->flags |= RX_PKTFLAG_FREE;
382 queue_Append(&rx_freePacketQueue, p);
384 rx_nFreePackets += apackets;
385 rxi_NeedMorePackets = FALSE;
389 MUTEX_EXIT(&rx_freePktQ_lock);
394 /* Add more packet buffers */
/* rxi_MorePacketsNoLock -- like rxi_MorePackets but the caller already holds
 * rx_freePktQ_lock.  Over-allocates so a quarter of the packets can carry
 * maximal (jumbogram) amounts of data via continuation buffers. */
395 void rxi_MorePacketsNoLock(int apackets)
397 struct rx_packet *p, *e;
400 /* allocate enough packets that 1/4 of the packets will be able
401 * to hold maximal amounts of data */
402 apackets += (apackets/4)
403 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
404 getme = apackets * sizeof(struct rx_packet);
405 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
407 memset((char *)p, 0, getme);
409 for (e = p + apackets; p<e; p++) {
/* iovec 0: wire header; iovec 1: first data buffer */
410 p->wirevec[0].iov_base = (char *) (p->wirehead);
411 p->wirevec[0].iov_len = RX_HEADER_SIZE;
412 p->wirevec[1].iov_base = (char *) (p->localdata);
413 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
414 p->flags |= RX_PKTFLAG_FREE;
417 queue_Append(&rx_freePacketQueue, p);
419 rx_nFreePackets += apackets;
420 rxi_NeedMorePackets = FALSE;
/* rxi_FreeAllPackets -- release the malloc'ed packet array.  NOTE(review):
 * only frees the block currently pointed to by rx_mallocedP, and the size
 * expression assumes the initial allocation -- see the MTUXXX below. */
425 void rxi_FreeAllPackets(void)
427 /* must be called at proper interrupt level, etcetera */
428 /* MTUXXX need to free all Packets */
429 osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
430 UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
433 /* Allocate more packets iff we need more continuation buffers */
434 /* In kernel, can't page in memory with interrupts disabled, so we
435 * don't use the event mechanism. */
/* rx_CheckPackets -- grow the packet pool if a prior allocation noticed it
 * was running low (rxi_NeedMorePackets set by the allocators above). */
436 void rx_CheckPackets()
438 if (rxi_NeedMorePackets) {
439 rxi_MorePackets(rx_initSendWindow);
443 /* In the packet freeing routine below, the assumption is that
444 we want all of the packets to be used equally frequently, so that we
445 don't get packet buffers paging out. It would be just as valid to
446 assume that we DO want them to page out if not many are being used.
447 In any event, we assume the former, and append the packets to the end
449 /* This explanation is bogus. The free list doesn't remain in any kind of
450 useful order for long: the packets in use get pretty much randomly scattered
451 across all the pages. In order to permit unused {packets,bufs} to page out, they
452 must be stored so that packets which are adjacent in memory are adjacent in the
453 free list. An array springs rapidly to mind.
456 /* Actually free the packet p. */
/* rxi_FreePacketNoLock -- mark p free and append it to rx_freePacketQueue.
 * Caller must hold rx_freePktQ_lock.  Panics on double free. */
457 void rxi_FreePacketNoLock(struct rx_packet *p)
459 dpf(("Free %x\n", p));
461 if (p->flags & RX_PKTFLAG_FREE)
462 osi_Panic("rxi_FreePacketNoLock: packet already free\n");
464 p->flags |= RX_PKTFLAG_FREE;
465 queue_Append(&rx_freePacketQueue, p);
/* rxi_FreeDataBufsNoLock -- free p's continuation buffers (iovecs 2..niovecs-1),
 * converting each iov_base back to its owning packet with RX_CBUF_TO_PACKET.
 * `first` must be 1: iovec 1 is p's own localdata and is never freed.
 * Caller must hold rx_freePktQ_lock. */
468 int rxi_FreeDataBufsNoLock(p, first)
469 struct rx_packet * p;
472 struct iovec *iov, *end;
474 if (first != 1) /* MTUXXX */
475 osi_Panic("FreeDataBufs 1: first must be 1");
476 iov = &p->wirevec[1];
477 end = iov + (p->niovecs-1);
478 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
479 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
480 for (iov++ ; iov < end ; iov++) {
482 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
483 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
491 int rxi_nBadIovecs = 0;
493 /* rxi_RestoreDataBufs
495 * Restore the correct sizes to the iovecs. Called when reusing a packet
496 * for reading off the wire.
498 void rxi_RestoreDataBufs(struct rx_packet *p)
501 struct iovec *iov = &p->wirevec[2];
/* iovecs 0 and 1 always describe the wire header and first data buffer */
503 p->wirevec[0].iov_base = (char *) (p->wirehead);
504 p->wirevec[0].iov_len = RX_HEADER_SIZE;
505 p->wirevec[1].iov_base = (char *) (p->localdata);
506 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
/* remaining iovecs are continuation buffers; restore full cbuf size
 * (a NULL iov_base is handled by elided code, see rxi_nBadIovecs above) */
508 for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
509 if (!iov->iov_base) {
514 iov->iov_len = RX_CBUFFERSIZE;
/* rxi_TrimDataBufs -- free the continuation buffers past the end of p's
 * actual data (p->length), leaving buffers still holding message data alone.
 * `first` must be 1 (iovec 1, localdata, is never freed). */
518 int rxi_TrimDataBufs(p, first)
519 struct rx_packet * p;
523 struct iovec *iov, *end;
527 osi_Panic("TrimDataBufs 1: first must be 1");
529 /* Skip over continuation buffers containing message data */
530 iov = &p->wirevec[2];
531 end = iov + (p->niovecs-2);
/* length remaining after the first data buffer */
532 length = p->length - p->wirevec[1].iov_len;
533 for (; iov < end && length > 0 ; iov++) {
535 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
536 length -= iov->iov_len;
539 /* iov now points to the first empty data buffer. */
544 MUTEX_ENTER(&rx_freePktQ_lock);
546 for (; iov < end ; iov++) {
548 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
549 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
554 MUTEX_EXIT(&rx_freePktQ_lock);
560 /* Free the packet p. P is assumed not to be on any queue, i.e.
561 * remove it yourself first if you call this routine. */
/* rxi_FreePacket -- free p's continuation buffers and then p itself, under
 * rx_freePktQ_lock.  Per the comment above, p must not be on any queue. */
562 void rxi_FreePacket(struct rx_packet *p)
567 MUTEX_ENTER(&rx_freePktQ_lock);
569 rxi_FreeDataBufsNoLock(p,1);
570 rxi_FreePacketNoLock(p);
571 /* Wakeup anyone waiting for packets */
574 MUTEX_EXIT(&rx_freePktQ_lock);
579 /* rxi_AllocPacket sets up p->length so it reflects the number of
580 * bytes in the packet at this point, **not including** the header.
581 * The header is absolutely necessary, besides, this is the way the
582 * length field is usually used */
/* rxi_AllocPacketNoLock -- allocate a packet from the free queue; caller
 * holds rx_freePktQ_lock.  Over-quota classes fail with a per-class stats
 * bump.  An empty free queue either panics or is replenished via
 * rxi_MorePacketsNoLock (the #ifdef choosing between those is elided).
 * The returned packet's iovecs are reset and p->length is set to
 * RX_FIRSTBUFFERSIZE, i.e. data bytes excluding the header. */
583 struct rx_packet *rxi_AllocPacketNoLock(class)
586 register struct rx_packet *p;
589 if (rxi_OverQuota(class)) {
590 rxi_NeedMorePackets = TRUE;
/* record which allocation class failed, under the stats lock */
591 MUTEX_ENTER(&rx_stats_mutex);
593 case RX_PACKET_CLASS_RECEIVE:
594 rx_stats.receivePktAllocFailures++;
596 case RX_PACKET_CLASS_SEND:
597 rx_stats.sendPktAllocFailures++;
599 case RX_PACKET_CLASS_SPECIAL:
600 rx_stats.specialPktAllocFailures++;
602 case RX_PACKET_CLASS_RECV_CBUF:
603 rx_stats.receiveCbufPktAllocFailures++;
605 case RX_PACKET_CLASS_SEND_CBUF:
606 rx_stats.sendCbufPktAllocFailures++;
609 MUTEX_EXIT(&rx_stats_mutex);
610 return (struct rx_packet *) 0;
614 MUTEX_ENTER(&rx_stats_mutex);
615 rx_stats.packetRequests++;
616 MUTEX_EXIT(&rx_stats_mutex);
619 if (queue_IsEmpty(&rx_freePacketQueue))
620 osi_Panic("rxi_AllocPacket error");
622 if (queue_IsEmpty(&rx_freePacketQueue))
623 rxi_MorePacketsNoLock(rx_initSendWindow);
627 p = queue_First(&rx_freePacketQueue, rx_packet);
/* sanity: a packet on the free queue must carry the FREE flag */
628 if (!(p->flags & RX_PKTFLAG_FREE))
629 osi_Panic("rxi_AllocPacket: packet not free\n");
631 dpf(("Alloc %x, class %d\n", p, class));
634 p->flags = 0; /* clear RX_PKTFLAG_FREE, initialize the rest */
637 /* have to do this here because rx_FlushWrite fiddles with the iovs in
638 * order to truncate outbound packets. In the near future, may need
639 * to allocate bufs from a static pool here, and/or in AllocSendPacket
641 p->wirevec[0].iov_base = (char *) (p->wirehead);
642 p->wirevec[0].iov_len = RX_HEADER_SIZE;
643 p->wirevec[1].iov_base = (char *) (p->localdata);
644 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
646 p->length = RX_FIRSTBUFFERSIZE;
/* rxi_AllocPacket -- locking wrapper around rxi_AllocPacketNoLock. */
650 struct rx_packet *rxi_AllocPacket(class)
653 register struct rx_packet *p;
655 MUTEX_ENTER(&rx_freePktQ_lock);
656 p = rxi_AllocPacketNoLock(class);
657 MUTEX_EXIT(&rx_freePktQ_lock);
661 /* This guy comes up with as many buffers as it {takes,can get} given
662 * the MTU for this call. It also sets the packet length before
663 * returning. caution: this is often called at NETPRI
664 * Called with call locked.
/* rxi_AllocSendPacket -- allocate a send packet sized for this call's MTU,
 * adding continuation buffers up to min(want, mud) and reserving `delta`
 * bytes for the connection's security header/trailer.  If no packet is
 * available, sleeps (CV_WAIT or osi_rxSleep) until packets are freed, with
 * the call lock dropped and the call held across the wait.  Loops until a
 * packet is obtained or call->error is set. */
666 struct rx_packet *rxi_AllocSendPacket(call, want)
667 register struct rx_call *call;
670 register struct rx_packet *p = (struct rx_packet *) 0;
672 register unsigned delta;
/* mud: max user data per packet; delta: security header+trailer overhead */
675 mud = call->MTU - RX_HEADER_SIZE;
676 delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
677 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
679 while (!(call->error)) {
680 MUTEX_ENTER(&rx_freePktQ_lock);
681 /* if an error occurred, or we get the packet we want, we're done */
682 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
683 MUTEX_EXIT(&rx_freePktQ_lock);
686 want = MIN(want, mud);
/* grow the packet toward the requested size; shrink if over the MTU */
688 if ((unsigned) want > p->length)
689 (void) rxi_AllocDataBuf(p, (want - p->length),
690 RX_PACKET_CLASS_SEND_CBUF);
692 if ((unsigned) p->length > mud)
695 if (delta >= p->length) {
704 /* no error occurred, and we didn't get a packet, so we sleep.
705 * At this point, we assume that packets will be returned
706 * sooner or later, as packets are acknowledged, and so we
709 call->flags |= RX_CALL_WAIT_PACKETS;
710 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
711 MUTEX_EXIT(&call->lock);
712 rx_waitingForPackets = 1;
714 #ifdef RX_ENABLE_LOCKS
715 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
717 osi_rxSleep(&rx_waitingForPackets);
719 MUTEX_EXIT(&rx_freePktQ_lock);
720 MUTEX_ENTER(&call->lock);
721 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
722 call->flags &= ~RX_CALL_WAIT_PACKETS;
731 /* count the number of used FDs */
/* CountFDs -- count open file descriptors in [0, amax) by fstat'ing each
 * candidate; a descriptor is counted when fstat succeeds. */
732 static int CountFDs(amax)
735 register int i, code;
739 for(i=0;i<amax;i++) {
740 code = fstat(i, &tstat);
741 if (code == 0) count++;
748 #define CountFDs(amax) amax
752 #if !defined(KERNEL) || defined(UKERNEL)
754 /* This function reads a single packet from the interface into the
755 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
756 * (host,port) of the sender are stored in the supplied variables, and
757 * the data length of the packet is stored in the packet structure.
758 * The header is decoded. */
759 int rxi_ReadPacket(socket, p, host, port)
761 register struct rx_packet *p;
765 struct sockaddr_in from;
768 register afs_int32 tlen, savelen;
/* size the packet's iovecs to advertise rx_maxJumboRecvSize bytes */
770 rx_computelen(p, tlen);
771 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
773 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
774 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
775 * it once in order to avoid races. */
778 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
786 /* Extend the last iovec for padding, it's just to make sure that the
787 * read doesn't return more data than we expect, and is done to get around
788 * our problems caused by the lack of a length field in the rx header.
789 * Use the extra buffer that follows the localdata in each packet
791 savelen = p->wirevec[p->niovecs-1].iov_len;
792 p->wirevec[p->niovecs-1].iov_len += RX_EXTRABUFFERSIZE;
/* scatter-read the datagram directly into the packet's iovecs */
794 memset((char *)&msg, 0, sizeof(msg));
795 msg.msg_name = (char *) &from;
796 msg.msg_namelen = sizeof(struct sockaddr_in);
797 msg.msg_iov = p->wirevec;
798 msg.msg_iovlen = p->niovecs;
799 nbytes = rxi_Recvmsg(socket, &msg, 0);
801 /* restore the vec to its correct state */
802 p->wirevec[p->niovecs-1].iov_len = savelen;
804 p->length = (nbytes - RX_HEADER_SIZE);
805 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
807 rxi_MorePackets(rx_initSendWindow);
809 else if (nbytes < 0 && errno == EWOULDBLOCK) {
810 MUTEX_ENTER(&rx_stats_mutex);
811 rx_stats.noPacketOnRead++;
812 MUTEX_EXIT(&rx_stats_mutex);
816 MUTEX_ENTER(&rx_stats_mutex);
817 rx_stats.bogusPacketOnRead++;
818 rx_stats.bogusHost = from.sin_addr.s_addr;
819 MUTEX_EXIT(&rx_stats_mutex);
820 dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
821 from.sin_port,nbytes));
826 /* Extract packet header. */
827 rxi_DecodePacketHeader(p);
/* report the sender's address and port back to the caller */
829 *host = from.sin_addr.s_addr;
830 *port = from.sin_port;
831 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
832 struct rx_peer *peer;
833 MUTEX_ENTER(&rx_stats_mutex);
834 rx_stats.packetsRead[p->header.type-1]++;
835 MUTEX_EXIT(&rx_stats_mutex);
837 * Try to look up this peer structure. If it doesn't exist,
838 * don't create a new one -
839 * we don't keep count of the bytes sent/received if a peer
840 * structure doesn't already exist.
842 * The peer/connection cleanup code assumes that there is 1 peer
843 * per connection. If we actually created a peer structure here
844 * and this packet was an rxdebug packet, the peer structure would
845 * never be cleaned up.
847 peer = rxi_FindPeer(*host, *port, 0, 0);
849 MUTEX_ENTER(&peer->peer_lock);
850 hadd32(peer->bytesReceived, p->length);
851 MUTEX_EXIT(&peer->peer_lock);
855 /* Free any empty packet buffers at the end of this packet */
856 rxi_TrimDataBufs(p, 1);
862 #endif /* !KERNEL || UKERNEL */
864 /* This function splits off the first packet in a jumbo packet.
865 * As of AFS 3.5, jumbograms contain more than one fixed size
866 * packet, and the RX_JUMBO_PACKET flag is set in all but the
867 * last packet header. All packets (except the last) are padded to
868 * fall on RX_CBUFFERSIZE boundaries.
869 * HACK: We store the length of the first n-1 packets in the
870 * last two pad bytes. */
872 struct rx_packet *rxi_SplitJumboPacket(p, host, port, first)
873 register struct rx_packet *p;
878 struct rx_packet *np;
879 struct rx_jumboHeader *jp;
885 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
886 * bytes in length. All but the first packet are preceded by
887 * an abbreviated four byte header. The length of the last packet
888 * is calculated from the size of the jumbogram. */
889 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
/* sanity-check that p really contains a full first packet + jumbo header */
891 if ((int)p->length < length) {
892 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
895 niov = p->niovecs - 2;
897 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
900 iov = &p->wirevec[2];
/* the second packet's storage is the first continuation buffer */
901 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
903 /* Get a pointer to the abbreviated packet header */
904 jp = (struct rx_jumboHeader *)
905 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
907 /* Set up the iovecs for the next packet */
908 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
909 np->wirevec[0].iov_len = sizeof(struct rx_header);
910 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
911 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
912 np->niovecs = niov+1;
/* hand the remaining continuation buffers over to np */
913 for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
914 np->wirevec[i] = *iov;
916 np->length = p->length - length;
917 p->length = RX_JUMBOBUFFERSIZE;
920 /* Convert the jumbo packet header to host byte order */
921 temp = ntohl(*(afs_uint32 *)jp);
922 jp->flags = (u_char)(temp >> 24);
923 jp->cksum = (u_short)(temp);
925 /* Fill in the packet header */
926 np->header = p->header;
927 np->header.serial = p->header.serial + 1;
928 np->header.seq = p->header.seq + 1;
929 np->header.flags = jp->flags;
930 np->header.spare = jp->cksum;
936 /* Send a udp datagram */
/* osi_NetSend -- send a UDP datagram described by the iovec array via
 * rxi_Sendmsg (msg_name/msg_iov setup lines are partly elided here). */
937 int osi_NetSend(socket, addr, dvec, nvecs, length, istack)
947 memset(&msg, 0, sizeof(msg));
949 msg.msg_iovlen = nvecs;
951 msg.msg_namelen = sizeof(struct sockaddr_in);
953 rxi_Sendmsg(socket, &msg, 0);
957 #elif !defined(UKERNEL)
958 /* osi_NetSend is defined in afs/afs_osinet.c
959 * message receipt is done in rxk_input or rx_put.
964 * Copy an mblock to the contiguous area pointed to by cp.
965 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
966 * but it doesn't really.
967 * Returns the number of bytes not transferred.
968 * The message is NOT changed.
/* cpytoc -- copy up to len bytes from an mblk chain into the contiguous
 * buffer cp, skipping non-M_DATA blocks.  Per the note above, the off/len
 * skip semantics are incomplete (MTUXXX). */
970 static int cpytoc(mp, off, len, cp)
972 register int off, len;
977 for (;mp && len > 0; mp = mp->b_cont) {
978 if (mp->b_datap->db_type != M_DATA) {
981 n = MIN(len, (mp->b_wptr - mp->b_rptr));
982 memcpy(cp, (char *)mp->b_rptr, n);
990 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
991 * but it doesn't really.
992 * This sucks, anyway, do it like m_cpy.... below
/* cpytoiovec -- copy up to len bytes from an mblk chain into an iovec
 * array, advancing through iovecs as each fills.  Same MTUXXX caveat as
 * cpytoc; the iovec-advance bookkeeping lines are elided in this extract. */
994 static int cpytoiovec(mp, off, len, iovs, niovs)
997 register struct iovec *iovs;
999 register int m,n,o,t,i;
1001 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1002 if (mp->b_datap->db_type != M_DATA) {
1005 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1011 t = iovs[i].iov_len;
1014 memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1023 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1024 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1026 #if !defined(AFS_LINUX20_ENV)
/* m_cpytoiovec -- BSD-mbuf flavor: copy len bytes starting at byte `off`
 * of mbuf chain m into the iovec array, walking both chains in step.
 * Panics on NULL/negative arguments rather than failing quietly. */
1027 static int m_cpytoiovec(m, off, len, iovs, niovs)
1029 int off, len, niovs;
1030 struct iovec iovs[];
1033 unsigned int l1, l2, i, t;
1035 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1036 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
/* skip whole mbufs until `off` falls inside the current one */
1039 if (m->m_len <= off) {
1049 p1 = mtod(m, caddr_t)+off;
1050 l1 = m->m_len - off;
1052 p2 = iovs[0].iov_base;
1053 l2 = iovs[0].iov_len;
/* copy min(mbuf remainder, iovec remainder, len) each iteration;
 * refill p1/l1 from the next mbuf and p2/l2 from the next iovec
 * (the copy/advance lines are elided in this extract) */
1056 t = MIN(l1, MIN(l2, (unsigned int)len));
1065 p1 = mtod(m, caddr_t);
1071 p2 = iovs[i].iov_base;
1072 l2 = iovs[i].iov_len;
1080 #endif /* AFS_SUN5_ENV */
1082 #if !defined(AFS_LINUX20_ENV)
/* rx_mb_to_packet -- copy data_len bytes (after hdr_len) from message
 * buffer amb into phandle's iovecs via m_cpytoiovec. */
1083 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1090 struct rx_packet *phandle;
1091 int hdr_len, data_len;
1095 code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1101 #endif /*KERNEL && !UKERNEL*/
1104 /* send a response to a debug packet */
/* rxi_ReceiveDebugPacket -- handle an incoming rxdebug request: decode the
 * rx_debugIn header, dispatch on tin.type (stats / connections / peers /
 * rx_stats), fill the reply into ap in network byte order, and send it back
 * with rxi_SendDebugPacket.  Only client-initiated requests are answered.
 * (switch/brace/#endif scaffolding is elided throughout this extract) */
1106 struct rx_packet *rxi_ReceiveDebugPacket(ap, asocket, ahost, aport, istack)
1110 register struct rx_packet *ap;
1113 struct rx_debugIn tin;
1115 struct rx_serverQueueEntry *np, *nqe;
1118 * Only respond to client-initiated Rx debug packets,
1119 * and clear the client flag in the response.
1121 if (ap->header.flags & RX_CLIENT_INITIATED) {
1122 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1123 rxi_EncodePacketHeader(ap);
1128 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1129 /* all done with packet, now set length to the truth, so we can
1130 * reuse this packet */
1131 rx_computelen(ap, ap->length);
/* request fields arrive in network byte order */
1133 tin.type = ntohl(tin.type);
1134 tin.index = ntohl(tin.index);
1136 case RX_DEBUGI_GETSTATS: {
1137 struct rx_debugStats tstat;
1139 /* get basic stats */
1140 memset((char *)&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1141 tstat.version = RX_DEBUGI_VERSION;
1142 #ifndef RX_ENABLE_LOCKS
1143 tstat.waitingForPackets = rx_waitingForPackets;
1145 tstat.nFreePackets = htonl(rx_nFreePackets);
1146 tstat.callsExecuted = htonl(rxi_nCalls);
1147 tstat.packetReclaims = htonl(rx_packetReclaims);
1148 tstat.usedFDs = CountFDs(64);
1149 tstat.nWaiting = htonl(rx_nWaiting);
1150 queue_Count( &rx_idleServerQueue, np, nqe,
1151 rx_serverQueueEntry, tstat.idleThreads);
1152 tstat.idleThreads = htonl(tstat.idleThreads);
/* grow the reply packet if the stats don't fit yet */
1153 tl = sizeof(struct rx_debugStats) - ap->length;
1155 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1158 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1159 ap->length = sizeof(struct rx_debugStats);
1160 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1161 rx_computelen(ap, ap->length);
1166 case RX_DEBUGI_GETALLCONN:
1167 case RX_DEBUGI_GETCONN: {
1169 register struct rx_connection *tc;
1170 struct rx_call *tcall;
1171 struct rx_debugConn tconn;
1172 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1175 tl = sizeof(struct rx_debugConn) - ap->length;
1177 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1181 memset((char *)&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1182 /* get N'th (maybe) "interesting" connection info */
1183 for(i=0;i<rx_hashTableSize;i++) {
1184 #if !defined(KERNEL)
1185 /* the time complexity of the algorithm used here
1186 * exponentially increases with the number of connections.
1188 #ifdef AFS_PTHREAD_ENV
1191 (void) IOMGR_Poll();
1194 MUTEX_ENTER(&rx_connHashTable_lock);
1195 /* We might be slightly out of step since we are not
1196 * locking each call, but this is only debugging output.
1198 for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
/* tin.index selects which interesting connection to report */
1199 if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1200 tconn.host = tc->peer->host;
1201 tconn.port = tc->peer->port;
1202 tconn.cid = htonl(tc->cid);
1203 tconn.epoch = htonl(tc->epoch);
1204 tconn.serial = htonl(tc->serial);
1205 for(j=0;j<RX_MAXCALLS;j++) {
1206 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1207 if ((tcall=tc->call[j])) {
1208 tconn.callState[j] = tcall->state;
1209 tconn.callMode[j] = tcall->mode;
1210 tconn.callFlags[j] = tcall->flags;
1211 if (queue_IsNotEmpty(&tcall->rq))
1212 tconn.callOther[j] |= RX_OTHER_IN;
1213 if (queue_IsNotEmpty(&tcall->tq))
1214 tconn.callOther[j] |= RX_OTHER_OUT;
1216 else tconn.callState[j] = RX_STATE_NOTINIT;
1219 tconn.natMTU = htonl(tc->peer->natMTU);
1220 tconn.error = htonl(tc->error);
1221 tconn.flags = tc->flags;
1222 tconn.type = tc->type;
1223 tconn.securityIndex = tc->securityIndex;
1224 if (tc->securityObject) {
1225 RXS_GetStats (tc->securityObject, tc,
1227 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1228 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1231 DOHTONL(packetsReceived);
1232 DOHTONL(packetsSent);
1233 DOHTONL(bytesReceived);
1236 i<sizeof(tconn.secStats.spares)/sizeof(short);
1240 i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
1245 MUTEX_EXIT(&rx_connHashTable_lock);
1246 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1248 ap->length = sizeof(struct rx_debugConn);
1249 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1254 MUTEX_EXIT(&rx_connHashTable_lock);
1256 /* if we make it here, there are no interesting packets */
1257 tconn.cid = htonl(0xffffffff); /* means end */
1258 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1260 ap->length = sizeof(struct rx_debugConn);
1261 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1267 * Pass back all the peer structures we have available
1270 case RX_DEBUGI_GETPEER: {
1272 register struct rx_peer *tp;
1273 struct rx_debugPeer tpeer;
1276 tl = sizeof(struct rx_debugPeer) - ap->length;
1278 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1282 memset((char *)&tpeer, 0, sizeof(tpeer));
1283 for(i=0;i<rx_hashTableSize;i++) {
1284 #if !defined(KERNEL)
1285 /* the time complexity of the algorithm used here
1286 * exponentially increases with the number of peers.
1288 * Yielding after processing each hash table entry
1289 * and dropping rx_peerHashTable_lock.
1290 * also increases the risk that we will miss a new
1291 * entry - but we are willing to live with this
1292 * limitation since this is meant for debugging only
1294 #ifdef AFS_PTHREAD_ENV
1297 (void) IOMGR_Poll();
1300 MUTEX_ENTER(&rx_peerHashTable_lock);
1301 for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
/* tin.index selects which peer to report */
1302 if (tin.index-- <= 0) {
1303 tpeer.host = tp->host;
1304 tpeer.port = tp->port;
1305 tpeer.ifMTU = htons(tp->ifMTU);
1306 tpeer.idleWhen = htonl(tp->idleWhen);
1307 tpeer.refCount = htons(tp->refCount);
1308 tpeer.burstSize = tp->burstSize;
1309 tpeer.burst = tp->burst;
1310 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1311 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1312 tpeer.rtt = htonl(tp->rtt);
1313 tpeer.rtt_dev = htonl(tp->rtt_dev);
1314 tpeer.timeout.sec = htonl(tp->timeout.sec);
1315 tpeer.timeout.usec = htonl(tp->timeout.usec);
1316 tpeer.nSent = htonl(tp->nSent);
1317 tpeer.reSends = htonl(tp->reSends);
1318 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1319 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1320 tpeer.rateFlag = htonl(tp->rateFlag);
1321 tpeer.natMTU = htons(tp->natMTU);
1322 tpeer.maxMTU = htons(tp->maxMTU);
1323 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1324 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1325 tpeer.MTU = htons(tp->MTU);
1326 tpeer.cwind = htons(tp->cwind);
1327 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1328 tpeer.congestSeq = htons(tp->congestSeq);
1329 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1330 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1331 tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1332 tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
1334 MUTEX_EXIT(&rx_peerHashTable_lock);
1335 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1337 ap->length = sizeof(struct rx_debugPeer);
1338 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1343 MUTEX_EXIT(&rx_peerHashTable_lock);
1345 /* if we make it here, there are no interesting packets */
1346 tpeer.host = htonl(0xffffffff); /* means end */
1347 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1349 ap->length = sizeof(struct rx_debugPeer);
1350 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1355 case RX_DEBUGI_RXSTATS: {
1359 tl = sizeof(rx_stats) - ap->length;
1361 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1365 /* Since its all int32s convert to network order with a loop. */
1366 MUTEX_ENTER(&rx_stats_mutex);
1367 s = (afs_int32 *)&rx_stats;
1368 for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1369 rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1372 ap->length = sizeof(rx_stats);
1373 MUTEX_EXIT(&rx_stats_mutex);
1374 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1380 /* error response packet */
1381 tin.type = htonl(RX_DEBUGI_BADTYPE);
1382 tin.index = tin.type;
1383 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1385 ap->length = sizeof(struct rx_debugIn);
1386 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1393 struct rx_packet *rxi_ReceiveVersionPacket(ap, asocket, ahost, aport, istack)
1397 register struct rx_packet *ap;
/* Handle an incoming version-query packet: echo it back to the sender
 * with the CML version string (past its 4-char prefix) as the payload.
 * NOTE(review): the declarations of asocket/ahost/aport/istack and the
 * local reply buffer `buf` fall outside this excerpt — confirm `buf`
 * is at least 65 bytes, since 65 bytes are written below. */
1403 * Only respond to client-initiated version requests, and
1404 * clear that flag in the response.
1406 if (ap->header.flags & RX_CLIENT_INITIATED) {
/* Clear the client-initiated flag so the reply is marked as a response. */
1409 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
/* Re-encode so the cleared flag actually reaches the wire header. */
1410 rxi_EncodePacketHeader(ap);
/* Zero-fill first: combined with the size-1 bound on strncpy this
 * guarantees the version string is NUL-terminated. */
1411 memset(buf, 0, sizeof(buf));
1412 strncpy(buf, cml_version_number+4, sizeof(buf)-1);
/* Payload is a fixed 65-byte version-string region. */
1413 rx_packetwrite(ap, 0, 65, buf);
1416 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1424 /* send a debug packet back to the sender */
/* Transmit `apacket` (a debug/version reply) to ahost:aport over asocket.
 * Temporarily trims the packet's iovec chain to exactly apacket->length
 * bytes, sends, then restores the original iovec lengths.
 * NOTE(review): locals i, nbytes, savelen, saven are declared in lines
 * missing from this excerpt. */
1425 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1426 afs_int32 ahost, short aport, afs_int32 istack)
1428 struct sockaddr_in taddr;
1434 int waslocked = ISAFS_GLOCK();
/* ahost/aport are presumably already in network byte order — they are
 * stored into the sockaddr without conversion; verify against callers. */
1437 taddr.sin_family = AF_INET;
1438 taddr.sin_port = aport;
1439 taddr.sin_addr.s_addr = ahost;
1440 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
1441 taddr.sin_len = sizeof(struct sockaddr_in);
1444 /* We need to trim the niovecs. */
1445 nbytes = apacket->length;
1446 for (i=1; i < apacket->niovecs; i++) {
1447 if (nbytes <= apacket->wirevec[i].iov_len) {
/* Remember the pre-trim length and iovec count so they can be
 * restored after the send. */
1448 savelen = apacket->wirevec[i].iov_len;
1449 saven = apacket->niovecs;
1450 apacket->wirevec[i].iov_len = nbytes;
1451 apacket->niovecs = i+1; /* so condition fails because i == niovecs */
1453 else nbytes -= apacket->wirevec[i].iov_len;
/* Drop the AFS global lock (if held) across the network send, and
 * reacquire it afterwards. */
1457 if (waslocked) AFS_GUNLOCK();
1459 /* debug packets are not reliably delivered, hence the cast below. */
1460 (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1461 apacket->length+RX_HEADER_SIZE, istack);
1463 if (waslocked) AFS_GLOCK();
1466 if (saven) { /* means we truncated the packet above. */
1467 apacket->wirevec[i-1].iov_len = savelen;
1468 apacket->niovecs = saven;
1473 /* Send the packet to appropriate destination for the specified
1474 * connection. The header is first encoded and placed in the packet.
/* Side effects: stamps a new connection serial into p->header.serial,
 * updates rx_stats.packetsSent / netSendFailures and the peer's
 * bytesSent counter; on send failure, p->retryTime is pulled forward.
 * NOTE(review): the remaining parameters (istack) and locals (socket,
 * waslocked) are declared in lines missing from this excerpt. */
1476 void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
1482 struct sockaddr_in addr;
1483 register struct rx_peer *peer = conn->peer;
1486 char deliveryType = 'S';
1488 /* The address we're sending the packet to */
/* peer->host/port are stored without conversion — presumably already
 * network byte order; verify against peer setup. */
1489 addr.sin_family = AF_INET;
1490 addr.sin_port = peer->port;
1491 addr.sin_addr.s_addr = peer->host;
1493 /* This stuff should be revamped, I think, so that most, if not
1494 * all, of the header stuff is always added here. We could
1495 * probably do away with the encode/decode routines. XXXXX */
1497 /* Stamp each packet with a unique serial number. The serial
1498 * number is maintained on a connection basis because some types
1499 * of security may be based on the serial number of the packet,
1500 * and security is handled on a per authenticated-connection
1502 /* Pre-increment, to guarantee no zero serial number; a zero
1503 * serial number means the packet was never sent. */
1504 MUTEX_ENTER(&conn->conn_data_lock);
1505 p->header.serial = ++conn->serial;
1506 MUTEX_EXIT(&conn->conn_data_lock);
1507 /* This is so we can adjust retransmit time-outs better in the face of
1508 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1510 if (p->firstSerial == 0) {
1511 p->firstSerial = p->header.serial;
1515 /* If an output tracer function is defined, call it with the packet and
1516 * network address. Note this function may modify its arguments. */
1517 if (rx_almostSent) {
1518 int drop = (*rx_almostSent) (p, &addr);
1519 /* drop packet if return value is non-zero? */
1520 if (drop) deliveryType = 'D'; /* Drop the packet */
1524 /* Get network byte order header */
1525 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1526 * touch ALL the fields */
1528 /* Send the packet out on the same socket that related packets are being
1530 socket = (conn->type == RX_CLIENT_CONNECTION
1531 ? rx_socket : conn->service->socket);
1534 /* Possibly drop this packet, for testing purposes */
1535 if ((deliveryType == 'D') ||
1536 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1537 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1538 deliveryType = 'D'; /* Drop the packet */
1541 deliveryType = 'S'; /* Send the packet */
1542 #endif /* RXDEBUG */
1544 /* Loop until the packet is sent. We'd prefer just to use a
1545 * blocking socket, but unfortunately the interface doesn't
1546 * allow us to have the socket block in send mode, and not
1547 * block in receive mode */
/* Drop the AFS global lock around the (possibly blocking) send. */
1550 waslocked = ISAFS_GLOCK();
1551 if (waslocked) AFS_GUNLOCK();
1553 if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1554 p->length+RX_HEADER_SIZE, istack)){
1555 /* send failed, so let's hurry up the resend, eh? */
1556 MUTEX_ENTER(&rx_stats_mutex);
1557 rx_stats.netSendFailures++;
1558 MUTEX_EXIT(&rx_stats_mutex);
/* Retry soon: ~10ms plus an exponential backoff term (backoff << 8 ms). */
1559 p->retryTime = p->timeSent; /* resend it very soon */
1560 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1563 if (waslocked) AFS_GLOCK();
1568 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1569 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1570 peer->host, peer->port, p->header.serial, p->header.epoch,
1571 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1572 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
/* Per-type send statistics and per-peer byte accounting, each under
 * its own lock. */
1574 MUTEX_ENTER(&rx_stats_mutex);
1575 rx_stats.packetsSent[p->header.type-1]++;
1576 MUTEX_EXIT(&rx_stats_mutex);
1577 MUTEX_ENTER(&peer->peer_lock);
1578 hadd32(peer->bytesSent, p->length);
1579 MUTEX_EXIT(&peer->peer_lock);
1582 /* Send a list of packets to appropriate destination for the specified
1583 * connection. The headers are first encoded and placed in the packets.
/* Builds a single AFS 3.5 jumbogram: one shared wire header (from
 * list[0]) followed by each packet's data buffer, with a jumbo header
 * between consecutive packets. The whole jumbogram is sent with one
 * osi_NetSend call. Consecutive serial numbers are reserved up front.
 * NOTE(review): several parameter and local declarations (len, i,
 * length, serial, temp, socket, waslocked) are in lines missing from
 * this excerpt. */
1585 void rxi_SendPacketList(struct rx_connection * conn,
1586 struct rx_packet **list,
1590 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1593 struct sockaddr_in addr;
1594 register struct rx_peer *peer = conn->peer;
1596 struct rx_packet *p = NULL;
1597 struct iovec wirevec[RX_MAXIOVECS];
1601 struct rx_jumboHeader *jp;
1603 char deliveryType = 'S';
1605 /* The address we're sending the packet to */
1606 addr.sin_family = AF_INET;
1607 addr.sin_port = peer->port;
1608 addr.sin_addr.s_addr = peer->host;
/* One iovec per packet plus the shared header must fit in wirevec. */
1610 if (len+1 > RX_MAXIOVECS) {
1611 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1615 * Stamp the packets in this jumbogram with consecutive serial numbers
/* Reserve `len` serials atomically; individual packets are stamped
 * with ++serial inside the loop below. */
1617 MUTEX_ENTER(&conn->conn_data_lock);
1618 serial = conn->serial;
1619 conn->serial += len;
1620 MUTEX_EXIT(&conn->conn_data_lock);
1623 /* This stuff should be revamped, I think, so that most, if not
1624 * all, of the header stuff is always added here. We could
1625 * probably do away with the encode/decode routines. XXXXX */
/* iovec 0 is the wire header of the FIRST packet; it fronts the
 * entire jumbogram. */
1628 length = RX_HEADER_SIZE;
1629 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1630 wirevec[0].iov_len = RX_HEADER_SIZE;
1631 for (i = 0 ; i < len ; i++) {
1634 /* The whole 3.5 jumbogram scheme relies on packets fitting
1635 * in a single packet buffer. */
1636 if (p->niovecs > 2) {
1637 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1640 /* Set the RX_JUMBO_PACKET flags in all but the last packets
/* Non-final packets must be exactly RX_JUMBOBUFFERSIZE and carry a
 * trailing jumbo header; the final packet uses its real length. */
1643 if (p->length != RX_JUMBOBUFFERSIZE) {
1644 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1646 p->header.flags |= RX_JUMBO_PACKET;
1647 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1648 wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1650 wirevec[i+1].iov_len = p->length;
1651 length += p->length;
1653 wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
1655 /* Convert jumbo packet header to network byte order */
/* jp points into the PREVIOUS packet's buffer (set at the bottom of
 * the prior iteration); it receives this packet's flags/spare. */
1656 temp = (afs_uint32)(p->header.flags) << 24;
1657 temp |= (afs_uint32)(p->header.spare);
1658 *(afs_uint32 *)jp = htonl(temp);
/* Jumbo header for the NEXT iteration lives just past this packet's
 * jumbo-sized data region. */
1660 jp = (struct rx_jumboHeader *)
1661 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1663 /* Stamp each packet with a unique serial number. The serial
1664 * number is maintained on a connection basis because some types
1665 * of security may be based on the serial number of the packet,
1666 * and security is handled on a per authenticated-connection
1668 /* Pre-increment, to guarantee no zero serial number; a zero
1669 * serial number means the packet was never sent. */
1670 p->header.serial = ++serial;
1671 /* This is so we can adjust retransmit time-outs better in the face of
1672 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1674 if (p->firstSerial == 0) {
1675 p->firstSerial = p->header.serial;
1679 /* If an output tracer function is defined, call it with the packet and
1680 * network address. Note this function may modify its arguments. */
1681 if (rx_almostSent) {
1682 int drop = (*rx_almostSent) (p, &addr);
1683 /* drop packet if return value is non-zero? */
1684 if (drop) deliveryType = 'D'; /* Drop the packet */
1688 /* Get network byte order header */
1689 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1690 * touch ALL the fields */
1693 /* Send the packet out on the same socket that related packets are being
1695 socket = (conn->type == RX_CLIENT_CONNECTION
1696 ? rx_socket : conn->service->socket);
1699 /* Possibly drop this packet, for testing purposes */
1700 if ((deliveryType == 'D') ||
1701 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1702 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1703 deliveryType = 'D'; /* Drop the packet */
1706 deliveryType = 'S'; /* Send the packet */
1707 #endif /* RXDEBUG */
1709 /* Loop until the packet is sent. We'd prefer just to use a
1710 * blocking socket, but unfortunately the interface doesn't
1711 * allow us to have the socket block in send mode, and not
1712 * block in receive mode */
1714 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
/* On Solaris kernels only: drop the global lock around the send
 * unless istack indicates we must not. */
1715 waslocked = ISAFS_GLOCK();
1716 if (!istack && waslocked) AFS_GUNLOCK();
1718 if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
1719 /* send failed, so let's hurry up the resend, eh? */
1720 MUTEX_ENTER(&rx_stats_mutex);
1721 rx_stats.netSendFailures++;
1722 MUTEX_EXIT(&rx_stats_mutex);
/* Pull forward the retry time of EVERY packet in the jumbogram. */
1723 for (i = 0 ; i < len ; i++) {
1725 p->retryTime = p->timeSent; /* resend it very soon */
1726 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1729 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1730 if (!istack && waslocked) AFS_GLOCK();
1735 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1736 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1737 peer->host, peer->port, p->header.serial, p->header.epoch,
1738 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1739 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1741 MUTEX_ENTER(&rx_stats_mutex);
1742 rx_stats.packetsSent[p->header.type-1]++;
1743 MUTEX_EXIT(&rx_stats_mutex);
1744 MUTEX_ENTER(&peer->peer_lock);
1745 hadd32(peer->bytesSent, p->length);
1746 MUTEX_EXIT(&peer->peer_lock);
1750 /* Send a "special" packet to the peer connection. If call is
1751 * specified, then the packet is directed to a specific call channel
1752 * associated with the connection, otherwise it is directed to the
1753 * connection only. Uses optionalPacket if it is supplied, rather than
1754 * allocating a new packet buffer. Nbytes is the length of the data
1755 * portion of the packet. If data is non-null, nbytes of data are
1756 * copied into the packet. Type is the type of the packet, as defined
1757 * in rx.h. Bug: there's a lot of duplication between this and other
1758 * routines. This needs to be cleaned up. */
/* Returns optionalPacket if one was supplied (caller keeps ownership),
 * otherwise the freshly allocated packet is freed here and NULL-ish
 * optionalPacket is returned.
 * NOTE(review): the remaining parameter declarations (type, data,
 * nbytes, istack), the BUSY call-number adjustment, and the
 * optionalPacket-reuse branch are in lines missing from this excerpt. */
1760 rxi_SendSpecial(call, conn, optionalPacket, type, data, nbytes, istack)
1761 register struct rx_call *call;
1762 register struct rx_connection *conn;
1763 struct rx_packet *optionalPacket;
1768 /* Some of the following stuff should be common code for all
1769 * packet sends (it's repeated elsewhere) */
1770 register struct rx_packet *p;
1772 int savelen = 0, saven = 0;
1773 int channel, callNumber;
1775 channel = call->channel;
1776 callNumber = *call->callNumber;
1777 /* BUSY packets refer to the next call on this connection */
1778 if (type == RX_PACKET_TYPE_BUSY) {
1787 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1788 if (!p) osi_Panic("rxi_SendSpecial failure");
/* Fill in the header from the connection/call identity. */
1795 p->header.serviceId = conn->serviceId;
1796 p->header.securityIndex = conn->securityIndex;
1797 p->header.cid = (conn->cid | channel);
1798 p->header.callNumber = callNumber;
1800 p->header.epoch = conn->epoch;
1801 p->header.type = type;
1802 p->header.flags = 0;
1803 if (conn->type == RX_CLIENT_CONNECTION)
1804 p->header.flags |= RX_CLIENT_INITIATED;
1806 rx_packetwrite(p, 0, nbytes, data);
/* Trim the iovec chain to exactly nbytes of payload, remembering the
 * pre-trim length/count so they can be restored after the send. */
1808 for (i=1; i < p->niovecs; i++) {
1809 if (nbytes <= p->wirevec[i].iov_len) {
1810 savelen = p->wirevec[i].iov_len;
1812 p->wirevec[i].iov_len = nbytes;
1813 p->niovecs = i+1; /* so condition fails because i == niovecs */
1815 else nbytes -= p->wirevec[i].iov_len;
/* With a call, send through the call path (rxi_Send); otherwise send
 * directly on the connection. */
1818 if (call) rxi_Send(call, p, istack);
1819 else rxi_SendPacket(conn, p, istack);
1820 if (saven) { /* means we truncated the packet above. We probably don't */
1821 /* really need to do this, but it seems safer this way, given that */
1822 /* sneaky optionalPacket... */
1823 p->wirevec[i-1].iov_len = savelen;
1826 if (!optionalPacket) rxi_FreePacket(p);
1827 return optionalPacket;
1831 /* Encode the packet's header (from the struct header in the packet to
1832 * the net byte order representation in the wire representation of the
1833 * packet, which is what is actually sent out on the wire) */
1834 void rxi_EncodePacketHeader(p)
1835 register struct rx_packet *p;
1837 register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */
1839 memset((char *)buf, 0, RX_HEADER_SIZE);
1840 *buf++ = htonl(p->header.epoch);
1841 *buf++ = htonl(p->header.cid);
1842 *buf++ = htonl(p->header.callNumber);
1843 *buf++ = htonl(p->header.seq);
1844 *buf++ = htonl(p->header.serial);
1845 *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
1846 | (((afs_uint32)p->header.flags)<<16)
1847 | (p->header.userStatus<<8) | p->header.securityIndex);
1848 /* Note: top 16 bits of this next word were reserved */
1849 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1852 /* Decode the packet's header (from net byte order to a struct header) */
1853 void rxi_DecodePacketHeader(p)
1854 register struct rx_packet *p;
1856 register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base); /* MTUXXX */
1859 p->header.epoch = ntohl(*buf);
1861 p->header.cid = ntohl(*buf);
1863 p->header.callNumber = ntohl(*buf);
1865 p->header.seq = ntohl(*buf);
1867 p->header.serial = ntohl(*buf);
1873 /* C will truncate byte fields to bytes for me */
1874 p->header.type = temp>>24;
1875 p->header.flags = temp>>16;
1876 p->header.userStatus = temp>>8;
1877 p->header.securityIndex = temp>>0;
1882 p->header.serviceId = (temp&0xffff);
1883 p->header.spare = temp>>16;
1884 /* Note: top 16 bits of this last word are the security checksum */
/* Prepare a data packet for (re)transmission on `call`: fill in the
 * header from the call/connection, reset transmit-time bookkeeping,
 * reconcile p->length with the iovec chain (freeing surplus cbufs),
 * and give the security object a chance to process the packet.
 * NOTE(review): the `last` parameter declaration, the `if (last)`
 * guard, loop locals i/j, and the length-mismatch panic condition are
 * in lines missing from this excerpt. */
1887 void rxi_PrepareSendPacket(call, p, last)
1888 register struct rx_call *call;
1889 register struct rx_packet *p;
1892 register struct rx_connection *conn = call->conn;
1894 ssize_t len; /* len must be a signed type; it can go negative */
1896 p->flags &= ~RX_PKTFLAG_ACKED;
1897 p->header.cid = (conn->cid | call->channel);
1898 p->header.serviceId = conn->serviceId;
1899 p->header.securityIndex = conn->securityIndex;
1900 p->header.callNumber = *call->callNumber;
/* Assign the next transmit sequence number for this call. */
1901 p->header.seq = call->tnext++;
1902 p->header.epoch = conn->epoch;
1903 p->header.type = RX_PACKET_TYPE_DATA;
1904 p->header.flags = 0;
1905 p->header.spare = 0;
1906 if (conn->type == RX_CLIENT_CONNECTION)
1907 p->header.flags |= RX_CLIENT_INITIATED;
/* Mark the final data packet of the call (guarded by `last`, whose
 * test line is missing from this excerpt). */
1910 p->header.flags |= RX_LAST_PACKET;
1912 clock_Zero(&p->retryTime); /* Never yet transmitted */
1913 clock_Zero(&p->firstSent); /* Never yet transmitted */
1914 p->header.serial = 0; /* Another way of saying never transmitted... */
1917 /* Now that we're sure this is the last data on the call, make sure
1918 * that the "length" and the sum of the iov_lens matches. */
1919 len = p->length + call->conn->securityHeaderSize;
/* Walk the data iovecs, consuming `len`; on exit, i-1 is the last
 * iovec actually needed. */
1921 for (i=1; i < p->niovecs && len > 0; i++) {
1922 len -= p->wirevec[i].iov_len;
1925 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1928 /* Free any extra elements in the wirevec */
1929 for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1930 rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
/* len is <= 0 here; shrink the last used iovec to the exact size. */
1933 p->wirevec[i-1].iov_len += len;
1935 RXS_PreparePacket(conn->securityObject, call, p);
1938 /* Given an interface MTU size, calculate an adjusted MTU size that
1939 * will make efficient use of the RX buffers when the peer is sending
1940 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
1941 int rxi_AdjustIfMTU(int mtu)
1946 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1947 if (mtu <= adjMTU) {
1954 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1955 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1958 /* Given an interface MTU size, and the peer's advertised max receive
1959 * size, calculate an adjisted maxMTU size that makes efficient use
1960 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
1961 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1963 int maxMTU = mtu * rxi_nSendFrags;
1964 maxMTU = MIN(maxMTU, peerMaxMTU);
1965 return rxi_AdjustIfMTU(maxMTU);
1968 /* Given a packet size, figure out how many datagram packet will fit.
1969 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1970 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1971 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
1972 int rxi_AdjustDgramPackets(int frags, int mtu)
1975 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
1978 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1979 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1980 /* subtract the size of the first and last packets */
1981 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
1985 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));