2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "../afs/param.h"
14 #include <afs/param.h>
21 #include "../afs/sysincludes.h"
22 #include "../afs/afsincludes.h"
23 #include "../rx/rx_kcommon.h"
24 #include "../rx/rx_clock.h"
25 #include "../rx/rx_queue.h"
26 #include "../rx/rx_packet.h"
27 #else /* defined(UKERNEL) */
28 #include "../h/types.h"
29 #ifndef AFS_LINUX20_ENV
30 #include "../h/systm.h"
32 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
33 #include "../afs/sysincludes.h"
35 #include "../h/socket.h"
36 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
37 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
38 #include "../sys/mount.h" /* it gets pulled in by something later anyway */
40 #include "../h/mbuf.h"
42 #include "../netinet/in.h"
43 #include "../afs/afs_osi.h"
44 #include "../rx/rx_kmutex.h"
45 #include "../rx/rx_clock.h"
46 #include "../rx/rx_queue.h"
48 #include <sys/sysmacros.h>
50 #include "../rx/rx_packet.h"
51 #endif /* defined(UKERNEL) */
52 #include "../rx/rx_globals.h"
54 #include "sys/types.h"
57 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
61 #include <sys/socket.h>
62 #include <netinet/in.h>
63 #endif /* AFS_NT40_ENV */
64 #include "rx_xmit_nt.h"
67 #include <sys/socket.h>
68 #include <netinet/in.h>
74 #include <sys/sysmacros.h>
76 #include "rx_packet.h"
77 #include "rx_globals.h"
92 /* rxdb_fileID is used to identify the lock location, along with line#. */
93 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
94 #endif /* RX_LOCKS_DB */
95 struct rx_packet *rx_mallocedP = 0;
97 extern char cml_version_number[];
98 extern int (*rx_almostSent)();
100 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
101 afs_int32 ahost, short aport, afs_int32 istack);
103 /* some rules about packets:
104 * 1. When a packet is allocated, the final iov_buf contains room for
105 * a security trailer, but iov_len masks that fact. If the security
106 * package wants to add the trailer, it may do so, and then extend
107 * iov_len appropriately. For this reason, packet's niovecs and
108 * iov_len fields should be accurate before calling PreparePacket.
112 * all packet buffers (iov_base) are integral multiples of
114 * offset is an integral multiple of the word size.
/* Slow-path read of a 32-bit word at byte `offset` into the packet's data.
 * Walks the iovecs starting at index 1 (index 0 is the wire header) until
 * it finds the iovec containing `offset`, then dereferences directly —
 * relies on the alignment guarantees stated in the comment above.
 * NOTE(review): the fall-through path (offset past end of data) is not
 * visible in this listing — confirm against the full source. */
116 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
120 for (l=0, i=1; i< packet->niovecs ; i++ ) {
    /* l = total bytes in all iovecs prior to iovec i */
121 if (l + packet->wirevec[i].iov_len > offset) {
122 return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
124 l += packet->wirevec[i].iov_len;
131 * all packet buffers (iov_base) are integral multiples of the word size.
132 * offset is an integral multiple of the word size.
/* Slow-path write of a 32-bit word at byte `offset` into the packet's data.
 * Mirror of rx_SlowGetInt32: scan iovecs 1..niovecs-1 for the one holding
 * `offset` and store `data` there directly (word alignment assumed, per the
 * comment above). */
134 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
138 for (l=0, i=1; i< packet->niovecs ; i++ ) {
    /* l = total bytes in all iovecs prior to iovec i */
139 if (l + packet->wirevec[i].iov_len > offset) {
140 *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
144 l += packet->wirevec[i].iov_len;
151 * all packet buffers (iov_base) are integral multiples of the
153 * offset is an integral multiple of the word size.
155 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
/* Copy up to `resid` bytes out of the packet, starting at byte `offset`,
 * into the caller's buffer `out`.  Returns the number of bytes actually
 * copied (r - resid when the packet ran out of data first, else r). */
157 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
158 int resid, char *out)
160 unsigned int i, j, l, r;
    /* locate the iovec that contains `offset` */
161 for (l=0, i=1; i< packet->niovecs ; i++ ) {
162 if (l + packet->wirevec[i].iov_len > offset) {
165 l += packet->wirevec[i].iov_len;
168 /* i is the iovec which contains the first little bit of data in which we
169 * are interested. l is the total length of everything prior to this iovec.
170 * j is the number of bytes we can safely copy out of this iovec.
173 while ((resid > 0) && (i < packet->niovecs)) {
174 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
175 memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
177 l += packet->wirevec[i].iov_len;
181 return (resid ? (r - resid) : r);
186 * all packet buffers (iov_base) are integral multiples of the
188 * offset is an integral multiple of the word size.
/* Copy `resid` bytes from the caller's buffer into the packet starting at
 * byte `offset`, growing the packet with continuation buffers via
 * rxi_AllocDataBuf when the existing iovecs are exhausted.  Returns the
 * number of bytes actually written. */
190 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
    /* locate the iovec that contains `offset` */
196 for (l=0, i=1; i < packet->niovecs; i++ ) {
197 if (l + packet->wirevec[i].iov_len > offset) {
200 l += packet->wirevec[i].iov_len;
203 /* i is the iovec which contains the first little bit of data in which we
204 * are interested. l is the total length of everything prior to this iovec.
205 * j is the number of bytes we can safely copy out of this iovec.
208 while ((resid > 0) && (i < RX_MAXWVECS)) {
    /* out of allocated iovecs: try to grow; rxi_AllocDataBuf>0 means it
     * could not supply all the space requested */
209 if (i >= packet->niovecs)
210 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
213 b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
214 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
217 l += packet->wirevec[i].iov_len;
221 return (resid ? (r - resid) : r);
/* Allocate a packet to serve as a continuation buffer for `class`.
 * Takes rx_freePktQ_lock for the duration.  On quota exhaustion, bumps the
 * per-class failure counter (under rx_stats_mutex), sets
 * rxi_NeedMorePackets, and bails; otherwise grows the free queue if needed
 * and pops its head. */
224 static struct rx_packet *allocCBuf(int class)
230 MUTEX_ENTER(&rx_freePktQ_lock);
233 if (rxi_OverQuota(class)) {
235 rxi_NeedMorePackets = TRUE;
236 MUTEX_ENTER(&rx_stats_mutex);
    /* one failure counter per allocation class */
238 case RX_PACKET_CLASS_RECEIVE:
239 rx_stats.receivePktAllocFailures++;
241 case RX_PACKET_CLASS_SEND:
242 rx_stats.sendPktAllocFailures++;
244 case RX_PACKET_CLASS_SPECIAL:
245 rx_stats.specialPktAllocFailures++;
247 case RX_PACKET_CLASS_RECV_CBUF:
248 rx_stats.receiveCbufPktAllocFailures++;
250 case RX_PACKET_CLASS_SEND_CBUF:
251 rx_stats.sendCbufPktAllocFailures++;
254 MUTEX_EXIT(&rx_stats_mutex);
258 if (queue_IsEmpty(&rx_freePacketQueue)) {
260 rxi_NeedMorePackets = TRUE;
    /* still empty after signalling need: grow the pool in place
     * (lock already held, hence the NoLock variant) */
264 if (queue_IsEmpty(&rx_freePacketQueue)) {
265 rxi_MorePacketsNoLock(rx_initSendWindow);
270 c = queue_First(&rx_freePacketQueue, rx_packet);
272 if (!(c->flags & RX_PKTFLAG_FREE))
273 osi_Panic("rxi_AllocPacket: packet not free\n");
274 c->flags = 0; /* clear RX_PKTFLAG_FREE, initialize the rest */
280 MUTEX_EXIT(&rx_freePktQ_lock);
287 * Free a packet currently used as a continuation buffer
/* Return a continuation-buffer packet to the free queue.
 * Takes rx_freePktQ_lock around the NoLock free. */
289 void rxi_freeCBuf(struct rx_packet *c)
294 MUTEX_ENTER(&rx_freePktQ_lock);
296 rxi_FreePacketNoLock(c);
297 /* Wakeup anyone waiting for packets */
300 MUTEX_EXIT(&rx_freePktQ_lock);
304 /* this one is kind of awful.
305 * In rxkad, the packet has been all shortened, and everything, ready for
306 * sending. All of a sudden, we discover we need some of that space back.
307 * This isn't terribly general, because it knows that the packets are only
308 * rounded up to the EBS (userdata + security header).
/* Reclaim `nb` bytes of previously-trimmed space at the end of packet `p`
 * by extending the relevant iovec, provided the extension still fits the
 * underlying buffer (RX_FIRSTBUFFERSIZE for the localdata buffer,
 * RX_CBUFFERSIZE for continuation buffers).  See the caveat above: this
 * only works because packets are rounded up to the EBS. */
310 int rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    /* first data buffer lives inside the packet itself (localdata) */
314 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
315 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
316 p->wirevec[i].iov_len += nb;
    /* otherwise it is a continuation buffer */
321 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
322 p->wirevec[i].iov_len += nb;
329 /* get sufficient space to store nb bytes of data (or more), and hook
330 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
331 * returns the number of bytes >0 which it failed to come up with.
332 * Don't need to worry about locking on packet, since only
333 * one thread can manipulate one at a time. Locking on continuation
334 * packets is handled by allocCBuf */
335 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
/* Attach continuation buffers to packet `p` until at least `nb` more bytes
 * of space are available (or RX_MAXWVECS iovecs are in use).  Each cbuf is
 * itself a packet from allocCBuf; its localdata area becomes the new iovec.
 * Per the contract above, returns <=0 on success, else the byte shortfall. */
336 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
340 for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
341 register struct rx_packet *cb;
342 if ((cb = allocCBuf(class))) {
343 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
344 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
345 nb -= RX_CBUFFERSIZE;
346 p->length += RX_CBUFFERSIZE;
355 /* Add more packet buffers */
/* Allocate `apackets` new rx_packet structures in one slab, initialize the
 * header/localdata iovecs of each, mark them free, and append them to the
 * free queue under rx_freePktQ_lock.
 * NOTE(review): rx_mallocedP is overwritten with each slab, so only the
 * most recent slab pointer is retained — see rxi_FreeAllPackets. */
356 void rxi_MorePackets(int apackets)
358 struct rx_packet *p, *e;
362 getme = apackets * sizeof(struct rx_packet);
363 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
365 PIN(p, getme); /* XXXXX */
366 memset((char *)p, 0, getme);
369 MUTEX_ENTER(&rx_freePktQ_lock);
371 for (e = p + apackets; p<e; p++) {
    /* iovec 0 = wire header, iovec 1 = first data buffer */
372 p->wirevec[0].iov_base = (char *) (p->wirehead);
373 p->wirevec[0].iov_len = RX_HEADER_SIZE;
374 p->wirevec[1].iov_base = (char *) (p->localdata);
375 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
376 p->flags |= RX_PKTFLAG_FREE;
379 queue_Append(&rx_freePacketQueue, p);
381 rx_nFreePackets += apackets;
382 rxi_NeedMorePackets = FALSE;
386 MUTEX_EXIT(&rx_freePktQ_lock);
391 /* Add more packet buffers */
/* Like rxi_MorePackets but assumes the caller already holds
 * rx_freePktQ_lock, and over-allocates so that a quarter of the packets
 * can carry maximal (jumbo-sized) payloads via continuation buffers. */
392 void rxi_MorePacketsNoLock(int apackets)
394 struct rx_packet *p, *e;
397 /* allocate enough packets that 1/4 of the packets will be able
398 * to hold maximal amounts of data */
399 apackets += (apackets/4)
400 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
401 getme = apackets * sizeof(struct rx_packet);
402 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
404 memset((char *)p, 0, getme);
406 for (e = p + apackets; p<e; p++) {
    /* iovec 0 = wire header, iovec 1 = first data buffer */
407 p->wirevec[0].iov_base = (char *) (p->wirehead);
408 p->wirevec[0].iov_len = RX_HEADER_SIZE;
409 p->wirevec[1].iov_base = (char *) (p->localdata);
410 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
411 p->flags |= RX_PKTFLAG_FREE;
414 queue_Append(&rx_freePacketQueue, p);
416 rx_nFreePackets += apackets;
417 rxi_NeedMorePackets = FALSE;
/* Release the packet slab pointed at by rx_mallocedP.
 * NOTE(review): as the MTUXXX comment says, this frees only one slab with a
 * size derived from rx_maxReceiveWindow, not every slab ever allocated by
 * rxi_MorePackets — confirm lifecycle against callers. */
422 void rxi_FreeAllPackets(void)
424 /* must be called at proper interrupt level, etcetera */
425 /* MTUXXX need to free all Packets */
426 osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
427 UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
430 /* Allocate more packets iff we need more continuation buffers */
431 /* In kernel, can't page in memory with interrupts disabled, so we
432 * don't use the event mechanism. */
/* If a previous allocation signalled packet exhaustion
 * (rxi_NeedMorePackets), grow the pool by rx_initSendWindow packets. */
433 void rx_CheckPackets(void)
435 if (rxi_NeedMorePackets) {
436 rxi_MorePackets(rx_initSendWindow);
440 /* In the packet freeing routine below, the assumption is that
441 we want all of the packets to be used equally frequently, so that we
442 don't get packet buffers paging out. It would be just as valid to
443 assume that we DO want them to page out if not many are being used.
444 In any event, we assume the former, and append the packets to the end
446 /* This explanation is bogus. The free list doesn't remain in any kind of
447 useful order for long: the packets in use get pretty much randomly scattered
448 across all the pages. In order to permit unused {packets,bufs} to page out, they
449 must be stored so that packets which are adjacent in memory are adjacent in the
450 free list. An array springs rapidly to mind.
453 /* Actually free the packet p. */
/* Put packet `p` back on the free queue.  Caller must hold
 * rx_freePktQ_lock.  Panics on double-free (RX_PKTFLAG_FREE already set). */
454 void rxi_FreePacketNoLock(struct rx_packet *p)
456 dpf(("Free %x\n", p));
458 if (p->flags & RX_PKTFLAG_FREE)
459 osi_Panic("rxi_FreePacketNoLock: packet already free\n");
461 p->flags |= RX_PKTFLAG_FREE;
462 queue_Append(&rx_freePacketQueue, p);
/* Free all continuation buffers attached to `p` (iovecs 2..niovecs-1),
 * mapping each cbuf base back to its owning packet via RX_CBUF_TO_PACKET.
 * Caller must hold rx_freePktQ_lock.  `first` must currently be 1 (the
 * MTUXXX panics enforce the fixed layout: iovec 1 is always localdata). */
465 int rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
467 struct iovec *iov, *end;
469 if (first != 1) /* MTUXXX */
470 osi_Panic("FreeDataBufs 1: first must be 1");
471 iov = &p->wirevec[1];
472 end = iov + (p->niovecs-1);
473 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
474 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
475 for (iov++ ; iov < end ; iov++) {
477 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
478 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
486 int rxi_nBadIovecs = 0;
488 /* rxi_RestoreDataBufs
490 * Restore the correct sizes to the iovecs. Called when reusing a packet
491 * for reading off the wire.
/* Reset every iovec of `p` to its full buffer size (header, localdata, and
 * each continuation buffer) so the packet can be reused for a wire read.
 * A NULL cbuf base is tolerated (counted via rxi_nBadIovecs, per the
 * global above). */
493 void rxi_RestoreDataBufs(struct rx_packet *p)
496 struct iovec *iov = &p->wirevec[2];
498 p->wirevec[0].iov_base = (char *) (p->wirehead);
499 p->wirevec[0].iov_len = RX_HEADER_SIZE;
500 p->wirevec[1].iov_base = (char *) (p->localdata);
501 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
503 for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
504 if (!iov->iov_base) {
509 iov->iov_len = RX_CBUFFERSIZE;
/* Free any continuation buffers past the end of the packet's actual data
 * (p->length).  First walks iovecs 2.. until the cumulative length covers
 * the payload, then frees the remaining (empty) cbufs under
 * rx_freePktQ_lock. */
513 int rxi_TrimDataBufs(struct rx_packet *p, int first)
516 struct iovec *iov, *end;
520 osi_Panic("TrimDataBufs 1: first must be 1");
522 /* Skip over continuation buffers containing message data */
523 iov = &p->wirevec[2];
524 end = iov + (p->niovecs-2);
525 length = p->length - p->wirevec[1].iov_len;
526 for (; iov < end && length > 0 ; iov++) {
528 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
529 length -= iov->iov_len;
532 /* iov now points to the first empty data buffer. */
537 MUTEX_ENTER(&rx_freePktQ_lock);
539 for (; iov < end ; iov++) {
541 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
542 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
547 MUTEX_EXIT(&rx_freePktQ_lock);
553 /* Free the packet p. P is assumed not to be on any queue, i.e.
554 * remove it yourself first if you call this routine. */
/* Free packet `p` and all of its continuation buffers.  Takes
 * rx_freePktQ_lock; `p` must not be on any queue (see comment above). */
555 void rxi_FreePacket(struct rx_packet *p)
560 MUTEX_ENTER(&rx_freePktQ_lock);
562 rxi_FreeDataBufsNoLock(p,1);
563 rxi_FreePacketNoLock(p);
564 /* Wakeup anyone waiting for packets */
567 MUTEX_EXIT(&rx_freePktQ_lock);
572 /* rxi_AllocPacket sets up p->length so it reflects the number of
573 * bytes in the packet at this point, **not including** the header.
574 * The header is absolutely necessary, besides, this is the way the
575 * length field is usually used */
/* Allocate one packet from the free queue; caller holds rx_freePktQ_lock.
 * On quota exhaustion: bump the per-class failure counter and return NULL.
 * Otherwise pop the queue head, clear its free flag, and reset iovecs 0/1
 * to the full header/localdata buffers.  p->length is set to the data
 * capacity excluding the header (see comment above). */
576 struct rx_packet *rxi_AllocPacketNoLock(int class)
578 register struct rx_packet *p;
581 if (rxi_OverQuota(class)) {
582 rxi_NeedMorePackets = TRUE;
583 MUTEX_ENTER(&rx_stats_mutex);
    /* one failure counter per allocation class */
585 case RX_PACKET_CLASS_RECEIVE:
586 rx_stats.receivePktAllocFailures++;
588 case RX_PACKET_CLASS_SEND:
589 rx_stats.sendPktAllocFailures++;
591 case RX_PACKET_CLASS_SPECIAL:
592 rx_stats.specialPktAllocFailures++;
594 case RX_PACKET_CLASS_RECV_CBUF:
595 rx_stats.receiveCbufPktAllocFailures++;
597 case RX_PACKET_CLASS_SEND_CBUF:
598 rx_stats.sendCbufPktAllocFailures++;
601 MUTEX_EXIT(&rx_stats_mutex);
602 return (struct rx_packet *) 0;
606 MUTEX_ENTER(&rx_stats_mutex);
607 rx_stats.packetRequests++;
608 MUTEX_EXIT(&rx_stats_mutex);
611 if (queue_IsEmpty(&rx_freePacketQueue))
612 osi_Panic("rxi_AllocPacket error");
614 if (queue_IsEmpty(&rx_freePacketQueue))
615 rxi_MorePacketsNoLock(rx_initSendWindow);
619 p = queue_First(&rx_freePacketQueue, rx_packet);
620 if (!(p->flags & RX_PKTFLAG_FREE))
621 osi_Panic("rxi_AllocPacket: packet not free\n");
623 dpf(("Alloc %x, class %d\n", p, class));
626 p->flags = 0; /* clear RX_PKTFLAG_FREE, initialize the rest */
629 /* have to do this here because rx_FlushWrite fiddles with the iovs in
630 * order to truncate outbound packets. In the near future, may need
631 * to allocate bufs from a static pool here, and/or in AllocSendPacket
633 p->wirevec[0].iov_base = (char *) (p->wirehead);
634 p->wirevec[0].iov_len = RX_HEADER_SIZE;
635 p->wirevec[1].iov_base = (char *) (p->localdata);
636 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
638 p->length = RX_FIRSTBUFFERSIZE;
/* Locking wrapper around rxi_AllocPacketNoLock: acquires
 * rx_freePktQ_lock for the allocation. */
642 struct rx_packet *rxi_AllocPacket(int class)
644 register struct rx_packet *p;
646 MUTEX_ENTER(&rx_freePktQ_lock);
647 p = rxi_AllocPacketNoLock(class);
648 MUTEX_EXIT(&rx_freePktQ_lock);
652 /* This guy comes up with as many buffers as it {takes,can get} given
653 * the MTU for this call. It also sets the packet length before
654 * returning. caution: this is often called at NETPRI
655 * Called with call locked.
/* Allocate a send packet sized for this call: clamp `want` to the call's
 * usable MTU (mud = MTU - header), grow with cbufs if needed, and reserve
 * `delta` bytes for the connection's security header + max trailer.
 * Loops, sleeping on the free-packet condition (dropping call->lock, with
 * CALL_HOLD/CALL_RELE around the wait), until a packet is obtained or the
 * call errors out.  Often called at NETPRI; call->lock held on entry. */
657 struct rx_packet *rxi_AllocSendPacket(register struct rx_call *call, int want)
659 register struct rx_packet *p = (struct rx_packet *) 0;
661 register unsigned delta;
664 mud = call->MTU - RX_HEADER_SIZE;
665 delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
666 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
668 while (!(call->error)) {
669 MUTEX_ENTER(&rx_freePktQ_lock);
670 /* if an error occurred, or we get the packet we want, we're done */
671 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
672 MUTEX_EXIT(&rx_freePktQ_lock);
675 want = MIN(want, mud);
677 if ((unsigned) want > p->length)
678 (void) rxi_AllocDataBuf(p, (want - p->length),
679 RX_PACKET_CLASS_SEND_CBUF);
681 if ((unsigned) p->length > mud)
    /* security overhead must fit inside the packet */
684 if (delta >= p->length) {
693 /* no error occurred, and we didn't get a packet, so we sleep.
694 * At this point, we assume that packets will be returned
695 * sooner or later, as packets are acknowledged, and so we
698 call->flags |= RX_CALL_WAIT_PACKETS;
699 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
700 MUTEX_EXIT(&call->lock);
701 rx_waitingForPackets = 1;
703 #ifdef RX_ENABLE_LOCKS
704 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
706 osi_rxSleep(&rx_waitingForPackets);
708 MUTEX_EXIT(&rx_freePktQ_lock);
709 MUTEX_ENTER(&call->lock);
710 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
711 call->flags &= ~RX_CALL_WAIT_PACKETS;
720 /* count the number of used FDs */
/* Count how many of the first `amax` file descriptors are open, by
 * fstat()ing each one and counting the successes.  (Userspace only; the
 * kernel build replaces this with a macro — see below.) */
721 static int CountFDs(register int amax)
724 register int i, code;
728 for(i=0;i<amax;i++) {
729 code = fstat(i, &tstat);
730 if (code == 0) count++;
737 #define CountFDs(amax) amax
741 #if !defined(KERNEL) || defined(UKERNEL)
743 /* This function reads a single packet from the interface into the
744 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
745 * (host,port) of the sender are stored in the supplied variables, and
746 * the data length of the packet is stored in the packet structure.
747 * The header is decoded. */
/* Read one UDP datagram from `socket` into packet `p` via recvmsg over the
 * packet's iovecs.  Stores the sender in *host/*port (network byte order,
 * as taken from sockaddr_in), decodes the rx header, updates receive
 * statistics, and credits the byte count to an existing peer (never
 * creates one — see comment below).  Returns 0 for a bogus/short read. */
748 int rxi_ReadPacket(int socket, register struct rx_packet *p, afs_uint32 *host, u_short *port)
750 struct sockaddr_in from;
753 register afs_int32 tlen, savelen;
755 rx_computelen(p, tlen);
756 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
758 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
759 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
760 * it once in order to avoid races. */
763 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
771 /* Extend the last iovec for padding, it's just to make sure that the
772 * read doesn't return more data than we expect, and is done to get around
773 * our problems caused by the lack of a length field in the rx header.
774 * Use the extra buffer that follows the localdata in each packet
776 savelen = p->wirevec[p->niovecs-1].iov_len;
777 p->wirevec[p->niovecs-1].iov_len += RX_EXTRABUFFERSIZE;
779 memset((char *)&msg, 0, sizeof(msg));
780 msg.msg_name = (char *) &from;
781 msg.msg_namelen = sizeof(struct sockaddr_in);
782 msg.msg_iov = p->wirevec;
783 msg.msg_iovlen = p->niovecs;
784 nbytes = rxi_Recvmsg(socket, &msg, 0);
786 /* restore the vec to its correct state */
787 p->wirevec[p->niovecs-1].iov_len = savelen;
789 p->length = (nbytes - RX_HEADER_SIZE);
790 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
    /* oversized or negative-length read: treat the buffer as suspect */
792 rxi_MorePackets(rx_initSendWindow);
794 else if (nbytes < 0 && errno == EWOULDBLOCK) {
795 MUTEX_ENTER(&rx_stats_mutex);
796 rx_stats.noPacketOnRead++;
797 MUTEX_EXIT(&rx_stats_mutex);
801 MUTEX_ENTER(&rx_stats_mutex);
802 rx_stats.bogusPacketOnRead++;
803 rx_stats.bogusHost = from.sin_addr.s_addr;
804 MUTEX_EXIT(&rx_stats_mutex);
805 dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
806 from.sin_port,nbytes));
811 /* Extract packet header. */
812 rxi_DecodePacketHeader(p);
814 *host = from.sin_addr.s_addr;
815 *port = from.sin_port;
816 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
817 struct rx_peer *peer;
818 MUTEX_ENTER(&rx_stats_mutex);
819 rx_stats.packetsRead[p->header.type-1]++;
820 MUTEX_EXIT(&rx_stats_mutex);
822 * Try to look up this peer structure. If it doesn't exist,
823 * don't create a new one -
824 * we don't keep count of the bytes sent/received if a peer
825 * structure doesn't already exist.
827 * The peer/connection cleanup code assumes that there is 1 peer
828 * per connection. If we actually created a peer structure here
829 * and this packet was an rxdebug packet, the peer structure would
830 * never be cleaned up.
832 peer = rxi_FindPeer(*host, *port, 0, 0);
834 MUTEX_ENTER(&peer->peer_lock);
835 hadd32(peer->bytesReceived, p->length);
836 MUTEX_EXIT(&peer->peer_lock);
840 /* Free any empty packet buffers at the end of this packet */
841 rxi_TrimDataBufs(p, 1);
847 #endif /* !KERNEL || UKERNEL */
849 /* This function splits off the first packet in a jumbo packet.
850 * As of AFS 3.5, jumbograms contain more than one fixed size
851 * packet, and the RX_JUMBO_PACKET flag is set in all but the
852 * last packet header. All packets (except the last) are padded to
853 * fall on RX_CBUFFERSIZE boundaries.
854 * HACK: We store the length of the first n-1 packets in the
855 * last two pad bytes. */
/* Split off the first fixed-size sub-packet of a jumbogram `p`, returning
 * the remainder as a new packet `np` built in place from p's first
 * continuation buffer (RX_CBUF_TO_PACKET).  The abbreviated 4-byte jumbo
 * header between the two carries flags and a checksum; np's header is
 * cloned from p's with serial/seq advanced by one.  See the layout
 * description above. */
857 struct rx_packet *rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host,
858 short port, int first)
860 struct rx_packet *np;
861 struct rx_jumboHeader *jp;
867 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
868 * bytes in length. All but the first packet are preceded by
869 * an abbreviated four byte header. The length of the last packet
870 * is calculated from the size of the jumbogram. */
871 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
873 if ((int)p->length < length) {
874 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
877 niov = p->niovecs - 2;
879 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
882 iov = &p->wirevec[2];
883 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
885 /* Get a pointer to the abbreviated packet header */
886 jp = (struct rx_jumboHeader *)
887 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
889 /* Set up the iovecs for the next packet */
890 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
891 np->wirevec[0].iov_len = sizeof(struct rx_header);
892 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
893 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
894 np->niovecs = niov+1;
    /* remaining cbufs of p (after the first) become np's cbufs */
895 for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
896 np->wirevec[i] = *iov;
898 np->length = p->length - length;
899 p->length = RX_JUMBOBUFFERSIZE;
902 /* Convert the jumbo packet header to host byte order */
903 temp = ntohl(*(afs_uint32 *)jp);
904 jp->flags = (u_char)(temp >> 24);
905 jp->cksum = (u_short)(temp);
907 /* Fill in the packet header */
908 np->header = p->header;
909 np->header.serial = p->header.serial + 1;
910 np->header.seq = p->header.seq + 1;
911 np->header.flags = jp->flags;
912 np->header.spare = jp->cksum;
918 /* Send a udp datagram */
/* Send a UDP datagram: build a msghdr around the caller's iovec array and
 * destination address, then hand it to rxi_Sendmsg. */
919 int osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
920 int length, int istack)
924 memset(&msg, 0, sizeof(msg));
926 msg.msg_iovlen = nvecs;
928 msg.msg_namelen = sizeof(struct sockaddr_in);
930 rxi_Sendmsg(socket, &msg, 0);
934 #elif !defined(UKERNEL)
936 * message receipt is done in rxk_input or rx_put.
941 * Copy an mblock to the contiguous area pointed to by cp.
942 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
943 * but it doesn't really.
944 * Returns the number of bytes not transferred.
945 * The message is NOT changed.
/* Copy a STREAMS mblk chain into the flat buffer `cp`, following b_cont
 * links and skipping non-M_DATA blocks.  Per the comment above, <off> is
 * not actually honored, and the return value is the count of bytes NOT
 * transferred.  The mblk chain is not modified. */
947 static int cpytoc(mblk_t *mp, register int off, register int len, register char *cp)
951 for (;mp && len > 0; mp = mp->b_cont) {
952 if (mp->b_datap->db_type != M_DATA) {
955 n = MIN(len, (mp->b_wptr - mp->b_rptr));
956 memcpy(cp, (char *)mp->b_rptr, n);
964 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
965 * but it doesn't really.
966 * This sucks, anyway, do it like m_cpy.... below
/* Copy a STREAMS mblk chain into an iovec array, advancing through both
 * the chain (b_cont) and the iovecs as each fills.  As noted above, <off>
 * is not actually honored. */
968 static int cpytoiovec(mblk_t *mp, int off, int len, register struct iovec *iovs, int niovs)
970 register int m,n,o,t,i;
972 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
973 if (mp->b_datap->db_type != M_DATA) {
976 n = MIN(len, (mp->b_wptr - mp->b_rptr));
985 memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
994 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
995 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
997 #if !defined(AFS_LINUX20_ENV)
/* BSD-mbuf equivalent of cpytoiovec: skip `off` bytes into the mbuf chain,
 * then copy `len` bytes into the iovec array, walking both the chain and
 * the iovecs as each is exhausted. */
998 static int m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1001 unsigned int l1, l2, i, t;
1003 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1004 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
    /* consume whole mbufs until `off` falls inside the current one */
1007 if (m->m_len <= off) {
1017 p1 = mtod(m, caddr_t)+off;
1018 l1 = m->m_len - off;
1020 p2 = iovs[0].iov_base;
1021 l2 = iovs[0].iov_len;
    /* t = largest chunk movable without overrunning source, dest, or len */
1024 t = MIN(l1, MIN(l2, (unsigned int)len));
1033 p1 = mtod(m, caddr_t);
1039 p2 = iovs[i].iov_base;
1040 l2 = iovs[i].iov_len;
1048 #endif /* AFS_SUN5_ENV */
1050 #if !defined(AFS_LINUX20_ENV)
/* Copy an incoming mbuf chain into the iovecs of packet `phandle`,
 * skipping `hdr_len` bytes and copying `data_len` bytes.  (K&R-style
 * definition; parameter declarations follow the signature.) */
1051 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1058 struct rx_packet *phandle;
1059 int hdr_len, data_len;
1063 code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1069 #endif /*KERNEL && !UKERNEL*/
1072 /* send a response to a debug packet */
/* Handle an incoming rxdebug request packet `ap` and send the reply by
 * reusing the same packet.  Dispatches on the request type (tin.type):
 * basic stats, connection dump, peer dump, raw rx_stats, or an error
 * response for unknown types.  Connection/peer dumps iterate the hash
 * tables under their respective locks, sending one reply per matching
 * entry and a 0xffffffff-tagged terminator at the end. */
1074 struct rx_packet *rxi_ReceiveDebugPacket(register struct rx_packet *ap,
1075 osi_socket asocket, afs_int32 ahost, short aport, int istack)
1077 struct rx_debugIn tin;
1079 struct rx_serverQueueEntry *np, *nqe;
1082 * Only respond to client-initiated Rx debug packets,
1083 * and clear the client flag in the response.
1085 if (ap->header.flags & RX_CLIENT_INITIATED) {
1086 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1087 rxi_EncodePacketHeader(ap);
1092 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1093 /* all done with packet, now set length to the truth, so we can
1094 * reuse this packet */
1095 rx_computelen(ap, ap->length);
1097 tin.type = ntohl(tin.type);
1098 tin.index = ntohl(tin.index);
1100 case RX_DEBUGI_GETSTATS: {
1101 struct rx_debugStats tstat;
1103 /* get basic stats */
1104 memset((char *)&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1105 tstat.version = RX_DEBUGI_VERSION;
1106 #ifndef RX_ENABLE_LOCKS
1107 tstat.waitingForPackets = rx_waitingForPackets;
1109 tstat.nFreePackets = htonl(rx_nFreePackets);
1110 tstat.callsExecuted = htonl(rxi_nCalls);
1111 tstat.packetReclaims = htonl(rx_packetReclaims);
1112 tstat.usedFDs = CountFDs(64);
1113 tstat.nWaiting = htonl(rx_nWaiting);
1114 queue_Count( &rx_idleServerQueue, np, nqe,
1115 rx_serverQueueEntry, tstat.idleThreads);
1116 tstat.idleThreads = htonl(tstat.idleThreads);
1117 tl = sizeof(struct rx_debugStats) - ap->length;
1119 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1122 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1123 ap->length = sizeof(struct rx_debugStats);
1124 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1125 rx_computelen(ap, ap->length);
1130 case RX_DEBUGI_GETALLCONN:
1131 case RX_DEBUGI_GETCONN: {
1133 register struct rx_connection *tc;
1134 struct rx_call *tcall;
1135 struct rx_debugConn tconn;
1136 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1139 tl = sizeof(struct rx_debugConn) - ap->length;
1141 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1145 memset((char *)&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1146 /* get N'th (maybe) "interesting" connection info */
1147 for(i=0;i<rx_hashTableSize;i++) {
1148 #if !defined(KERNEL)
1149 /* the time complexity of the algorithm used here
1150 * exponentially increases with the number of connections.
1152 #ifdef AFS_PTHREAD_ENV
1155 (void) IOMGR_Poll();
1158 MUTEX_ENTER(&rx_connHashTable_lock);
1159 /* We might be slightly out of step since we are not
1160 * locking each call, but this is only debugging output.
1162 for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
1163 if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1164 tconn.host = tc->peer->host;
1165 tconn.port = tc->peer->port;
1166 tconn.cid = htonl(tc->cid);
1167 tconn.epoch = htonl(tc->epoch);
1168 tconn.serial = htonl(tc->serial);
1169 for(j=0;j<RX_MAXCALLS;j++) {
1170 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1171 if ((tcall=tc->call[j])) {
1172 tconn.callState[j] = tcall->state;
1173 tconn.callMode[j] = tcall->mode;
1174 tconn.callFlags[j] = tcall->flags;
1175 if (queue_IsNotEmpty(&tcall->rq))
1176 tconn.callOther[j] |= RX_OTHER_IN;
1177 if (queue_IsNotEmpty(&tcall->tq))
1178 tconn.callOther[j] |= RX_OTHER_OUT;
1180 else tconn.callState[j] = RX_STATE_NOTINIT;
1183 tconn.natMTU = htonl(tc->peer->natMTU);
1184 tconn.error = htonl(tc->error);
1185 tconn.flags = tc->flags;
1186 tconn.type = tc->type;
1187 tconn.securityIndex = tc->securityIndex;
1188 if (tc->securityObject) {
1189 RXS_GetStats (tc->securityObject, tc,
1191 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1192 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1195 DOHTONL(packetsReceived);
1196 DOHTONL(packetsSent);
1197 DOHTONL(bytesReceived);
1200 i<sizeof(tconn.secStats.spares)/sizeof(short);
1204 i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
1209 MUTEX_EXIT(&rx_connHashTable_lock);
1210 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1212 ap->length = sizeof(struct rx_debugConn);
1213 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1218 MUTEX_EXIT(&rx_connHashTable_lock);
1220 /* if we make it here, there are no interesting packets */
1221 tconn.cid = htonl(0xffffffff); /* means end */
1222 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1224 ap->length = sizeof(struct rx_debugConn);
1225 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1231 * Pass back all the peer structures we have available
1234 case RX_DEBUGI_GETPEER: {
1236 register struct rx_peer *tp;
1237 struct rx_debugPeer tpeer;
1240 tl = sizeof(struct rx_debugPeer) - ap->length;
1242 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1246 memset((char *)&tpeer, 0, sizeof(tpeer));
1247 for(i=0;i<rx_hashTableSize;i++) {
1248 #if !defined(KERNEL)
1249 /* the time complexity of the algorithm used here
1250 * exponentially increases with the number of peers.
1252 * Yielding after processing each hash table entry
1253 * and dropping rx_peerHashTable_lock.
1254 * also increases the risk that we will miss a new
1255 * entry - but we are willing to live with this
1256 * limitation since this is meant for debugging only
1258 #ifdef AFS_PTHREAD_ENV
1261 (void) IOMGR_Poll();
1264 MUTEX_ENTER(&rx_peerHashTable_lock);
1265 for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
1266 if (tin.index-- <= 0) {
1267 tpeer.host = tp->host;
1268 tpeer.port = tp->port;
1269 tpeer.ifMTU = htons(tp->ifMTU);
1270 tpeer.idleWhen = htonl(tp->idleWhen);
1271 tpeer.refCount = htons(tp->refCount);
1272 tpeer.burstSize = tp->burstSize;
1273 tpeer.burst = tp->burst;
1274 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1275 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1276 tpeer.rtt = htonl(tp->rtt);
1277 tpeer.rtt_dev = htonl(tp->rtt_dev);
1278 tpeer.timeout.sec = htonl(tp->timeout.sec);
1279 tpeer.timeout.usec = htonl(tp->timeout.usec);
1280 tpeer.nSent = htonl(tp->nSent);
1281 tpeer.reSends = htonl(tp->reSends);
1282 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1283 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1284 tpeer.rateFlag = htonl(tp->rateFlag);
1285 tpeer.natMTU = htons(tp->natMTU);
1286 tpeer.maxMTU = htons(tp->maxMTU);
1287 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1288 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1289 tpeer.MTU = htons(tp->MTU);
1290 tpeer.cwind = htons(tp->cwind);
1291 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1292 tpeer.congestSeq = htons(tp->congestSeq);
1293 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1294 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1295 tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1296 tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
1298 MUTEX_EXIT(&rx_peerHashTable_lock);
1299 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1301 ap->length = sizeof(struct rx_debugPeer);
1302 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1307 MUTEX_EXIT(&rx_peerHashTable_lock);
1309 /* if we make it here, there are no interesting packets */
1310 tpeer.host = htonl(0xffffffff); /* means end */
1311 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1313 ap->length = sizeof(struct rx_debugPeer);
1314 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1319 case RX_DEBUGI_RXSTATS: {
1323 tl = sizeof(rx_stats) - ap->length;
1325 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1329 /* Since it's all int32s convert to network order with a loop. */
1330 MUTEX_ENTER(&rx_stats_mutex);
1331 s = (afs_int32 *)&rx_stats;
1332 for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1333 rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1336 ap->length = sizeof(rx_stats);
1337 MUTEX_EXIT(&rx_stats_mutex);
1338 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1344 /* error response packet */
1345 tin.type = htonl(RX_DEBUGI_BADTYPE);
1346 tin.index = tin.type;
1347 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1349 ap->length = sizeof(struct rx_debugIn);
1350 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* Answer a version-probe packet: send the library version string back to
 * (ahost, aport) over asocket, reusing the incoming packet ap as the reply
 * buffer.  Only client-initiated probes are answered, and the reply clears
 * RX_CLIENT_INITIATED so the two directions cannot be confused.
 * NOTE(review): declarations and the trailing return are elided in this view;
 * presumably buf is a local of at least 66 bytes and ap is returned — confirm
 * against the full source. */
1357 struct rx_packet *rxi_ReceiveVersionPacket(register struct rx_packet *ap,
1358 osi_socket asocket, afs_int32 ahost, short aport, int istack)
1363 * Only respond to client-initiated version requests, and
1364 * clear that flag in the response.
1366 if (ap->header.flags & RX_CLIENT_INITIATED) {
1369 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1370 rxi_EncodePacketHeader(ap);
/* Zero buf first so the strncpy result is always NUL-terminated even if
 * the version string fills the buffer. */
1371 memset(buf, 0, sizeof(buf));
/* Skip the 4-character prefix of cml_version_number. */
1372 strncpy(buf, cml_version_number+4, sizeof(buf)-1);
/* The wire format carries a fixed 65-byte version field. */
1373 rx_packetwrite(ap, 0, 65, buf);
1376 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1384 /* send a debug packet back to the sender */
/* Best-effort transmit of a debug/version reply to (ahost, aport).  The
 * packet's iovec chain is temporarily trimmed so that exactly
 * apacket->length payload bytes are handed to osi_NetSend, then restored
 * afterwards so the packet buffer can be reused.  The AFS global lock is
 * dropped around the (possibly blocking) network send and reacquired.
 * NOTE(review): locals nbytes/i/savelen/saven are declared on elided
 * lines; saven is presumably initialized to 0 — confirm in full source. */
1385 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1386 afs_int32 ahost, short aport, afs_int32 istack)
1388 struct sockaddr_in taddr;
1394 int waslocked = ISAFS_GLOCK();
/* Build the destination address; ahost/aport are already in network order
 * (they are copied straight from the received packet's source). */
1397 taddr.sin_family = AF_INET;
1398 taddr.sin_port = aport;
1399 taddr.sin_addr.s_addr = ahost;
1400 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
1401 taddr.sin_len = sizeof(struct sockaddr_in);
1404 /* We need to trim the niovecs. */
/* Walk the payload iovecs (index 0 is the wire header) until the running
 * byte count is exhausted; shorten the last needed iovec and remember the
 * original values so they can be restored after the send. */
1405 nbytes = apacket->length;
1406 for (i=1; i < apacket->niovecs; i++) {
1407 if (nbytes <= apacket->wirevec[i].iov_len) {
1408 savelen = apacket->wirevec[i].iov_len;
1409 saven = apacket->niovecs;
1410 apacket->wirevec[i].iov_len = nbytes;
1411 apacket->niovecs = i+1; /* so condition fails because i == niovecs */
1413 else nbytes -= apacket->wirevec[i].iov_len;
/* Don't hold the AFS global lock across a network send. */
1417 if (waslocked) AFS_GUNLOCK();
1419 /* debug packets are not reliably delivered, hence the cast below. */
1420 (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1421 apacket->length+RX_HEADER_SIZE, istack);
1423 if (waslocked) AFS_GLOCK();
/* Undo the trim so the caller sees the packet unchanged. */
1426 if (saven) { /* means we truncated the packet above. */
1427 apacket->wirevec[i-1].iov_len = savelen;
1428 apacket->niovecs = saven;
1433 /* Send the packet to appropriate destination for the specified
1434 * connection. The header is first encoded and placed in the packet.
/* Transmit a single packet p on connection conn.  Stamps a fresh serial
 * number (under conn_data_lock), optionally runs the rx_almostSent tracer,
 * encodes the header into wire form, and sends on the client or service
 * socket.  On send failure the packet's retryTime is pulled forward so the
 * retransmit logic resends it soon.  Statistics and per-peer byte counts
 * are updated under their respective mutexes. */
1436 void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
1442 struct sockaddr_in addr;
1443 register struct rx_peer *peer = conn->peer;
1446 char deliveryType = 'S';
1448 /* The address we're sending the packet to */
/* peer->host/port are stored in network byte order, so no htons/htonl. */
1449 addr.sin_family = AF_INET;
1450 addr.sin_port = peer->port;
1451 addr.sin_addr.s_addr = peer->host;
1453 /* This stuff should be revamped, I think, so that most, if not
1454 * all, of the header stuff is always added here. We could
1455 * probably do away with the encode/decode routines. XXXXX */
1457 /* Stamp each packet with a unique serial number. The serial
1458 * number is maintained on a connection basis because some types
1459 * of security may be based on the serial number of the packet,
1460 * and security is handled on a per authenticated-connection
1462 /* Pre-increment, to guarantee no zero serial number; a zero
1463 * serial number means the packet was never sent. */
1464 MUTEX_ENTER(&conn->conn_data_lock);
1465 p->header.serial = ++conn->serial;
1466 MUTEX_EXIT(&conn->conn_data_lock);
1467 /* This is so we can adjust retransmit time-outs better in the face of
1468 * rapidly changing round-trip times. RTO estimation is not a la Karn.
/* Remember the serial of the first transmission of this packet. */
1470 if (p->firstSerial == 0) {
1471 p->firstSerial = p->header.serial;
1475 /* If an output tracer function is defined, call it with the packet and
1476 * network address. Note this function may modify its arguments. */
1477 if (rx_almostSent) {
1478 int drop = (*rx_almostSent) (p, &addr);
1479 /* drop packet if return value is non-zero? */
1480 if (drop) deliveryType = 'D'; /* Drop the packet */
1484 /* Get network byte order header */
1485 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1486 * touch ALL the fields */
1488 /* Send the packet out on the same socket that related packets are being
1490 socket = (conn->type == RX_CLIENT_CONNECTION
1491 ? rx_socket : conn->service->socket);
1494 /* Possibly drop this packet, for testing purposes */
/* RXDEBUG-only fault injection: randomly discard a configured percentage
 * of outgoing packets instead of sending them. */
1495 if ((deliveryType == 'D') ||
1496 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1497 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1498 deliveryType = 'D'; /* Drop the packet */
1501 deliveryType = 'S'; /* Send the packet */
1502 #endif /* RXDEBUG */
1504 /* Loop until the packet is sent. We'd prefer just to use a
1505 * blocking socket, but unfortunately the interface doesn't
1506 * allow us to have the socket block in send mode, and not
1507 * block in receive mode */
/* Drop the AFS global lock across the potentially blocking send. */
1510 waslocked = ISAFS_GLOCK();
1511 if (waslocked) AFS_GUNLOCK();
/* Nonzero return from osi_NetSend means the send failed. */
1513 if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1514 p->length+RX_HEADER_SIZE, istack)){
1515 /* send failed, so let's hurry up the resend, eh? */
1516 MUTEX_ENTER(&rx_stats_mutex);
1517 rx_stats.netSendFailures++;
1518 MUTEX_EXIT(&rx_stats_mutex);
/* Schedule a near-immediate retransmit: 10ms plus a backoff-scaled delay. */
1519 p->retryTime = p->timeSent; /* resend it very soon */
1520 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1523 if (waslocked) AFS_GLOCK();
1528 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1529 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1530 peer->host, peer->port, p->header.serial, p->header.epoch,
1531 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1532 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
/* Per-type send counters and per-peer byte accounting. */
1534 MUTEX_ENTER(&rx_stats_mutex);
1535 rx_stats.packetsSent[p->header.type-1]++;
1536 MUTEX_EXIT(&rx_stats_mutex);
1537 MUTEX_ENTER(&peer->peer_lock);
1538 hadd32(peer->bytesSent, p->length);
1539 MUTEX_EXIT(&peer->peer_lock);
1542 /* Send a list of packets to appropriate destination for the specified
1543 * connection. The headers are first encoded and placed in the packets.
/* Transmit len packets as a single AFS 3.5 jumbogram.  A local iovec array
 * is assembled: slot 0 is the first packet's wire header, and each
 * subsequent slot is one packet's data area; all packets except the last
 * must be exactly RX_JUMBOBUFFERSIZE long and get the RX_JUMBO_PACKET flag
 * plus an rx_jumboHeader appended.  A run of consecutive serial numbers is
 * reserved up front under conn_data_lock.  The whole jumbogram goes out in
 * one osi_NetSend call. */
1545 void rxi_SendPacketList(struct rx_connection * conn, struct rx_packet **list,
1546 int len, int istack)
1548 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1551 struct sockaddr_in addr;
1552 register struct rx_peer *peer = conn->peer;
1554 struct rx_packet *p = NULL;
1555 struct iovec wirevec[RX_MAXIOVECS];
1559 struct rx_jumboHeader *jp;
1561 char deliveryType = 'S';
1563 /* The address we're sending the packet to */
/* peer->host/port are already network byte order. */
1564 addr.sin_family = AF_INET;
1565 addr.sin_port = peer->port;
1566 addr.sin_addr.s_addr = peer->host;
/* One iovec per packet plus one for the shared wire header. */
1568 if (len+1 > RX_MAXIOVECS) {
1569 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1573 * Stamp the packets in this jumbogram with consecutive serial numbers
/* Reserve len serials atomically; they are assigned below as ++serial. */
1575 MUTEX_ENTER(&conn->conn_data_lock);
1576 serial = conn->serial;
1577 conn->serial += len;
1578 MUTEX_EXIT(&conn->conn_data_lock);
1581 /* This stuff should be revamped, I think, so that most, if not
1582 * all, of the header stuff is always added here. We could
1583 * probably do away with the encode/decode routines. XXXXX */
1586 length = RX_HEADER_SIZE;
1587 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1588 wirevec[0].iov_len = RX_HEADER_SIZE;
1589 for (i = 0 ; i < len ; i++) {
1592 /* The whole 3.5 jumbogram scheme relies on packets fitting
1593 * in a single packet buffer. */
1594 if (p->niovecs > 2) {
1595 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1598 /* Set the RX_JUMBO_PACKET flags in all but the last packets
/* Non-final packets: fixed jumbo buffer size, jumbo flag, and room for
 * the trailing jumbo header that precedes the next packet. */
1601 if (p->length != RX_JUMBOBUFFERSIZE) {
1602 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1604 p->header.flags |= RX_JUMBO_PACKET;
1605 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1606 wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
/* Final packet: just its own payload length. */
1608 wirevec[i+1].iov_len = p->length;
1609 length += p->length;
1611 wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
1613 /* Convert jumbo packet header to network byte order */
/* Pack flags (high byte) and spare into the 32-bit jumbo header word. */
1614 temp = (afs_uint32)(p->header.flags) << 24;
1615 temp |= (afs_uint32)(p->header.spare);
1616 *(afs_uint32 *)jp = htonl(temp);
/* jp points at the jumbo header slot following this packet's data. */
1618 jp = (struct rx_jumboHeader *)
1619 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1621 /* Stamp each packet with a unique serial number. The serial
1622 * number is maintained on a connection basis because some types
1623 * of security may be based on the serial number of the packet,
1624 * and security is handled on a per authenticated-connection
1626 /* Pre-increment, to guarantee no zero serial number; a zero
1627 * serial number means the packet was never sent. */
1628 p->header.serial = ++serial;
1629 /* This is so we can adjust retransmit time-outs better in the face of
1630 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1632 if (p->firstSerial == 0) {
1633 p->firstSerial = p->header.serial;
1637 /* If an output tracer function is defined, call it with the packet and
1638 * network address. Note this function may modify its arguments. */
1639 if (rx_almostSent) {
1640 int drop = (*rx_almostSent) (p, &addr);
1641 /* drop packet if return value is non-zero? */
1642 if (drop) deliveryType = 'D'; /* Drop the packet */
1646 /* Get network byte order header */
1647 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1648 * touch ALL the fields */
1651 /* Send the packet out on the same socket that related packets are being
1653 socket = (conn->type == RX_CLIENT_CONNECTION
1654 ? rx_socket : conn->service->socket);
1657 /* Possibly drop this packet, for testing purposes */
/* RXDEBUG-only fault injection, same scheme as rxi_SendPacket. */
1658 if ((deliveryType == 'D') ||
1659 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1660 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1661 deliveryType = 'D'; /* Drop the packet */
1664 deliveryType = 'S'; /* Send the packet */
1665 #endif /* RXDEBUG */
1667 /* Loop until the packet is sent. We'd prefer just to use a
1668 * blocking socket, but unfortunately the interface doesn't
1669 * allow us to have the socket block in send mode, and not
1670 * block in receive mode */
1672 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1673 waslocked = ISAFS_GLOCK();
1674 if (!istack && waslocked) AFS_GUNLOCK();
/* Send all len payload iovecs plus the leading header iovec at once. */
1676 if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
1677 /* send failed, so let's hurry up the resend, eh? */
1678 MUTEX_ENTER(&rx_stats_mutex);
1679 rx_stats.netSendFailures++;
1680 MUTEX_EXIT(&rx_stats_mutex);
/* Pull every packet's retry time forward so the jumbogram is resent soon. */
1681 for (i = 0 ; i < len ; i++) {
1683 p->retryTime = p->timeSent; /* resend it very soon */
1684 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1687 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1688 if (!istack && waslocked) AFS_GLOCK();
1693 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1694 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1695 peer->host, peer->port, p->header.serial, p->header.epoch,
1696 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1697 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1699 MUTEX_ENTER(&rx_stats_mutex);
1700 rx_stats.packetsSent[p->header.type-1]++;
1701 MUTEX_EXIT(&rx_stats_mutex);
1702 MUTEX_ENTER(&peer->peer_lock);
1703 hadd32(peer->bytesSent, p->length);
1704 MUTEX_EXIT(&peer->peer_lock);
1708 /* Send a "special" packet to the peer connection. If call is
1709 * specified, then the packet is directed to a specific call channel
1710 * associated with the connection, otherwise it is directed to the
1711 * connection only. Uses optionalPacket if it is supplied, rather than
1712 * allocating a new packet buffer. Nbytes is the length of the data
1713 * portion of the packet. If data is non-null, nbytes of data are
1714 * copied into the packet. Type is the type of the packet, as defined
1715 * in rx.h. Bug: there's a lot of duplication between this and other
1716 * routines. This needs to be cleaned up. */
1717 struct rx_packet *rxi_SendSpecial(register struct rx_call *call,
1718 register struct rx_connection *conn, struct rx_packet *optionalPacket,
1719 int type, char *data, int nbytes, int istack)
1721 /* Some of the following stuff should be common code for all
1722 * packet sends (it's repeated elsewhere) */
1723 register struct rx_packet *p;
1725 int savelen = 0, saven = 0;
1726 int channel, callNumber;
/* NOTE(review): channel/callNumber are only meaningful when call != NULL;
 * the elided lines presumably guard or default them — confirm in full
 * source. */
1728 channel = call->channel;
1729 callNumber = *call->callNumber;
1730 /* BUSY packets refer to the next call on this connection */
1731 if (type == RX_PACKET_TYPE_BUSY) {
/* Allocate a buffer only if the caller didn't supply optionalPacket. */
1740 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1741 if (!p) osi_Panic("rxi_SendSpecial failure");
/* Fill in the header fields from the connection/call. */
1748 p->header.serviceId = conn->serviceId;
1749 p->header.securityIndex = conn->securityIndex;
1750 p->header.cid = (conn->cid | channel);
1751 p->header.callNumber = callNumber;
1753 p->header.epoch = conn->epoch;
1754 p->header.type = type;
1755 p->header.flags = 0;
1756 if (conn->type == RX_CLIENT_CONNECTION)
1757 p->header.flags |= RX_CLIENT_INITIATED;
1759 rx_packetwrite(p, 0, nbytes, data);
/* Trim the iovec chain to exactly nbytes of payload (same technique as
 * rxi_SendDebugPacket), remembering the original values for restore. */
1761 for (i=1; i < p->niovecs; i++) {
1762 if (nbytes <= p->wirevec[i].iov_len) {
1763 savelen = p->wirevec[i].iov_len;
1765 p->wirevec[i].iov_len = nbytes;
1766 p->niovecs = i+1; /* so condition fails because i == niovecs */
1768 else nbytes -= p->wirevec[i].iov_len;
/* Call-directed packets go through rxi_Send; connection-only packets go
 * straight to rxi_SendPacket. */
1771 if (call) rxi_Send(call, p, istack);
1772 else rxi_SendPacket(conn, p, istack);
1773 if (saven) { /* means we truncated the packet above. We probably don't */
1774 /* really need to do this, but it seems safer this way, given that */
1775 /* sneaky optionalPacket... */
1776 p->wirevec[i-1].iov_len = savelen;
/* Free our allocation; a caller-supplied optionalPacket is returned. */
1779 if (!optionalPacket) rxi_FreePacket(p);
1780 return optionalPacket;
1784 /* Encode the packet's header (from the struct header in the packet to
1785 * the net byte order representation in the wire representation of the
1786 * packet, which is what is actually sent out on the wire) */
/* Serializes p->header into the first iovec (the wire header area) as a
 * sequence of big-endian 32-bit words; inverse of rxi_DecodePacketHeader. */
1787 void rxi_EncodePacketHeader(register struct rx_packet *p)
1789 register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */
/* Clear the whole header area first so reserved bits go out as zero. */
1791 memset((char *)buf, 0, RX_HEADER_SIZE);
1792 *buf++ = htonl(p->header.epoch);
1793 *buf++ = htonl(p->header.cid);
1794 *buf++ = htonl(p->header.callNumber);
1795 *buf++ = htonl(p->header.seq);
1796 *buf++ = htonl(p->header.serial);
/* Pack type, flags, userStatus and securityIndex into one word,
 * high byte to low byte respectively. */
1797 *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
1798 | (((afs_uint32)p->header.flags)<<16)
1799 | (p->header.userStatus<<8) | p->header.securityIndex);
1800 /* Note: top 16 bits of this next word were reserved */
1801 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1804 /* Decode the packet's header (from net byte order to a struct header) */
/* Deserializes the big-endian wire header in wirevec[0] into p->header;
 * inverse of rxi_EncodePacketHeader.  NOTE(review): the buf-advance lines
 * between each field are elided in this view. */
1805 void rxi_DecodePacketHeader(register struct rx_packet *p)
1807 register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base); /* MTUXXX */
1810 p->header.epoch = ntohl(*buf);
1812 p->header.cid = ntohl(*buf);
1814 p->header.callNumber = ntohl(*buf);
1816 p->header.seq = ntohl(*buf);
1818 p->header.serial = ntohl(*buf);
1824 /* C will truncate byte fields to bytes for me */
/* Unpack the combined word: type/flags/userStatus/securityIndex from
 * high byte to low byte. */
1825 p->header.type = temp>>24;
1826 p->header.flags = temp>>16;
1827 p->header.userStatus = temp>>8;
1828 p->header.securityIndex = temp>>0;
1833 p->header.serviceId = (temp&0xffff);
1834 p->header.spare = temp>>16;
1835 /* Note: top 16 bits of this last word are the security checksum */
/* Prepare a DATA packet for (re)transmission on call: fill in all header
 * fields from the call/connection, assign the next sequence number, reset
 * transmission-state timestamps, reconcile p->length with the iovec chain
 * (accounting for the security header), release any unneeded trailing
 * buffers, and hand the packet to the security object for final
 * preparation (e.g. checksums/encryption). */
1838 void rxi_PrepareSendPacket(register struct rx_call *call, register struct rx_packet *p,
1841 register struct rx_connection *conn = call->conn;
1843 ssize_t len; /* len must be a signed type; it can go negative */
/* Clear any stale ACKed state from a previous use of this buffer. */
1845 p->flags &= ~RX_PKTFLAG_ACKED;
1846 p->header.cid = (conn->cid | call->channel);
1847 p->header.serviceId = conn->serviceId;
1848 p->header.securityIndex = conn->securityIndex;
1849 p->header.callNumber = *call->callNumber;
/* Sequence numbers are per-call and assigned in transmit order. */
1850 p->header.seq = call->tnext++;
1851 p->header.epoch = conn->epoch;
1852 p->header.type = RX_PACKET_TYPE_DATA;
1853 p->header.flags = 0;
1854 p->header.spare = 0;
1855 if (conn->type == RX_CLIENT_CONNECTION)
1856 p->header.flags |= RX_CLIENT_INITIATED;
1859 p->header.flags |= RX_LAST_PACKET;
1861 clock_Zero(&p->retryTime); /* Never yet transmitted */
1862 clock_Zero(&p->firstSent); /* Never yet transmitted */
1863 p->header.serial = 0; /* Another way of saying never transmitted... */
1866 /* Now that we're sure this is the last data on the call, make sure
1867 * that the "length" and the sum of the iov_lens matches. */
1868 len = p->length + call->conn->securityHeaderSize;
/* Subtract each payload iovec; len <= 0 means we've covered the data. */
1870 for (i=1; i < p->niovecs && len > 0; i++) {
1871 len -= p->wirevec[i].iov_len;
1874 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1877 /* Free any extra elements in the wirevec */
/* Always keep at least iovecs 0 and 1; return surplus cbufs to the pool. */
1878 for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1879 rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
/* len is <= 0 here, so this shrinks the last iovec to the exact size. */
1882 p->wirevec[i-1].iov_len += len;
1884 RXS_PreparePacket(conn->securityObject, call, p);
1887 /* Given an interface MTU size, calculate an adjusted MTU size that
1888 * will make efficient use of the RX buffers when the peer is sending
1889 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
/* Rounds mtu down to the largest value consisting of the header plus a
 * whole number of jumbo buffers, so no partial buffer is ever needed.
 * MTUs at or below one-jumbo-buffer size are handled by the elided
 * early-return branch. */
1890 int rxi_AdjustIfMTU(int mtu)
1895 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1896 if (mtu <= adjMTU) {
/* Number of whole jumbo buffers that fit beyond the first. */
1903 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1904 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1907 /* Given an interface MTU size, and the peer's advertised max receive
1908 * size, calculate an adjisted maxMTU size that makes efficient use
1909 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
/* Scale the per-fragment mtu by the configured send-fragment count, cap
 * it at what the peer says it can receive, then round via
 * rxi_AdjustIfMTU so it aligns with whole jumbo buffers. */
1910 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1912 int maxMTU = mtu * rxi_nSendFrags;
1913 maxMTU = MIN(maxMTU, peerMaxMTU);
1914 return rxi_AdjustIfMTU(maxMTU);
1917 /* Given a packet size, figure out how many datagram packet will fit.
1918 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1919 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1920 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
/* Compute how many Rx packet buffers a jumbogram of frags fragments of
 * size mtu can carry, capped at RX_MAX_PACKET_SIZE.  Returns at least 1
 * (the elided branch handles MTUs too small for even one jumbo buffer,
 * after allowing for an IPv6 fragment header). */
1921 int rxi_AdjustDgramPackets(int frags, int mtu)
1924 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
/* Total datagram payload across frags UDP fragments (UDP header counted
 * once). */
1927 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1928 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1929 /* subtract the size of the first and last packets */
1930 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
/* 2 for the first/last packets plus one per remaining middle buffer. */
1934 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)))