 * Copyright 2000, International Business Machines Corporation and others.
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include "afs/param.h"
#include <afs/param.h>
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
struct rx_packet *rx_mallocedP = 0;

extern char cml_version_number[];
extern int (*rx_almostSent) ();

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport,
				afs_int32 istack);
/* some rules about packets:
 * 1.  When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
	l += packet->wirevec[i].iov_len;

/* all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	l += packet->wirevec[i].iov_len;
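
/* Illustrative usage sketch (not part of the original source): in the full
 * source, the Slow routines above are the multi-iovec fallback behind the
 * rx_GetInt32/rx_PutInt32 accessors.  The offset is a byte offset into the
 * user data and must be word aligned, per the alignment rules noted above. */
#if 0
static void
example_slow_int32(struct rx_packet *p)
{
    /* read the third word of user data, then write it back incremented */
    afs_int32 v = rx_SlowGetInt32(p, 2 * sizeof(afs_int32));
    rx_SlowPutInt32(p, 2 * sizeof(afs_int32), v + 1);
}
#endif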
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
    unsigned int i, j, l, r;
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     */
    while ((resid > 0) && (i < packet->niovecs)) {
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	l += packet->wirevec[i].iov_len;
    return (resid ? (r - resid) : r);
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	l += packet->wirevec[i].iov_len;
    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy into this iovec.
     */
    while ((resid > 0) && (i < RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	l += packet->wirevec[i].iov_len;
    return (resid ? (r - resid) : r);
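
/* Illustrative usage sketch (not part of the original source): copying a
 * byte range into a packet via the slow path.  rx_SlowWritePacket grows the
 * packet through rxi_AllocDataBuf as needed (the ++niovecs side effect noted
 * above) and returns the number of bytes actually written, so a short return
 * means the continuation-buffer pool ran dry. */
#if 0
static int
example_write_span(struct rx_packet *p, char *src, int nbytes)
{
    int wrote = rx_SlowWritePacket(p, 0, nbytes, src);
    return (wrote == nbytes) ? 0 : -1;	/* -1: allocation fell short */
}
#endif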
static struct rx_packet *
allocCBuf(int class)
    MUTEX_ENTER(&rx_freePktQ_lock);
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	MUTEX_EXIT(&rx_stats_mutex);
    if (queue_IsEmpty(&rx_freePacketQueue)) {
	rxi_NeedMorePackets = TRUE;
    if (queue_IsEmpty(&rx_freePacketQueue)) {
	rxi_MorePacketsNoLock(rx_initSendWindow);
    c = queue_First(&rx_freePacketQueue, rx_packet);
    if (!(c->flags & RX_PKTFLAG_FREE))
	osi_Panic("rxi_AllocPacket: packet not free\n");
    c->flags = 0;		/* clear RX_PKTFLAG_FREE, initialize the rest */
    MUTEX_EXIT(&rx_freePktQ_lock);

/*
 * Free a packet currently used as a continuation buffer
 */
rxi_freeCBuf(struct rx_packet *c)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreePacketNoLock(c);
    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;

/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by allocCBuf */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
    for (i = p->niovecs; nb > 0 && i < RX_MAXWVECS; i++) {
	register struct rx_packet *cb;
	if ((cb = allocCBuf(class))) {
	    p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	    p->wirevec[i].iov_len = RX_CBUFFERSIZE;
	    nb -= RX_CBUFFERSIZE;
	    p->length += RX_CBUFFERSIZE;
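
/* Illustrative sketch (not part of the original source): because
 * rxi_AllocDataBuf returns the residual byte count rather than an error
 * code, a caller that must have all nb bytes checks for a positive return
 * and unwinds. */
#if 0
static int
example_grow_packet(struct rx_packet *p, int nb)
{
    if (rxi_AllocDataBuf(p, nb, RX_PACKET_CLASS_SEND_CBUF) > 0) {
	/* fewer than nb bytes were attached; caller must recover */
	return ENOMEM;
    }
    return 0;
}
#endif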
/* Add more packet buffers */
rxi_MorePackets(int apackets)
    struct rx_packet *p, *e;
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (e = p + apackets; p < e; p++) {
	p->wirevec[0].iov_base = (char *)(p->wirehead);
	p->wirevec[0].iov_len = RX_HEADER_SIZE;
	p->wirevec[1].iov_base = (char *)(p->localdata);
	p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
	p->flags |= RX_PKTFLAG_FREE;
	queue_Append(&rx_freePacketQueue, p);
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    MUTEX_EXIT(&rx_freePktQ_lock);

/* Add more packet buffers */
rxi_MorePacketsNoLock(int apackets)
    struct rx_packet *p, *e;
    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);
    memset((char *)p, 0, getme);
    for (e = p + apackets; p < e; p++) {
	p->wirevec[0].iov_base = (char *)(p->wirehead);
	p->wirevec[0].iov_len = RX_HEADER_SIZE;
	p->wirevec[1].iov_base = (char *)(p->localdata);
	p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
	p->flags |= RX_PKTFLAG_FREE;
	queue_Append(&rx_freePacketQueue, p);
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
rxi_FreeAllPackets(void)
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));

/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
rx_CheckPackets(void)
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_initSendWindow);
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
rxi_FreePacketNoLock(struct rx_packet *p)
    dpf(("Free %lx\n", (unsigned long)p));
    if (p->flags & RX_PKTFLAG_FREE)
	osi_Panic("rxi_FreePacketNoLock: packet already free\n");
    p->flags |= RX_PKTFLAG_FREE;
    queue_Append(&rx_freePacketQueue, p);

rxi_FreeDataBufsNoLock(struct rx_packet *p, int first)
    struct iovec *iov, *end;
    if (first != 1)		/* MTUXXX */
	osi_Panic("FreeDataBufs 1: first must be 1");
    iov = &p->wirevec[1];
    end = iov + (p->niovecs - 1);
    if (iov->iov_base != (caddr_t) p->localdata)	/* MTUXXX */
	osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
    for (iov++; iov < end; iov++) {
	    osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
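
/* Sketch of the idea behind RX_CBUF_TO_PACKET (an illustration, not the
 * macro's actual definition): a continuation buffer is the localdata area
 * of some other rx_packet, so the owning packet can be recovered by backing
 * up over the localdata offset, computed here from any packet p already in
 * hand. */
#if 0
#define EXAMPLE_CBUF_TO_PACKET(cp, p) \
    ((struct rx_packet *)((char *)(cp) \
	- ((char *)((p)->localdata) - (char *)(p))))
#endif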
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
rxi_RestoreDataBufs(struct rx_packet *p)
    struct iovec *iov = &p->wirevec[2];
    p->wirevec[0].iov_base = (char *)(p->wirehead);
    p->wirevec[0].iov_len = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *)(p->localdata);
    p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	iov->iov_len = RX_CBUFFERSIZE;

rxi_TrimDataBufs(struct rx_packet *p, int first)
    struct iovec *iov, *end;
	osi_Panic("TrimDataBufs 1: first must be 1");
    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    /* iov now points to the first empty data buffer. */
    MUTEX_ENTER(&rx_freePktQ_lock);
    for (; iov < end; iov++) {
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    MUTEX_EXIT(&rx_freePktQ_lock);
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
rxi_FreePacket(struct rx_packet *p)
    MUTEX_ENTER(&rx_freePktQ_lock);
    rxi_FreeDataBufsNoLock(p, 1);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    MUTEX_EXIT(&rx_freePktQ_lock);

/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary; besides, this is the way the
 * length field is usually used. */
rxi_AllocPacketNoLock(int class)
    register struct rx_packet *p;
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	MUTEX_EXIT(&rx_stats_mutex);
	return (struct rx_packet *)0;
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);
    if (queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");
    if (queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(rx_initSendWindow);
    p = queue_First(&rx_freePacketQueue, rx_packet);
    if (!(p->flags & RX_PKTFLAG_FREE))
	osi_Panic("rxi_AllocPacket: packet not free\n");
    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));
    p->flags = 0;		/* clear RX_PKTFLAG_FREE, initialize the rest */
    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket */
    p->wirevec[0].iov_base = (char *)(p->wirehead);
    p->wirevec[0].iov_len = RX_HEADER_SIZE;
    p->wirevec[1].iov_base = (char *)(p->localdata);
    p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
    p->length = RX_FIRSTBUFFERSIZE;

rxi_AllocPacket(int class)
    register struct rx_packet *p;
    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
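
/* Illustrative sketch (not part of the original source): a freshly
 * allocated packet's length covers only the data area, never the wire
 * header, so a caller computing the datagram size must add RX_HEADER_SIZE
 * itself, as the send routines below do. */
#if 0
static void
example_alloc_length(void)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SEND);
    /* p->length == RX_FIRSTBUFFERSIZE here; bytes on the wire would be
     * p->length + RX_HEADER_SIZE */
    rxi_FreePacket(p);
}
#endif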
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
rxi_AllocSendPacket(register struct rx_call *call, int want)
    register struct rx_packet *p = (struct rx_packet *)0;
    register unsigned delta;
    mud = call->MTU - RX_HEADER_SIZE;
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);
	    want = MIN(want, mud);
	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);
	    if ((unsigned)p->length > mud)
	    if (delta >= p->length) {
	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we
	 * just wait. */
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;
#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
	osi_rxSleep(&rx_waitingForPackets);
	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
/* count the number of used FDs */
CountFDs(register int amax)
    register int i, code;
    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
#define CountFDs(amax) amax
#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
rxi_ReadPacket(int socket, register struct rx_packet *p, afs_uint32 * host,
    struct sockaddr_in from;
    register afs_int32 tlen, savelen;
    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */
    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
	tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);
    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;
    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	    rxi_MorePackets(rx_initSendWindow);
	else if (nbytes < 0 && errno == EWOULDBLOCK) {
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.noPacketOnRead++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.bogusPacketOnRead++;
	    rx_stats.bogusHost = from.sin_addr.s_addr;
	    MUTEX_EXIT(&rx_stats_mutex);
	    dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
		 from.sin_port, nbytes));
    /* Extract packet header. */
    rxi_DecodePacketHeader(p);
    *host = from.sin_addr.s_addr;
    *port = from.sin_port;
    if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
	struct rx_peer *peer;
	MUTEX_ENTER(&rx_stats_mutex);
	rx_stats.packetsRead[p->header.type - 1]++;
	MUTEX_EXIT(&rx_stats_mutex);
	/*
	 * Try to look up this peer structure.  If it doesn't exist,
	 * don't create a new one -
	 * we don't keep count of the bytes sent/received if a peer
	 * structure doesn't already exist.
	 *
	 * The peer/connection cleanup code assumes that there is 1 peer
	 * per connection.  If we actually created a peer structure here
	 * and this packet was an rxdebug packet, the peer structure would
	 * never be cleaned up.
	 */
	peer = rxi_FindPeer(*host, *port, 0, 0);
	/* Since this may not be associated with a connection,
	 * it may have no refCount, meaning we could race with
	 * ReapConnections. */
	if (peer && (peer->refCount > 0)) {
	    MUTEX_ENTER(&peer->peer_lock);
	    hadd32(peer->bytesReceived, p->length);
	    MUTEX_EXIT(&peer->peer_lock);
    /* Free any empty packet buffers at the end of this packet */
    rxi_TrimDataBufs(p, 1);
#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */
rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
    niov = p->niovecs - 2;
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);
    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);
    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;
/* Send a UDP datagram */
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
    memset(&msg, 0, sizeof(msg));
    msg.msg_iovlen = nvecs;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    rxi_Sendmsg(socket, &msg, 0);
#elif !defined(UKERNEL)
 * message receipt is done in rxk_input or rx_put.
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
    register int m, n, o, t, i;
    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	    t = iovs[i].iov_len;
	memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);

#define m_cpytoc(a, b, c, d)  cpytoc(a, b, c, d)
#define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)

#if !defined(AFS_LINUX20_ENV)
m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
    unsigned int l1, l2, i, t;
    if (m == NULL || off < 0 || len < 0 || iovs == NULL)
	osi_Panic("m_cpytoiovec");	/* MTUXXX probably don't need this check */
    if (m->m_len <= off) {
    p1 = mtod(m, caddr_t) + off;
    l1 = m->m_len - off;
    p2 = iovs[0].iov_base;
    l2 = iovs[0].iov_len;
    t = MIN(l1, MIN(l2, (unsigned int)len));
    p1 = mtod(m, caddr_t);
    p2 = iovs[i].iov_base;
    l2 = iovs[i].iov_len;
#endif /* AFS_SUN5_ENV */

#if !defined(AFS_LINUX20_ENV)
rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
     struct rx_packet *phandle;
     int hdr_len, data_len;
    m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
#endif /*KERNEL && !UKERNEL */
/* send a response to a debug packet */
rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
		       afs_int32 ahost, short aport, int istack)
    struct rx_debugIn tin;
    struct rx_serverQueueEntry *np, *nqe;
    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);
    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;
	    /* get basic stats */
	    memset((char *)&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_nWaiting);
	    tstat.nWaited = htonl(rx_nWaited);
	    queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
	    ap->length = sizeof(struct rx_debugStats);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    rx_computelen(ap, ap->length);
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    register struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);
	    tl = sizeof(struct rx_debugConn) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    memset((char *)&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of connections.
#ifdef AFS_PTHREAD_ENV
		MUTEX_ENTER(&rx_connHashTable_lock);
		/* We might be slightly out of step since we are not
		 * locking each call, but this is only debugging output.
		 */
		for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		    if ((all || rxi_IsConnInteresting(tc))
			&& tin.index-- <= 0) {
			tconn.host = tc->peer->host;
			tconn.port = tc->peer->port;
			tconn.cid = htonl(tc->cid);
			tconn.epoch = htonl(tc->epoch);
			tconn.serial = htonl(tc->serial);
			for (j = 0; j < RX_MAXCALLS; j++) {
			    tconn.callNumber[j] = htonl(tc->callNumber[j]);
			    if ((tcall = tc->call[j])) {
				tconn.callState[j] = tcall->state;
				tconn.callMode[j] = tcall->mode;
				tconn.callFlags[j] = tcall->flags;
				if (queue_IsNotEmpty(&tcall->rq))
				    tconn.callOther[j] |= RX_OTHER_IN;
				if (queue_IsNotEmpty(&tcall->tq))
				    tconn.callOther[j] |= RX_OTHER_OUT;
				tconn.callState[j] = RX_STATE_NOTINIT;
			tconn.natMTU = htonl(tc->peer->natMTU);
			tconn.error = htonl(tc->error);
			tconn.flags = tc->flags;
			tconn.type = tc->type;
			tconn.securityIndex = tc->securityIndex;
			if (tc->securityObject) {
			    RXS_GetStats(tc->securityObject, tc,
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			    DOHTONL(packetsReceived);
			    DOHTONL(packetsSent);
			    DOHTONL(bytesReceived);
				 sizeof(tconn.secStats.spares) /
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)
			MUTEX_EXIT(&rx_connHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
			ap->length = sizeof(struct rx_debugConn);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
	    MUTEX_EXIT(&rx_connHashTable_lock);
	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	/*
	 * Pass back all the peer structures we have available
	 */
    case RX_DEBUGI_GETPEER:{
	    register struct rx_peer *tp;
	    struct rx_debugPeer tpeer;
	    tl = sizeof(struct rx_debugPeer) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    memset((char *)&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of peers.
		 *
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock also increases
		 * the risk that we will miss a new entry - but we are
		 * willing to live with this limitation since this is
		 * meant for debugging only
		 */
#ifdef AFS_PTHREAD_ENV
		MUTEX_ENTER(&rx_peerHashTable_lock);
		for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		    if (tin.index-- <= 0) {
			tpeer.host = tp->host;
			tpeer.port = tp->port;
			tpeer.ifMTU = htons(tp->ifMTU);
			tpeer.idleWhen = htonl(tp->idleWhen);
			tpeer.refCount = htons(tp->refCount);
			tpeer.burstSize = tp->burstSize;
			tpeer.burst = tp->burst;
			tpeer.burstWait.sec = htonl(tp->burstWait.sec);
			tpeer.burstWait.usec = htonl(tp->burstWait.usec);
			tpeer.rtt = htonl(tp->rtt);
			tpeer.rtt_dev = htonl(tp->rtt_dev);
			tpeer.timeout.sec = htonl(tp->timeout.sec);
			tpeer.timeout.usec = htonl(tp->timeout.usec);
			tpeer.nSent = htonl(tp->nSent);
			tpeer.reSends = htonl(tp->reSends);
			tpeer.inPacketSkew = htonl(tp->inPacketSkew);
			tpeer.outPacketSkew = htonl(tp->outPacketSkew);
			tpeer.rateFlag = htonl(tp->rateFlag);
			tpeer.natMTU = htons(tp->natMTU);
			tpeer.maxMTU = htons(tp->maxMTU);
			tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
			tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
			tpeer.MTU = htons(tp->MTU);
			tpeer.cwind = htons(tp->cwind);
			tpeer.nDgramPackets = htons(tp->nDgramPackets);
			tpeer.congestSeq = htons(tp->congestSeq);
			tpeer.bytesSent.high = htonl(tp->bytesSent.high);
			tpeer.bytesSent.low = htonl(tp->bytesSent.low);
			tpeer.bytesReceived.high =
			    htonl(tp->bytesReceived.high);
			tpeer.bytesReceived.low =
			    htonl(tp->bytesReceived.low);
			MUTEX_EXIT(&rx_peerHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
			ap->length = sizeof(struct rx_debugPeer);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
	    MUTEX_EXIT(&rx_peerHashTable_lock);
	    /* if we make it here, there are no interesting packets */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
    case RX_DEBUGI_RXSTATS:{
	    tl = sizeof(rx_stats) - ap->length;
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    /* Since it's all int32s, convert to network order with a loop. */
	    MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) & rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
	    ap->length = sizeof(rx_stats);
	    MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);

	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
			 afs_int32 ahost, short aport, int istack)
    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* send a debug packet back to the sender */
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_int32 ahost, short aport, afs_int32 istack)
    struct sockaddr_in taddr;
    int waslocked = ISAFS_GLOCK();
    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	nbytes -= apacket->wirevec[i].iov_len;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");
    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
/* Send the packet to the appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    char deliveryType = 'S';
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;

    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	    deliveryType = 'D';	/* Drop the packet */

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
	    RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

    /* Loop until the packet is sent.  We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
    waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");
	 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
		     p->length + RX_HEADER_SIZE, istack)) != 0) {
	/* send failed, so let's hurry up the resend, eh? */
	MUTEX_ENTER(&rx_stats_mutex);
	rx_stats.netSendFailures++;
	MUTEX_EXIT(&rx_stats_mutex);
	p->retryTime = p->timeSent;	/* resend it very soon */
	clock_Addmsec(&(p->retryTime),
		      10 + (((afs_uint32) p->backoff) << 8));
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
	/* Linux is nice -- it can tell us right away that we cannot
	 * reach this recipient by returning an ENETUNREACH error
	 * code.  So, when this happens let's "down" the host NOW so
	 * we don't sit around waiting for this host to timeout later.
	 */
	if (call && code == -ENETUNREACH)
	    call->lastReceiveTime = 0;
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
/* Send a list of packets to the appropriate destination for the specified
 * connection.  The headers are first encoded and placed in the packets.
 */
rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
		   struct rx_packet **list, int len, int istack)
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    struct rx_packet *p = NULL;
    struct iovec wirevec[RX_MAXIOVECS];
    int i, length, code;
    struct rx_jumboHeader *jp;
    char deliveryType = 'S';
    /* The address we're sending the packet to */
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    if (len + 1 > RX_MAXIOVECS) {
	osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");

    /*
     * Stamp the packets in this jumbogram with consecutive serial numbers
     */
    MUTEX_ENTER(&conn->conn_data_lock);
    serial = conn->serial;
    conn->serial += len;
    MUTEX_EXIT(&conn->conn_data_lock);

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    length = RX_HEADER_SIZE;
    wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
    wirevec[0].iov_len = RX_HEADER_SIZE;
    for (i = 0; i < len; i++) {

	/* The whole 3.5 jumbogram scheme relies on packets fitting
	 * in a single packet buffer. */
	if (p->niovecs > 2) {
	    osi_Panic("rxi_SendPacketList, niovecs > 2\n");

	/* Set the RX_JUMBO_PACKET flag in all but the last packet
	 * in this jumbogram */
	    if (p->length != RX_JUMBOBUFFERSIZE) {
		osi_Panic("rxi_SendPacketList, length != jumbo size\n");
	    p->header.flags |= RX_JUMBO_PACKET;
	    length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	    wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
	    wirevec[i + 1].iov_len = p->length;
	    length += p->length;
	wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);

	    /* Convert jumbo packet header to network byte order */
	    temp = (afs_uint32) (p->header.flags) << 24;
	    temp |= (afs_uint32) (p->header.spare);
	    *(afs_uint32 *) jp = htonl(temp);
	jp = (struct rx_jumboHeader *)
	    ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);

	/* Stamp each packet with a unique serial number.  The serial
	 * number is maintained on a connection basis because some types
	 * of security may be based on the serial number of the packet,
	 * and security is handled on a per authenticated-connection
	 * basis. */
	/* Pre-increment, to guarantee no zero serial number; a zero
	 * serial number means the packet was never sent. */
	p->header.serial = ++serial;
	/* This is so we can adjust retransmit time-outs better in the face of
	 * rapidly changing round-trip times.  RTO estimation is not a la Karn.
	 */
	if (p->firstSerial == 0) {
	    p->firstSerial = p->header.serial;

	/* If an output tracer function is defined, call it with the packet and
	 * network address.  Note this function may modify its arguments. */
	if (rx_almostSent) {
	    int drop = (*rx_almostSent) (p, &addr);
	    /* drop packet if return value is non-zero? */
		deliveryType = 'D';	/* Drop the packet */

	/* Get network byte order header */
	rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
					 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
	    RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

    /* Loop until the packet is sent.  We'd prefer just to use a
     * blocking socket, but unfortunately the interface doesn't
     * allow us to have the socket block in send mode, and not
     * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    waslocked = ISAFS_GLOCK();
    if (!istack && waslocked)
	 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
	/* send failed, so let's hurry up the resend, eh? */
	MUTEX_ENTER(&rx_stats_mutex);
	rx_stats.netSendFailures++;
	MUTEX_EXIT(&rx_stats_mutex);
	for (i = 0; i < len; i++) {
	    p->retryTime = p->timeSent;	/* resend it very soon */
	    clock_Addmsec(&(p->retryTime),
			  10 + (((afs_uint32) p->backoff) << 8));
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
	/* Linux is nice -- it can tell us right away that we cannot
	 * reach this recipient by returning an ENETUNREACH error
	 * code.  So, when this happens let's "down" the host NOW so
	 * we don't sit around waiting for this host to timeout later.
	 */
	if (call && code == -ENETUNREACH)
	    call->lastReceiveTime = 0;
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
    if (!istack && waslocked)
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
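
/* Worked sketch (not part of the original source): total wire length of an
 * n-packet jumbogram as assembled above, assuming every packet but the last
 * carries exactly RX_JUMBOBUFFERSIZE bytes and the last carries lastlen. */
#if 0
static int
example_jumbogram_length(int n, int lastlen)
{
    /* one rx header, then n-1 (data + abbreviated jumbo header) units,
     * then the final packet's data with no trailing jumbo header */
    return RX_HEADER_SIZE
	+ (n - 1) * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE) + lastlen;
}
#endif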
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
rxi_SendSpecial(register struct rx_call *call,
		register struct rx_connection *conn,
		struct rx_packet *optionalPacket, int type, char *data,
		int nbytes, int istack)
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    int savelen = 0, saven = 0;
    int channel, callNumber;
	channel = call->channel;
	callNumber = *call->callNumber;
	/* BUSY packets refer to the next call on this connection */
	if (type == RX_PACKET_TYPE_BUSY) {
	p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	    osi_Panic("rxi_SendSpecial failure");
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;
	rx_packetwrite(p, 0, nbytes, data);
    for (i = 1; i < p->niovecs; i++) {
	if (nbytes <= p->wirevec[i].iov_len) {
	    savelen = p->wirevec[i].iov_len;
	    p->wirevec[i].iov_len = nbytes;
	    p->niovecs = i + 1;	/* so condition fails because i == niovecs */
	nbytes -= p->wirevec[i].iov_len;
	rxi_Send(call, p, istack);
	rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {		/* means we truncated the packet above.  We probably don't */
	/* really need to do this, but it seems safer this way, given that */
	/* sneaky optionalPacket... */
	p->wirevec[i - 1].iov_len = savelen;
    if (!optionalPacket)
    return optionalPacket;
/* Encode the packet's header (from the struct header in the packet to
 * the network byte order representation that is actually sent out on
 * the wire) */
rxi_EncodePacketHeader(register struct rx_packet *p)
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
		   | (((afs_uint32) p->header.flags) << 16)
		   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
/* Decode the packet's header (from net byte order to a struct header) */
rxi_DecodePacketHeader(register struct rx_packet *p)
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);	/* MTUXXX */
    p->header.epoch = ntohl(*buf);
    p->header.cid = ntohl(*buf);
    p->header.callNumber = ntohl(*buf);
    p->header.seq = ntohl(*buf);
    p->header.serial = ntohl(*buf);
    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;
    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
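
/* Wire layout implied by the encode/decode pair above (a sketch, not a
 * struct from the original source): seven 32-bit words, each in network
 * byte order. */
#if 0
struct example_wire_header {
    afs_uint32 epoch;
    afs_uint32 cid;			/* connection id | channel */
    afs_uint32 callNumber;
    afs_uint32 seq;
    afs_uint32 serial;
    afs_uint32 type_flags_status_secidx;	/* packed: type<<24 | flags<<16
						 * | userStatus<<8 | securityIndex */
    afs_uint32 spare_serviceId;		/* spare/checksum<<16 | serviceId */
};
#endif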
rxi_PrepareSendPacket(register struct rx_call *call,
		      register struct rx_packet *p, register int last)
    register struct rx_connection *conn = call->conn;
    ssize_t len;		/* len must be a signed type; it can go negative */
    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
	p->header.flags |= RX_CLIENT_INITIATED;
	p->header.flags |= RX_LAST_PACKET;
    clock_Zero(&p->retryTime);	/* Never yet transmitted */
    clock_Zero(&p->firstSent);	/* Never yet transmitted */
    p->header.serial = 0;	/* Another way of saying never transmitted... */

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;
    for (i = 1; i < p->niovecs && len > 0; i++) {
	len -= p->wirevec[i].iov_len;
	osi_Panic("PrepareSendPacket 1\n");	/* MTUXXX */
    /* Free any extra elements in the wirevec */
    for (j = MAX(2, i); j < p->niovecs; j++) {
	rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
    p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms.  */
rxi_AdjustIfMTU(int mtu)
    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
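
/* Worked example (not from the original source; assumes the usual sizes
 * RX_HEADER_SIZE = 28, RX_JUMBOBUFFERSIZE = 1412, RX_JUMBOHEADERSIZE = 4):
 * for an Ethernet MTU of 1500, adjMTU = 28 + 1412 + 4 = 1444; the 56
 * leftover bytes cannot hold another jumbo buffer, so frags = 0 and the
 * result is 1444. */
#if 0
    assert(rxi_AdjustIfMTU(1500) == 1444);	/* under the sizes above */
#endif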
/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
rxi_AdjustDgramPackets(int frags, int mtu)
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
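
/* Worked example (not from the original source; assumes UDP_HDR_SIZE = 28
 * plus the jumbo sizes noted above): rxi_AdjustDgramPackets(4, 1444)
 * computes maxMTU = 4*(1444+28) - 28 = 5860, clamps against
 * RX_MAX_PACKET_SIZE, subtracts the first/last packet overhead
 * 28 + 2*1412 + 4 = 2856 leaving 3004, and returns 2 + 3004/1416 = 4. */
#if 0
    assert(rxi_AdjustDgramPackets(4, 1444) == 4);	/* under those sizes */
#endif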