2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
11 #include "../afs/param.h"
12 #include <afsconfig.h>
14 #include "../afs/sysincludes.h"
15 #include "../afs/afsincludes.h"
16 #include "../rx/rx_kcommon.h"
17 #include "../rx/rx_clock.h"
18 #include "../rx/rx_queue.h"
19 #include "../rx/rx_packet.h"
20 #else /* defined(UKERNEL) */
21 #include "../h/types.h"
22 #ifndef AFS_LINUX20_ENV
23 #include "../h/systm.h"
25 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
26 #include "../afs/sysincludes.h"
28 #include "../h/socket.h"
29 #include "../netinet/in.h"
30 #include "../afs/afs_osi.h"
31 #include "../rx/rx_kmutex.h"
32 #include "../rx/rx_clock.h"
33 #include "../rx/rx_queue.h"
35 #include <sys/sysmacros.h>
37 #include "../rx/rx_packet.h"
38 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
39 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
40 #include "../sys/mount.h" /* it gets pulled in by something later anyway */
42 #include "../h/mbuf.h"
44 #endif /* defined(UKERNEL) */
45 #include "../rx/rx_globals.h"
47 #include <afs/param.h>
48 #include <afsconfig.h>
49 #include "sys/types.h"
52 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
56 #include <sys/socket.h>
57 #include <netinet/in.h>
58 #endif /* AFS_NT40_ENV */
59 #include "rx_xmit_nt.h"
62 #include <sys/socket.h>
63 #include <netinet/in.h>
69 #include <sys/sysmacros.h>
71 #include "rx_packet.h"
72 #include "rx_globals.h"
74 #include "rx_internal.h"
87 /* rxdb_fileID is used to identify the lock location, along with line#. */
88 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
89 #endif /* RX_LOCKS_DB */
90 struct rx_packet *rx_mallocedP = 0;
92 extern char cml_version_number[];
93 extern int (*rx_almostSent)();
95 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
96 afs_int32 ahost, short aport, afs_int32 istack);
98 /* some rules about packets:
99 * 1. When a packet is allocated, the final iov_buf contains room for
100 * a security trailer, but iov_len masks that fact. If the security
101 * package wants to add the trailer, it may do so, and then extend
102 * iov_len appropriately. For this reason, packet's niovecs and
103 * iov_len fields should be accurate before calling PreparePacket.
107 * all packet buffers (iov_base) are integral multiples of
109 * offset is an integral multiple of the word size.
111 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
115 for (l=0, i=1; i< packet->niovecs ; i++ ) {
116 if (l + packet->wirevec[i].iov_len > offset) {
117 return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
119 l += packet->wirevec[i].iov_len;
126 * all packet buffers (iov_base) are integral multiples of the word size.
127 * offset is an integral multiple of the word size.
129 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
133 for (l=0, i=1; i< packet->niovecs ; i++ ) {
134 if (l + packet->wirevec[i].iov_len > offset) {
135 *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
139 l += packet->wirevec[i].iov_len;
146 * all packet buffers (iov_base) are integral multiples of the
148 * offset is an integral multiple of the word size.
150 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
152 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
153 int resid, char *out)
155 unsigned int i, j, l, r;
156 for (l=0, i=1; i< packet->niovecs ; i++ ) {
157 if (l + packet->wirevec[i].iov_len > offset) {
160 l += packet->wirevec[i].iov_len;
163 /* i is the iovec which contains the first little bit of data in which we
164 * are interested. l is the total length of everything prior to this iovec.
165 * j is the number of bytes we can safely copy out of this iovec.
168 while ((resid > 0) && (i < packet->niovecs)) {
169 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
170 bcopy ((char *)(packet->wirevec[i].iov_base) + (offset - l), out, j);
172 l += packet->wirevec[i].iov_len;
176 return (resid ? (r - resid) : r);
181 * all packet buffers (iov_base) are integral multiples of the
183 * offset is an integral multiple of the word size.
185 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
191 for (l=0, i=1; i < packet->niovecs; i++ ) {
192 if (l + packet->wirevec[i].iov_len > offset) {
195 l += packet->wirevec[i].iov_len;
198 /* i is the iovec which contains the first little bit of data in which we
199 * are interested. l is the total length of everything prior to this iovec.
200 * j is the number of bytes we can safely copy into this iovec.
203 while ((resid > 0) && (i < RX_MAXWVECS)) {
204 if (i >= packet->niovecs)
205 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
208 b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
209 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
212 l += packet->wirevec[i].iov_len;
216 return (resid ? (r - resid) : r);
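
/* A minimal usage sketch for the slow read/write paths above, kept under
 * #if 0 so it is never compiled.  Both routines return the number of bytes
 * actually transferred; the elided final parameter of rx_SlowWritePacket is
 * assumed here to be the source buffer. */
#if 0
static void example_slow_copy(struct rx_packet *p)
{
    char buf[64];
    afs_int32 nout, nin;

    /* copy 64 bytes of caller data into the packet's data area at offset 0;
     * rx_SlowWritePacket allocates continuation buffers if it runs out of
     * room (see rxi_AllocDataBuf below) */
    nout = rx_SlowWritePacket(p, 0, sizeof(buf), buf);

    /* copy the same region back out; a short count means the packet did not
     * contain that much data */
    nin = rx_SlowReadPacket(p, 0, sizeof(buf), buf);
}
#endif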
219 static struct rx_packet * allocCBuf(int class)
225 MUTEX_ENTER(&rx_freePktQ_lock);
228 if (rxi_OverQuota(class)) {
230 rxi_NeedMorePackets = TRUE;
231 MUTEX_ENTER(&rx_stats_mutex);
233 case RX_PACKET_CLASS_RECEIVE:
234 rx_stats.receivePktAllocFailures++;
236 case RX_PACKET_CLASS_SEND:
237 rx_stats.sendPktAllocFailures++;
239 case RX_PACKET_CLASS_SPECIAL:
240 rx_stats.specialPktAllocFailures++;
242 case RX_PACKET_CLASS_RECV_CBUF:
243 rx_stats.receiveCbufPktAllocFailures++;
245 case RX_PACKET_CLASS_SEND_CBUF:
246 rx_stats.sendCbufPktAllocFailures++;
249 MUTEX_EXIT(&rx_stats_mutex);
253 if (queue_IsEmpty(&rx_freePacketQueue)) {
255 rxi_NeedMorePackets = TRUE;
259 if (queue_IsEmpty(&rx_freePacketQueue)) {
260 rxi_MorePacketsNoLock(rx_initSendWindow);
265 c = queue_First(&rx_freePacketQueue, rx_packet);
267 if (c->header.flags != RX_FREE_PACKET)
268 osi_Panic("rxi_AllocPacket: packet not free\n");
274 MUTEX_EXIT(&rx_freePktQ_lock);
281 * Free a packet currently used as a continuation buffer
283 void rxi_freeCBuf(struct rx_packet *c)
288 MUTEX_ENTER(&rx_freePktQ_lock);
290 rxi_FreePacketNoLock(c);
291 /* Wakeup anyone waiting for packets */
294 MUTEX_EXIT(&rx_freePktQ_lock);
298 /* this one is kind of awful.
299 * In rxkad, the packet has already been shortened and made ready for
300 * sending when, all of a sudden, we discover we need some of that space back.
301 * This isn't terribly general, because it knows that the packets are only
302 * rounded up to the EBS (userdata + security header).
304 int rxi_RoundUpPacket(p, nb)
305 struct rx_packet * p;
310 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
311 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
312 p->wirevec[i].iov_len += nb;
317 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
318 p->wirevec[i].iov_len += nb;
325 /* get sufficient space to store nb bytes of data (or more), and hook
326 * it into the supplied packet. Returns a value <= 0 if successful; otherwise
327 * returns the number of bytes (> 0) that it failed to come up with.
328 * Don't need to worry about locking on packet, since only
329 * one thread can manipulate one at a time. Locking on continuation
330 * packets is handled by allocCBuf */
331 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
332 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
336 for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
337 register struct rx_packet *cb;
338 if ((cb = allocCBuf(class))) {
339 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
340 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
341 nb -= RX_CBUFFERSIZE;
342 p->length += RX_CBUFFERSIZE;
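
/* Sketch (not compiled) of the pattern used later by rxi_AllocSendPacket and
 * rxi_ReadPacket: grow a packet with rxi_AllocDataBuf until its data area can
 * hold `want` bytes.  A return value > 0 is the shortfall we could not get. */
#if 0
static int example_grow_packet(struct rx_packet *p, int want)
{
    int shortfall = 0;
    if (want > p->length)
        shortfall = rxi_AllocDataBuf(p, want - p->length,
                                     RX_PACKET_CLASS_SEND_CBUF);
    return shortfall;           /* <= 0 means p->length now covers want */
}
#endif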
351 /* Add more packet buffers */
352 void rxi_MorePackets(int apackets)
354 struct rx_packet *p, *e;
358 getme = apackets * sizeof(struct rx_packet);
359 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
361 PIN(p, getme); /* XXXXX */
362 bzero((char *)p, getme);
365 MUTEX_ENTER(&rx_freePktQ_lock);
367 for (e = p + apackets; p<e; p++) {
368 p->wirevec[0].iov_base = (char *) (p->wirehead);
369 p->wirevec[0].iov_len = RX_HEADER_SIZE;
370 p->wirevec[1].iov_base = (char *) (p->localdata);
371 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
372 p->header.flags = RX_FREE_PACKET;
375 queue_Append(&rx_freePacketQueue, p);
377 rx_nFreePackets += apackets;
378 rxi_NeedMorePackets = FALSE;
382 MUTEX_EXIT(&rx_freePktQ_lock);
387 /* Add more packet buffers */
388 void rxi_MorePacketsNoLock(int apackets)
390 struct rx_packet *p, *e;
393 /* allocate enough packets that 1/4 of the packets will be able
394 * to hold maximal amounts of data */
395 apackets += (apackets/4)
396 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
397 getme = apackets * sizeof(struct rx_packet);
398 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
400 bzero((char *)p, getme);
402 for (e = p + apackets; p<e; p++) {
403 p->wirevec[0].iov_base = (char *) (p->wirehead);
404 p->wirevec[0].iov_len = RX_HEADER_SIZE;
405 p->wirevec[1].iov_base = (char *) (p->localdata);
406 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
407 p->header.flags = RX_FREE_PACKET;
410 queue_Append(&rx_freePacketQueue, p);
412 rx_nFreePackets += apackets;
413 rxi_NeedMorePackets = FALSE;
418 void rxi_FreeAllPackets(void)
420 /* must be called at proper interrupt level, etcetera */
421 /* MTUXXX need to free all Packets */
422 osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
423 UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
426 /* Allocate more packets iff we need more continuation buffers */
427 /* In kernel, can't page in memory with interrupts disabled, so we
428 * don't use the event mechanism. */
429 void rx_CheckPackets()
431 if (rxi_NeedMorePackets) {
432 rxi_MorePackets(rx_initSendWindow);
436 /* In the packet freeing routine below, the assumption is that
437 we want all of the packets to be used equally frequently, so that we
438 don't get packet buffers paging out. It would be just as valid to
439 assume that we DO want them to page out if not many are being used.
440 In any event, we assume the former, and append the packets to the end
442 /* This explanation is bogus. The free list doesn't remain in any kind of
443 useful order for long: the packets in use get pretty much randomly scattered
444 across all the pages. In order to permit unused {packets,bufs} to page out, they
445 must be stored so that packets which are adjacent in memory are adjacent in the
446 free list. An array springs rapidly to mind.
449 /* Actually free the packet p. */
450 void rxi_FreePacketNoLock(struct rx_packet *p)
452 dpf(("Free %x\n", p));
454 if (p->header.flags & RX_FREE_PACKET)
455 osi_Panic("rxi_FreePacketNoLock: packet already free\n");
457 p->header.flags = RX_FREE_PACKET;
458 queue_Append(&rx_freePacketQueue, p);
461 int rxi_FreeDataBufsNoLock(p, first)
462 struct rx_packet * p;
465 struct iovec *iov, *end;
467 if (first != 1) /* MTUXXX */
468 osi_Panic("FreeDataBufs 1: first must be 1");
469 iov = &p->wirevec[1];
470 end = iov + (p->niovecs-1);
471 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
472 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
473 for (iov++ ; iov < end ; iov++) {
475 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
476 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
484 int rxi_nBadIovecs = 0;
486 /* rxi_RestoreDataBufs
488 * Restore the correct sizes to the iovecs. Called when reusing a packet
489 * for reading off the wire.
491 void rxi_RestoreDataBufs(struct rx_packet *p)
494 struct iovec *iov = &p->wirevec[2];
496 p->wirevec[0].iov_base = (char *) (p->wirehead);
497 p->wirevec[0].iov_len = RX_HEADER_SIZE;
498 p->wirevec[1].iov_base = (char *) (p->localdata);
499 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
501 for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
502 if (!iov->iov_base) {
507 iov->iov_len = RX_CBUFFERSIZE;
511 int rxi_TrimDataBufs(p, first)
512 struct rx_packet * p;
516 struct iovec *iov, *end;
520 osi_Panic("TrimDataBufs 1: first must be 1");
522 /* Skip over continuation buffers containing message data */
523 iov = &p->wirevec[2];
524 end = iov + (p->niovecs-2);
525 length = p->length - p->wirevec[1].iov_len;
526 for (; iov < end && length > 0 ; iov++) {
528 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
529 length -= iov->iov_len;
532 /* iov now points to the first empty data buffer. */
537 MUTEX_ENTER(&rx_freePktQ_lock);
539 for (; iov < end ; iov++) {
541 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
542 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
547 MUTEX_EXIT(&rx_freePktQ_lock);
553 /* Free the packet p. P is assumed not to be on any queue, i.e.
554 * remove it yourself first if you call this routine. */
555 void rxi_FreePacket(struct rx_packet *p)
560 MUTEX_ENTER(&rx_freePktQ_lock);
562 rxi_FreeDataBufsNoLock(p,1);
563 rxi_FreePacketNoLock(p);
564 /* Wakeup anyone waiting for packets */
567 MUTEX_EXIT(&rx_freePktQ_lock);
572 /* rxi_AllocPacket sets up p->length so it reflects the number of
573 * bytes in the packet at this point, **not including** the header.
574 * The header is absolutely necessary; besides, this is the way the
575 * length field is usually used */
576 struct rx_packet *rxi_AllocPacketNoLock(class)
579 register struct rx_packet *p;
582 if (rxi_OverQuota(class)) {
583 rxi_NeedMorePackets = TRUE;
584 MUTEX_ENTER(&rx_stats_mutex);
586 case RX_PACKET_CLASS_RECEIVE:
587 rx_stats.receivePktAllocFailures++;
589 case RX_PACKET_CLASS_SEND:
590 rx_stats.sendPktAllocFailures++;
592 case RX_PACKET_CLASS_SPECIAL:
593 rx_stats.specialPktAllocFailures++;
595 case RX_PACKET_CLASS_RECV_CBUF:
596 rx_stats.receiveCbufPktAllocFailures++;
598 case RX_PACKET_CLASS_SEND_CBUF:
599 rx_stats.sendCbufPktAllocFailures++;
602 MUTEX_EXIT(&rx_stats_mutex);
603 return (struct rx_packet *) 0;
607 MUTEX_ENTER(&rx_stats_mutex);
608 rx_stats.packetRequests++;
609 MUTEX_EXIT(&rx_stats_mutex);
612 if (queue_IsEmpty(&rx_freePacketQueue))
613 osi_Panic("rxi_AllocPacket error");
615 if (queue_IsEmpty(&rx_freePacketQueue))
616 rxi_MorePacketsNoLock(rx_initSendWindow);
620 p = queue_First(&rx_freePacketQueue, rx_packet);
621 if (p->header.flags != RX_FREE_PACKET)
622 osi_Panic("rxi_AllocPacket: packet not free\n");
624 dpf(("Alloc %x, class %d\n", p, class));
629 /* have to do this here because rx_FlushWrite fiddles with the iovs in
630 * order to truncate outbound packets. In the near future, may need
631 * to allocate bufs from a static pool here, and/or in AllocSendPacket
633 p->wirevec[0].iov_base = (char *) (p->wirehead);
634 p->wirevec[0].iov_len = RX_HEADER_SIZE;
635 p->wirevec[1].iov_base = (char *) (p->localdata);
636 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
638 p->length = RX_FIRSTBUFFERSIZE;
642 struct rx_packet *rxi_AllocPacket(class)
645 register struct rx_packet *p;
647 MUTEX_ENTER(&rx_freePktQ_lock);
648 p = rxi_AllocPacketNoLock(class);
649 MUTEX_EXIT(&rx_freePktQ_lock);
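
/* The basic allocate/use/free pairing, sketched here (not compiled).
 * rxi_AllocPacket hands back a packet whose length covers only the first
 * data buffer (RX_FIRSTBUFFERSIZE; the header is not included), and the
 * caller must return it with rxi_FreePacket after removing it from any
 * queue it was placed on. */
#if 0
static void example_packet_lifetime(void)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
    if (p) {
        /* ... fill in p->header and the data area ... */
        rxi_FreePacket(p);
    }
}
#endif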
653 /* This guy comes up with as many buffers as it {takes,can get} given
654 * the MTU for this call. It also sets the packet length before
655 * returning. caution: this is often called at NETPRI
656 * Called with call locked.
658 struct rx_packet *rxi_AllocSendPacket(call, want)
659 register struct rx_call *call;
662 register struct rx_packet *p = (struct rx_packet *) 0;
664 register unsigned delta;
667 mud = call->MTU - RX_HEADER_SIZE;
668 delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
669 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
671 while (!(call->error)) {
672 MUTEX_ENTER(&rx_freePktQ_lock);
673 /* if an error occurred, or we get the packet we want, we're done */
674 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
675 MUTEX_EXIT(&rx_freePktQ_lock);
678 want = MIN(want, mud);
680 if ((unsigned) want > p->length)
681 (void) rxi_AllocDataBuf(p, (want - p->length),
682 RX_PACKET_CLASS_SEND_CBUF);
684 if ((unsigned) p->length > mud)
687 if (delta >= p->length) {
696 /* no error occurred, and we didn't get a packet, so we sleep.
697 * At this point, we assume that packets will be returned
698 * sooner or later, as packets are acknowledged, and so we
701 call->flags |= RX_CALL_WAIT_PACKETS;
702 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
703 MUTEX_EXIT(&call->lock);
704 rx_waitingForPackets = 1;
706 #ifdef RX_ENABLE_LOCKS
707 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
709 osi_rxSleep(&rx_waitingForPackets);
711 MUTEX_EXIT(&rx_freePktQ_lock);
712 MUTEX_ENTER(&call->lock);
713 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
714 call->flags &= ~RX_CALL_WAIT_PACKETS;
723 /* count the number of used FDs */
724 static int CountFDs(amax)
727 register int i, code;
731 for(i=0;i<amax;i++) {
732 code = fstat(i, &tstat);
733 if (code == 0) count++;
740 #define CountFDs(amax) amax
744 #if !defined(KERNEL) || defined(UKERNEL)
746 /* This function reads a single packet from the interface into the
747 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
748 * (host,port) of the sender are stored in the supplied variables, and
749 * the data length of the packet is stored in the packet structure.
750 * The header is decoded. */
751 int rxi_ReadPacket(socket, p, host, port)
753 register struct rx_packet *p;
757 struct sockaddr_in from;
760 register afs_int32 tlen, savelen;
762 rx_computelen(p, tlen);
763 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
765 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
766 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
767 * it once in order to avoid races. */
770 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
778 /* Extend the last iovec for padding; it's just to make sure that the
779 * read doesn't return more data than we expect, and is done to get around
780 * our problems caused by the lack of a length field in the rx header.
781 * Use the extra buffer that follows the localdata in each packet
783 savelen = p->wirevec[p->niovecs].iov_len;
784 p->wirevec[p->niovecs].iov_len += RX_EXTRABUFFERSIZE;
786 bzero((char *)&msg, sizeof(msg));
787 msg.msg_name = (char *) &from;
788 msg.msg_namelen = sizeof(struct sockaddr_in);
789 msg.msg_iov = p->wirevec;
790 msg.msg_iovlen = p->niovecs;
791 nbytes = rxi_Recvmsg(socket, &msg, 0);
793 /* restore the vec to its correct state */
794 p->wirevec[p->niovecs].iov_len = savelen;
796 p->length = (nbytes - RX_HEADER_SIZE);
797 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
799 rxi_MorePackets(rx_initSendWindow);
801 else if (nbytes < 0 && errno == EWOULDBLOCK) {
802 MUTEX_ENTER(&rx_stats_mutex);
803 rx_stats.noPacketOnRead++;
804 MUTEX_EXIT(&rx_stats_mutex);
808 MUTEX_ENTER(&rx_stats_mutex);
809 rx_stats.bogusPacketOnRead++;
810 rx_stats.bogusHost = from.sin_addr.s_addr;
811 MUTEX_EXIT(&rx_stats_mutex);
812 dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
813 from.sin_port,nbytes));
818 /* Extract packet header. */
819 rxi_DecodePacketHeader(p);
821 *host = from.sin_addr.s_addr;
822 *port = from.sin_port;
823 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
824 struct rx_peer *peer;
825 MUTEX_ENTER(&rx_stats_mutex);
826 rx_stats.packetsRead[p->header.type-1]++;
827 MUTEX_EXIT(&rx_stats_mutex);
829 * Try to look up this peer structure. If it doesn't exist,
830 * don't create a new one -
831 * we don't keep count of the bytes sent/received if a peer
832 * structure doesn't already exist.
834 * The peer/connection cleanup code assumes that there is 1 peer
835 * per connection. If we actually created a peer structure here
836 * and this packet was an rxdebug packet, the peer structure would
837 * never be cleaned up.
839 peer = rxi_FindPeer(*host, *port, 0, 0);
841 MUTEX_ENTER(&peer->peer_lock);
842 hadd32(peer->bytesReceived, p->length);
843 MUTEX_EXIT(&peer->peer_lock);
847 /* Free any empty packet buffers at the end of this packet */
848 rxi_TrimDataBufs(p, 1);
854 #endif /* !KERNEL || UKERNEL */
856 /* This function splits off the first packet in a jumbo packet.
857 * As of AFS 3.5, jumbograms contain more than one fixed size
858 * packet, and the RX_JUMBO_PACKET flag is set in all but the
859 * last packet header. All packets (except the last) are padded to
860 * fall on RX_CBUFFERSIZE boundaries.
861 * HACK: We store the length of the first n-1 packets in the
862 * last two pad bytes. */
864 struct rx_packet *rxi_SplitJumboPacket(p, host, port, first)
865 register struct rx_packet *p;
870 struct rx_packet *np;
871 struct rx_jumboHeader *jp;
877 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
878 * bytes in length. All but the first packet are preceded by
879 * an abbreviated four byte header. The length of the last packet
880 * is calculated from the size of the jumbogram. */
881 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
883 if ((int)p->length < length) {
884 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
887 niov = p->niovecs - 2;
889 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
892 iov = &p->wirevec[2];
893 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
895 /* Get a pointer to the abbreviated packet header */
896 jp = (struct rx_jumboHeader *)
897 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
899 /* Set up the iovecs for the next packet */
900 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
901 np->wirevec[0].iov_len = sizeof(struct rx_header);
902 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
903 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
904 np->niovecs = niov+1;
905 for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
906 np->wirevec[i] = *iov;
908 np->length = p->length - length;
909 p->length = RX_JUMBOBUFFERSIZE;
912 /* Convert the jumbo packet header to host byte order */
913 temp = ntohl(*(afs_uint32 *)jp);
914 jp->flags = (u_char)(temp >> 24);
915 jp->cksum = (u_short)(temp);
917 /* Fill in the packet header */
918 np->header = p->header;
919 np->header.serial = p->header.serial + 1;
920 np->header.seq = p->header.seq + 1;
921 np->header.flags = jp->flags;
922 np->header.spare = jp->cksum;
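
/* On-the-wire shape of an AFS 3.5 jumbogram, as implied by the code above
 * (a sketch, sizes symbolic):
 *
 *   +-----------------+--------------------+----+--------------------+-----
 *   | full RX header  | RX_JUMBOBUFFERSIZE | jh | RX_JUMBOBUFFERSIZE | ...
 *   | (RX_HEADER_SIZE)| bytes of data      |    | bytes of data      |
 *   +-----------------+--------------------+----+--------------------+-----
 *
 * Each "jh" is the RX_JUMBOHEADERSIZE abbreviated header carrying the flags
 * and header checksum of the packet that follows it; every RX header except
 * the last has RX_JUMBO_PACKET set, and the last packet's length is whatever
 * remains of the datagram. */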
928 /* Send a udp datagram */
929 int osi_NetSend(socket, addr, dvec, nvecs, length, istack)
939 memset(&msg, 0, sizeof(msg));
941 msg.msg_iovlen = nvecs;
943 msg.msg_namelen = sizeof(struct sockaddr_in);
945 rxi_Sendmsg(socket, &msg, 0);
949 #elif !defined(UKERNEL)
950 /* osi_NetSend is defined in afs/afs_osinet.c
951 * message receipt is done in rxk_input or rx_put.
956 * Copy an mblock to the contiguous area pointed to by cp.
957 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
958 * but it doesn't really.
959 * Returns the number of bytes not transferred.
960 * The message is NOT changed.
962 static int cpytoc(mp, off, len, cp)
964 register int off, len;
969 for (;mp && len > 0; mp = mp->b_cont) {
970 if (mp->b_datap->db_type != M_DATA) {
973 n = MIN(len, (mp->b_wptr - mp->b_rptr));
974 bcopy((char *)mp->b_rptr, cp, n);
982 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
983 * but it doesn't really.
984 * This sucks, anyway, do it like m_cpy.... below
986 static int cpytoiovec(mp, off, len, iovs, niovs)
989 register struct iovec *iovs;
991 register int m,n,o,t,i;
993 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
994 if (mp->b_datap->db_type != M_DATA) {
997 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1003 t = iovs[i].iov_len;
1006 bcopy((char *)mp->b_rptr, iovs[i].iov_base + o, m);
1015 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1016 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1018 #if !defined(AFS_LINUX20_ENV)
1019 static int m_cpytoiovec(m, off, len, iovs, niovs)
1021 int off, len, niovs;
1022 struct iovec iovs[];
1025 unsigned int l1, l2, i, t;
1027 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1028 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
1031 if (m->m_len <= off) {
1041 p1 = mtod(m, caddr_t)+off;
1042 l1 = m->m_len - off;
1044 p2 = iovs[0].iov_base;
1045 l2 = iovs[0].iov_len;
1048 t = MIN(l1, MIN(l2, (unsigned int)len));
1057 p1 = mtod(m, caddr_t);
1063 p2 = iovs[i].iov_base;
1064 l2 = iovs[i].iov_len;
1072 #endif /* AFS_SUN5_ENV */
1074 #if !defined(AFS_LINUX20_ENV)
1075 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1082 struct rx_packet *phandle;
1083 int hdr_len, data_len;
1087 code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1093 #endif /*KERNEL && !UKERNEL*/
1096 /* send a response to a debug packet */
1098 struct rx_packet *rxi_ReceiveDebugPacket(ap, asocket, ahost, aport, istack)
1102 register struct rx_packet *ap;
1105 struct rx_debugIn tin;
1107 struct rx_serverQueueEntry *np, *nqe;
1109 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1110 /* all done with packet, now set length to the truth, so we can
1111 * reuse this packet */
1112 rx_computelen(ap, ap->length);
1114 tin.type = ntohl(tin.type);
1115 tin.index = ntohl(tin.index);
1117 case RX_DEBUGI_GETSTATS: {
1118 struct rx_debugStats tstat;
1120 /* get basic stats */
1121 bzero ((char *)&tstat, sizeof(tstat)); /* make sure spares are zero */
1122 tstat.version = RX_DEBUGI_VERSION;
1123 #ifndef RX_ENABLE_LOCKS
1124 tstat.waitingForPackets = rx_waitingForPackets;
1126 tstat.nFreePackets = htonl(rx_nFreePackets);
1127 tstat.callsExecuted = htonl(rxi_nCalls);
1128 tstat.packetReclaims = htonl(rx_packetReclaims);
1129 tstat.usedFDs = CountFDs(64);
1130 tstat.nWaiting = htonl(rx_nWaiting);
1131 queue_Count( &rx_idleServerQueue, np, nqe,
1132 rx_serverQueueEntry, tstat.idleThreads);
1133 tstat.idleThreads = htonl(tstat.idleThreads);
1134 tl = sizeof(struct rx_debugStats) - ap->length;
1136 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1139 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1140 ap->length = sizeof(struct rx_debugStats);
1141 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1142 rx_computelen(ap, ap->length);
1147 case RX_DEBUGI_GETALLCONN:
1148 case RX_DEBUGI_GETCONN: {
1150 register struct rx_connection *tc;
1151 struct rx_call *tcall;
1152 struct rx_debugConn tconn;
1153 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1156 tl = sizeof(struct rx_debugConn) - ap->length;
1158 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1162 bzero ((char *)&tconn, sizeof(tconn)); /* make sure spares are zero */
1163 /* get N'th (maybe) "interesting" connection info */
1164 for(i=0;i<rx_hashTableSize;i++) {
1165 #if !defined(KERNEL)
1166 /* the time complexity of the algorithm used here
1167 * exponentially increases with the number of connections.
1169 #ifdef AFS_PTHREAD_ENV
1172 (void) IOMGR_Poll();
1175 MUTEX_ENTER(&rx_connHashTable_lock);
1176 /* We might be slightly out of step since we are not
1177 * locking each call, but this is only debugging output.
1179 for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
1180 if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1181 tconn.host = tc->peer->host;
1182 tconn.port = tc->peer->port;
1183 tconn.cid = htonl(tc->cid);
1184 tconn.epoch = htonl(tc->epoch);
1185 tconn.serial = htonl(tc->serial);
1186 for(j=0;j<RX_MAXCALLS;j++) {
1187 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1188 if ((tcall=tc->call[j])) {
1189 tconn.callState[j] = tcall->state;
1190 tconn.callMode[j] = tcall->mode;
1191 tconn.callFlags[j] = tcall->flags;
1192 if (queue_IsNotEmpty(&tcall->rq))
1193 tconn.callOther[j] |= RX_OTHER_IN;
1194 if (queue_IsNotEmpty(&tcall->tq))
1195 tconn.callOther[j] |= RX_OTHER_OUT;
1197 else tconn.callState[j] = RX_STATE_NOTINIT;
1200 tconn.natMTU = htonl(tc->peer->natMTU);
1201 tconn.error = htonl(tc->error);
1202 tconn.flags = tc->flags;
1203 tconn.type = tc->type;
1204 tconn.securityIndex = tc->securityIndex;
1205 if (tc->securityObject) {
1206 RXS_GetStats (tc->securityObject, tc,
1208 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1209 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1212 DOHTONL(packetsReceived);
1213 DOHTONL(packetsSent);
1214 DOHTONL(bytesReceived);
1217 i<sizeof(tconn.secStats.spares)/sizeof(short);
1221 i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
1226 MUTEX_EXIT(&rx_connHashTable_lock);
1227 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1229 ap->length = sizeof(struct rx_debugConn);
1230 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1235 MUTEX_EXIT(&rx_connHashTable_lock);
1237 /* if we make it here, there are no interesting connections */
1238 tconn.cid = htonl(0xffffffff); /* means end */
1239 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1241 ap->length = sizeof(struct rx_debugConn);
1242 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1248 * Pass back all the peer structures we have available
1251 case RX_DEBUGI_GETPEER: {
1253 register struct rx_peer *tp;
1254 struct rx_debugPeer tpeer;
1257 tl = sizeof(struct rx_debugPeer) - ap->length;
1259 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1263 bzero ((char *)&tpeer, sizeof(tpeer));
1264 for(i=0;i<rx_hashTableSize;i++) {
1265 #if !defined(KERNEL)
1266 /* the time complexity of the algorithm used here
1267 * exponentially increases with the number of peers.
1269 * Yielding after processing each hash table entry
1270 * and dropping rx_peerHashTable_lock
1271 * also increases the risk that we will miss a new
1272 * entry - but we are willing to live with this
1273 * limitation since this is meant for debugging only
1275 #ifdef AFS_PTHREAD_ENV
1278 (void) IOMGR_Poll();
1281 MUTEX_ENTER(&rx_peerHashTable_lock);
1282 for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
1283 if (tin.index-- <= 0) {
1284 tpeer.host = tp->host;
1285 tpeer.port = tp->port;
1286 tpeer.ifMTU = htons(tp->ifMTU);
1287 tpeer.idleWhen = htonl(tp->idleWhen);
1288 tpeer.refCount = htons(tp->refCount);
1289 tpeer.burstSize = tp->burstSize;
1290 tpeer.burst = tp->burst;
1291 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1292 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1293 tpeer.rtt = htonl(tp->rtt);
1294 tpeer.rtt_dev = htonl(tp->rtt_dev);
1295 tpeer.timeout.sec = htonl(tp->timeout.sec);
1296 tpeer.timeout.usec = htonl(tp->timeout.usec);
1297 tpeer.nSent = htonl(tp->nSent);
1298 tpeer.reSends = htonl(tp->reSends);
1299 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1300 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1301 tpeer.rateFlag = htonl(tp->rateFlag);
1302 tpeer.natMTU = htons(tp->natMTU);
1303 tpeer.maxMTU = htons(tp->maxMTU);
1304 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1305 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1306 tpeer.MTU = htons(tp->MTU);
1307 tpeer.cwind = htons(tp->cwind);
1308 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1309 tpeer.congestSeq = htons(tp->congestSeq);
1310 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1311 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1312 tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1313 tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
1315 MUTEX_EXIT(&rx_peerHashTable_lock);
1316 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1318 ap->length = sizeof(struct rx_debugPeer);
1319 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1324 MUTEX_EXIT(&rx_peerHashTable_lock);
1326 /* if we make it here, there are no interesting peers */
1327 tpeer.host = htonl(0xffffffff); /* means end */
1328 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1330 ap->length = sizeof(struct rx_debugPeer);
1331 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1336 case RX_DEBUGI_RXSTATS: {
1340 tl = sizeof(rx_stats) - ap->length;
1342 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1346 /* Since it's all int32s, convert to network order with a loop. */
1347 MUTEX_ENTER(&rx_stats_mutex);
1348 s = (afs_int32 *)&rx_stats;
1349 for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1350 rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1353 ap->length = sizeof(rx_stats);
1354 MUTEX_EXIT(&rx_stats_mutex);
1355 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1361 /* error response packet */
1362 tin.type = htonl(RX_DEBUGI_BADTYPE);
1363 tin.index = tin.type;
1364 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1366 ap->length = sizeof(struct rx_debugIn);
1367 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1374 struct rx_packet *rxi_ReceiveVersionPacket(ap, asocket, ahost, aport, istack)
1378 register struct rx_packet *ap;
1382 rx_packetwrite(ap, 0, 65, cml_version_number+4);
1385 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1391 /* send a debug packet back to the sender */
1392 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1393 afs_int32 ahost, short aport, afs_int32 istack)
1395 struct sockaddr_in taddr;
1401 int waslocked = ISAFS_GLOCK();
1404 taddr.sin_family = AF_INET;
1405 taddr.sin_port = aport;
1406 taddr.sin_addr.s_addr = ahost;
1409 /* We need to trim the niovecs. */
1410 nbytes = apacket->length;
1411 for (i=1; i < apacket->niovecs; i++) {
1412 if (nbytes <= apacket->wirevec[i].iov_len) {
1413 savelen = apacket->wirevec[i].iov_len;
1414 saven = apacket->niovecs;
1415 apacket->wirevec[i].iov_len = nbytes;
1416 apacket->niovecs = i+1; /* so condition fails because i == niovecs */
1418 else nbytes -= apacket->wirevec[i].iov_len;
1422 if (waslocked) AFS_GUNLOCK();
1424 /* debug packets are not reliably delivered, hence the cast below. */
1425 (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1426 apacket->length+RX_HEADER_SIZE, istack);
1428 if (waslocked) AFS_GLOCK();
1431 if (saven) { /* means we truncated the packet above. */
1432 apacket->wirevec[i-1].iov_len = savelen;
1433 apacket->niovecs = saven;
1438 /* Send the packet to appropriate destination for the specified
1439 * connection. The header is first encoded and placed in the packet.
1441 void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
1447 struct sockaddr_in addr;
1448 register struct rx_peer *peer = conn->peer;
1451 char deliveryType = 'S';
1453 /* The address we're sending the packet to */
1454 addr.sin_family = AF_INET;
1455 addr.sin_port = peer->port;
1456 addr.sin_addr.s_addr = peer->host;
1458 /* This stuff should be revamped, I think, so that most, if not
1459 * all, of the header stuff is always added here. We could
1460 * probably do away with the encode/decode routines. XXXXX */
1462 /* Stamp each packet with a unique serial number. The serial
1463 * number is maintained on a connection basis because some types
1464 * of security may be based on the serial number of the packet,
1465 * and security is handled on a per authenticated-connection
1467 /* Pre-increment, to guarantee no zero serial number; a zero
1468 * serial number means the packet was never sent. */
1469 MUTEX_ENTER(&conn->conn_data_lock);
1470 p->header.serial = ++conn->serial;
1471 MUTEX_EXIT(&conn->conn_data_lock);
1472 /* This is so we can adjust retransmit time-outs better in the face of
1473 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1475 if (p->firstSerial == 0) {
1476 p->firstSerial = p->header.serial;
1480 /* If an output tracer function is defined, call it with the packet and
1481 * network address. Note this function may modify its arguments. */
1482 if (rx_almostSent) {
1483 int drop = (*rx_almostSent) (p, &addr);
1484 /* drop packet if return value is non-zero? */
1485 if (drop) deliveryType = 'D'; /* Drop the packet */
1489 /* Get network byte order header */
1490 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1491 * touch ALL the fields */
1493 /* Send the packet out on the same socket that related packets are being
1495 socket = (conn->type == RX_CLIENT_CONNECTION
1496 ? rx_socket : conn->service->socket);
1499 /* Possibly drop this packet, for testing purposes */
1500 if ((deliveryType == 'D') ||
1501 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1502 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1503 deliveryType = 'D'; /* Drop the packet */
1506 deliveryType = 'S'; /* Send the packet */
1507 #endif /* RXDEBUG */
1509 /* Loop until the packet is sent. We'd prefer just to use a
1510 * blocking socket, but unfortunately the interface doesn't
1511 * allow us to have the socket block in send mode, and not
1512 * block in receive mode */
1515 waslocked = ISAFS_GLOCK();
1516 if (waslocked) AFS_GUNLOCK();
1518 if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1519 p->length+RX_HEADER_SIZE, istack)){
1520 /* send failed, so let's hurry up the resend, eh? */
1521 MUTEX_ENTER(&rx_stats_mutex);
1522 rx_stats.netSendFailures++;
1523 MUTEX_EXIT(&rx_stats_mutex);
1524 p->retryTime = p->timeSent; /* resend it very soon */
1525 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1528 if (waslocked) AFS_GLOCK();
1533 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1534 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1535 peer->host, peer->port, p->header.serial, p->header.epoch,
1536 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1537 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1539 MUTEX_ENTER(&rx_stats_mutex);
1540 rx_stats.packetsSent[p->header.type-1]++;
1541 MUTEX_EXIT(&rx_stats_mutex);
1542 MUTEX_ENTER(&peer->peer_lock);
1543 hadd32(peer->bytesSent, p->length);
1544 MUTEX_EXIT(&peer->peer_lock);
1547 /* Send a list of packets to appropriate destination for the specified
1548 * connection. The headers are first encoded and placed in the packets.
1550 void rxi_SendPacketList(struct rx_connection * conn,
1551 struct rx_packet **list,
1555 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1558 struct sockaddr_in addr;
1559 register struct rx_peer *peer = conn->peer;
1561 struct rx_packet *p = NULL;
1562 struct iovec wirevec[RX_MAXIOVECS];
1566 struct rx_jumboHeader *jp;
1568 char deliveryType = 'S';
1570 /* The address we're sending the packet to */
1571 addr.sin_family = AF_INET;
1572 addr.sin_port = peer->port;
1573 addr.sin_addr.s_addr = peer->host;
1575 if (len+1 > RX_MAXIOVECS) {
1576 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1580 * Stamp the packets in this jumbogram with consecutive serial numbers
1582 MUTEX_ENTER(&conn->conn_data_lock);
1583 serial = conn->serial;
1584 conn->serial += len;
1585 MUTEX_EXIT(&conn->conn_data_lock);
1588 /* This stuff should be revamped, I think, so that most, if not
1589 * all, of the header stuff is always added here. We could
1590 * probably do away with the encode/decode routines. XXXXX */
1593 length = RX_HEADER_SIZE;
1594 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1595 wirevec[0].iov_len = RX_HEADER_SIZE;
1596 for (i = 0 ; i < len ; i++) {
1599 /* The whole 3.5 jumbogram scheme relies on packets fitting
1600 * in a single packet buffer. */
1601 if (p->niovecs > 2) {
1602 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1605 /* Set the RX_JUMBO_PACKET flags in all but the last packets
1608 if (p->length != RX_JUMBOBUFFERSIZE) {
1609 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1611 p->header.flags |= RX_JUMBO_PACKET;
1612 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1613 wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1615 wirevec[i+1].iov_len = p->length;
1616 length += p->length;
1618 wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
1620 /* Convert jumbo packet header to network byte order */
1621 temp = (afs_uint32)(p->header.flags) << 24;
1622 temp |= (afs_uint32)(p->header.spare);
1623 *(afs_uint32 *)jp = htonl(temp);
1625 jp = (struct rx_jumboHeader *)
1626 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1628 /* Stamp each packet with a unique serial number. The serial
1629 * number is maintained on a connection basis because some types
1630 * of security may be based on the serial number of the packet,
1631 * and security is handled on a per authenticated-connection
1633 /* Pre-increment, to guarantee no zero serial number; a zero
1634 * serial number means the packet was never sent. */
1635 p->header.serial = ++serial;
1636 /* This is so we can adjust retransmit time-outs better in the face of
1637 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1639 if (p->firstSerial == 0) {
1640 p->firstSerial = p->header.serial;
1644 /* If an output tracer function is defined, call it with the packet and
1645 * network address. Note this function may modify its arguments. */
1646 if (rx_almostSent) {
1647 int drop = (*rx_almostSent) (p, &addr);
1648 /* drop packet if return value is non-zero? */
1649 if (drop) deliveryType = 'D'; /* Drop the packet */
1653 /* Get network byte order header */
1654 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1655 * touch ALL the fields */
1658 /* Send the packet out on the same socket that related packets are being
1660 socket = (conn->type == RX_CLIENT_CONNECTION
1661 ? rx_socket : conn->service->socket);
1664 /* Possibly drop this packet, for testing purposes */
1665 if ((deliveryType == 'D') ||
1666 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1667 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1668 deliveryType = 'D'; /* Drop the packet */
1671 deliveryType = 'S'; /* Send the packet */
1672 #endif /* RXDEBUG */
1674 /* Loop until the packet is sent. We'd prefer just to use a
1675 * blocking socket, but unfortunately the interface doesn't
1676 * allow us to have the socket block in send mode, and not
1677 * block in receive mode */
1679 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1680 waslocked = ISAFS_GLOCK();
1681 if (!istack && waslocked) AFS_GUNLOCK();
1683 if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
1684 /* send failed, so let's hurry up the resend, eh? */
1685 MUTEX_ENTER(&rx_stats_mutex);
1686 rx_stats.netSendFailures++;
1687 MUTEX_EXIT(&rx_stats_mutex);
1688 for (i = 0 ; i < len ; i++) {
1690 p->retryTime = p->timeSent; /* resend it very soon */
1691 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1694 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1695 if (!istack && waslocked) AFS_GLOCK();
1700 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1701 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1702 peer->host, peer->port, p->header.serial, p->header.epoch,
1703 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1704 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1706 MUTEX_ENTER(&rx_stats_mutex);
1707 rx_stats.packetsSent[p->header.type-1]++;
1708 MUTEX_EXIT(&rx_stats_mutex);
1709 MUTEX_ENTER(&peer->peer_lock);
1710 hadd32(peer->bytesSent, p->length);
1711 MUTEX_EXIT(&peer->peer_lock);
1715 /* Send a "special" packet to the peer connection. If call is
1716 * specified, then the packet is directed to a specific call channel
1717 * associated with the connection, otherwise it is directed to the
1718 * connection only. Uses optionalPacket if it is supplied, rather than
1719 * allocating a new packet buffer. Nbytes is the length of the data
1720 * portion of the packet. If data is non-null, nbytes of data are
1721 * copied into the packet. Type is the type of the packet, as defined
1722 * in rx.h. Bug: there's a lot of duplication between this and other
1723 * routines. This needs to be cleaned up. */
1725 rxi_SendSpecial(call, conn, optionalPacket, type, data, nbytes, istack)
1726 register struct rx_call *call;
1727 register struct rx_connection *conn;
1728 struct rx_packet *optionalPacket;
1733 /* Some of the following stuff should be common code for all
1734 * packet sends (it's repeated elsewhere) */
1735 register struct rx_packet *p;
1737 int savelen = 0, saven = 0;
1738 int channel, callNumber;
1740 channel = call->channel;
1741 callNumber = *call->callNumber;
1742 /* BUSY packets refer to the next call on this connection */
1743 if (type == RX_PACKET_TYPE_BUSY) {
1752 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1753 if (!p) osi_Panic("rxi_SendSpecial failure");
1760 p->header.serviceId = conn->serviceId;
1761 p->header.securityIndex = conn->securityIndex;
1762 p->header.cid = (conn->cid | channel);
1763 p->header.callNumber = callNumber;
1765 p->header.epoch = conn->epoch;
1766 p->header.type = type;
1767 p->header.flags = 0;
1768 if (conn->type == RX_CLIENT_CONNECTION)
1769 p->header.flags |= RX_CLIENT_INITIATED;
1771 rx_packetwrite(p, 0, nbytes, data);
1773 for (i=1; i < p->niovecs; i++) {
1774 if (nbytes <= p->wirevec[i].iov_len) {
1775 savelen = p->wirevec[i].iov_len;
1777 p->wirevec[i].iov_len = nbytes;
1778 p->niovecs = i+1; /* so condition fails because i == niovecs */
1780 else nbytes -= p->wirevec[i].iov_len;
1783 if (call) rxi_Send(call, p, istack);
1784 else rxi_SendPacket(conn, p, istack);
1785 if (saven) { /* means we truncated the packet above. We probably don't */
1786 /* really need to do this, but it seems safer this way, given that */
1787 /* sneaky optionalPacket... */
1788 p->wirevec[i-1].iov_len = savelen;
1791 if (!optionalPacket) rxi_FreePacket(p);
1792 return optionalPacket;
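
/* Sketch (not compiled) of a typical caller, modelled on the way the
 * connection-abort path in rx.c is expected to use this routine: a NULL call
 * directs the packet at the connection as a whole, and a NULL optionalPacket
 * makes rxi_SendSpecial allocate and free the packet itself.
 * RX_PACKET_TYPE_ABORT is assumed to come from rx.h. */
#if 0
static void example_send_conn_abort(struct rx_connection *conn, int istack)
{
    afs_int32 error = htonl(conn->error);
    (void) rxi_SendSpecial((struct rx_call *) 0, conn, (struct rx_packet *) 0,
                           RX_PACKET_TYPE_ABORT, (char *) &error,
                           sizeof(error), istack);
}
#endif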
1796 /* Encode the packet's header (from the struct header in the packet to
1797 * the net byte order representation in the wire representation of the
1798 * packet, which is what is actually sent out on the wire) */
1799 void rxi_EncodePacketHeader(p)
1800 register struct rx_packet *p;
1802 register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */
1804 bzero((char *)buf, RX_HEADER_SIZE);
1805 *buf++ = htonl(p->header.epoch);
1806 *buf++ = htonl(p->header.cid);
1807 *buf++ = htonl(p->header.callNumber);
1808 *buf++ = htonl(p->header.seq);
1809 *buf++ = htonl(p->header.serial);
1810 *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
1811 | (((afs_uint32)p->header.flags)<<16)
1812 | (p->header.userStatus<<8) | p->header.securityIndex);
1813 /* Note: top 16 bits of this next word were reserved */
1814 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1817 /* Decode the packet's header (from net byte order to a struct header) */
1818 void rxi_DecodePacketHeader(p)
1819 register struct rx_packet *p;
1821 register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base); /* MTUXXX */
1824 p->header.epoch = ntohl(*buf++);
1825 p->header.cid = ntohl(*buf++);
1826 p->header.callNumber = ntohl(*buf++);
1827 p->header.seq = ntohl(*buf++);
1828 p->header.serial = ntohl(*buf++);
1829 temp = ntohl(*buf++);
1830 /* C will truncate byte fields to bytes for me */
1831 p->header.type = temp>>24;
1832 p->header.flags = temp>>16;
1833 p->header.userStatus = temp>>8;
1834 p->header.securityIndex = temp>>0;
1835 temp = ntohl(*buf++);
1836 p->header.serviceId = (temp&0xffff);
1837 p->header.spare = temp>>16;
1838 /* Note: top 16 bits of this last word are the security checksum */
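
/* Wire format implied by the encode/decode pair above: the RX header is
 * seven 32-bit words, each in network byte order.
 *
 *   word 0   epoch
 *   word 1   cid (connection id | channel)
 *   word 2   callNumber
 *   word 3   seq
 *   word 4   serial
 *   word 5   type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6   checksum<<16 | serviceId   (the top half lands in header.spare
 *                                        on decode; see the notes above) */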
1841 void rxi_PrepareSendPacket(call, p, last)
1842 register struct rx_call *call;
1843 register struct rx_packet *p;
1846 register struct rx_connection *conn = call->conn;
1848 ssize_t len; /* len must be a signed type; it can go negative */
1851 p->header.cid = (conn->cid | call->channel);
1852 p->header.serviceId = conn->serviceId;
1853 p->header.securityIndex = conn->securityIndex;
1854 p->header.callNumber = *call->callNumber;
1855 p->header.seq = call->tnext++;
1856 p->header.epoch = conn->epoch;
1857 p->header.type = RX_PACKET_TYPE_DATA;
1858 p->header.flags = 0;
1859 p->header.spare = 0;
1860 if (conn->type == RX_CLIENT_CONNECTION)
1861 p->header.flags |= RX_CLIENT_INITIATED;
1864 p->header.flags |= RX_LAST_PACKET;
1866 clock_Zero(&p->retryTime); /* Never yet transmitted */
1867 clock_Zero(&p->firstSent); /* Never yet transmitted */
1868 p->header.serial = 0; /* Another way of saying never transmitted... */
1871 /* Now that we're sure this is the last data on the call, make sure
1872 * that the "length" and the sum of the iov_lens match.
1873 len = p->length + call->conn->securityHeaderSize;
1875 for (i=1; i < p->niovecs && len > 0; i++) {
1876 len -= p->wirevec[i].iov_len;
1879 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1882 /* Free any extra elements in the wirevec */
1883 for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1884 rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
1887 p->wirevec[i-1].iov_len += len;
1889 RXS_PreparePacket(conn->securityObject, call, p);
1892 /* Given an interface MTU size, calculate an adjusted MTU size that
1893 * will make efficient use of the RX buffers when the peer is sending
1894 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
1895 int rxi_AdjustIfMTU(int mtu)
1900 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1901 if (mtu <= adjMTU) {
1908 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1909 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
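
/* Net effect of rxi_AdjustIfMTU, written out (assuming the elided lines
 * return small MTUs unchanged and subtract adjMTU before computing frags):
 * the interface MTU is rounded down to
 *
 *   RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE
 *     + frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)
 *
 * i.e. room for one fully-headered packet plus as many whole jumbo
 * buffer+header units as still fit, so no partial buffer is ever wasted. */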
1912 /* Given an interface MTU size, and the peer's advertised max receive
1913 * size, calculate an adjusted maxMTU size that makes efficient use
1914 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
1915 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1917 int maxMTU = mtu * rxi_nSendFrags;
1918 maxMTU = MIN(maxMTU, peerMaxMTU);
1919 return rxi_AdjustIfMTU(maxMTU);
1922 /* Given a packet size, figure out how many datagram packets will fit.
1923 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1924 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1925 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
1926 int rxi_AdjustDgramPackets(int frags, int mtu)
1929 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
1932 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1933 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1934 /* subtract the size of the first and last packets */
1935 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
1939 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
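
/* Reading the arithmetic above back (a sketch; the elided early returns are
 * assumed to fall back to a minimal packet count): maxMTU starts as
 * frags * (mtu + UDP_HDR_SIZE) - UDP_HDR_SIZE, capped at RX_MAX_PACKET_SIZE;
 * the first packet (RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)
 * and the last (RX_JUMBOBUFFERSIZE) are then reserved, and every remaining
 * RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE chunk adds one more packet: hence
 * the 2 + maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE) returned above. */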