2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
11 #include "../afs/param.h"
12 #include <afsconfig.h>
14 #include "../afs/sysincludes.h"
15 #include "../afs/afsincludes.h"
16 #include "../rx/rx_kcommon.h"
17 #include "../rx/rx_clock.h"
18 #include "../rx/rx_queue.h"
19 #include "../rx/rx_packet.h"
20 #else /* defined(UKERNEL) */
21 #include "../h/types.h"
22 #ifndef AFS_LINUX20_ENV
23 #include "../h/systm.h"
25 #if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
26 #include "../afs/sysincludes.h"
28 #include "../h/socket.h"
29 #include "../netinet/in.h"
30 #include "../afs/afs_osi.h"
31 #include "../rx/rx_kmutex.h"
32 #include "../rx/rx_clock.h"
33 #include "../rx/rx_queue.h"
35 #include <sys/sysmacros.h>
37 #include "../rx/rx_packet.h"
38 #if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV)
39 #if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
40 #include "../sys/mount.h" /* it gets pulled in by something later anyway */
42 #include "../h/mbuf.h"
44 #endif /* defined(UKERNEL) */
45 #include "../rx/rx_globals.h"
47 #include <afs/param.h>
48 #include <afsconfig.h>
49 #include "sys/types.h"
52 #if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
56 #include <sys/socket.h>
57 #include <netinet/in.h>
58 #endif /* AFS_NT40_ENV */
59 #include "rx_xmit_nt.h"
62 #include <sys/socket.h>
63 #include <netinet/in.h>
69 #include <sys/sysmacros.h>
71 #include "rx_packet.h"
72 #include "rx_globals.h"
74 #include "rx_internal.h"
87 /* rxdb_fileID is used to identify the lock location, along with line#. */
88 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
89 #endif /* RX_LOCKS_DB */
/* Head of the most recent slab of packets handed out by osi_Alloc in
 * rxi_MorePackets*() — see those routines below. */
90 struct rx_packet *rx_mallocedP = 0;
92 extern char cml_version_number[];
93 extern int (*rx_almostSent)();
95 void rxi_FreePacketNoLock(struct rx_packet *p);
96 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
97 afs_int32 ahost, short aport, afs_int32 istack);
/* NOTE(review): the two extern declarations below duplicate lines 92-93 above;
 * presumably one pair is inside a different #ifdef arm — confirm in full source. */
99 extern char cml_version_number[];
100 extern int (*rx_almostSent)();
101 /* some rules about packets:
102 * 1. When a packet is allocated, the final iov_buf contains room for
103 * a security trailer, but iov_len masks that fact. If the security
104 * package wants to add the trailer, it may do so, and then extend
105 * iov_len appropriately. For this reason, packet's niovecs and
106 * iov_len fields should be accurate before calling PreparePacket.
110 * all packet buffers (iov_base) are integral multiples of
112 * offset is an integral multiple of the word size.
/* Read a 32-bit word from a packet at a word-aligned byte offset, walking the
 * iovec chain (slow path used when the offset falls past the first buffer).
 * Scanning starts at wirevec[1]; wirevec[0] is the wire header.
 * NOTE(review): this listing is missing interior lines (declarations of l/i,
 * loop close, and the out-of-range return) — do not edit without full source. */
114 afs_int32 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
118 for (l=0, i=1; i< packet->niovecs ; i++ ) {
119 if (l + packet->wirevec[i].iov_len > offset) {
120 return *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset-l)));
122 l += packet->wirevec[i].iov_len;
129 * all packet buffers (iov_base) are integral multiples of the word size.
130 * offset is an integral multiple of the word size.
/* Store a 32-bit word into a packet at a word-aligned byte offset, walking the
 * iovec chain (slow-path counterpart of rx_SlowGetInt32 above).
 * NOTE(review): interior lines are missing from this listing (the assignment's
 * continuation and the return value) — consult the full source before editing. */
132 afs_int32 rx_SlowPutInt32(struct rx_packet *packet, size_t offset, afs_int32 data)
136 for (l=0, i=1; i< packet->niovecs ; i++ ) {
137 if (l + packet->wirevec[i].iov_len > offset) {
138 *((afs_int32 *)((char*)(packet->wirevec[i].iov_base) + (offset - l))) =
142 l += packet->wirevec[i].iov_len;
149 * all packet buffers (iov_base) are integral multiples of the
151 * offset is an integral multiple of the word size.
153 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
/* Copy up to `resid` bytes out of the packet, starting at byte `offset`, into
 * `out`.  First locates the iovec containing `offset`, then bcopy's piecewise
 * across subsequent iovecs.  Returns the number of bytes actually copied
 * (r - resid when the packet ran out early, else r).
 * NOTE(review): several interior lines (the `break`, `r = resid`, and the
 * per-chunk offset/resid updates) are missing from this listing. */
155 afs_int32 rx_SlowReadPacket(struct rx_packet *packet, unsigned int offset,
156 int resid, char *out)
158 unsigned int i, j, l, r;
159 for (l=0, i=1; i< packet->niovecs ; i++ ) {
160 if (l + packet->wirevec[i].iov_len > offset) {
163 l += packet->wirevec[i].iov_len;
166 /* i is the iovec which contains the first little bit of data in which we
167 * are interested. l is the total length of everything prior to this iovec.
168 * j is the number of bytes we can safely copy out of this iovec.
171 while ((resid > 0) && (i < packet->niovecs)) {
172 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
173 bcopy ((char *)(packet->wirevec[i].iov_base) + (offset - l), out, j);
175 l += packet->wirevec[i].iov_len;
179 return (resid ? (r - resid) : r);
184 * all packet buffers (iov_base) are integral multiples of the
186 * offset is an integral multiple of the word size.
/* Copy `resid` bytes from `in` into the packet starting at byte `offset`.
 * Unlike the read path, this may grow the packet: when the write runs off the
 * end of the existing iovecs it calls rxi_AllocDataBuf to attach continuation
 * buffers (which bumps niovecs as a side-effect).  Returns bytes written.
 * NOTE(review): interior lines (the `break`s, the bcopy of `b`, and the
 * offset/resid bookkeeping) are missing from this listing. */
188 afs_int32 rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid,
194 for (l=0, i=1; i < packet->niovecs; i++ ) {
195 if (l + packet->wirevec[i].iov_len > offset) {
198 l += packet->wirevec[i].iov_len;
201 /* i is the iovec which contains the first little bit of data in which we
202 * are interested. l is the total length of everything prior to this iovec.
203 * j is the number of bytes we can safely copy out of this iovec.
206 while ((resid > 0) && (i < RX_MAXWVECS)) {
207 if (i >= packet->niovecs)
208 if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) >0) /* ++niovecs as a side-effect */
211 b = (char*)(packet->wirevec[i].iov_base) + (offset - l);
212 j = MIN (resid, packet->wirevec[i].iov_len - (offset - l));
215 l += packet->wirevec[i].iov_len;
219 return (resid ? (r - resid) : r);
/* Allocate a packet to be used as a continuation buffer for the given
 * allocation class.  Takes rx_freePktQ_lock itself.  On over-quota, bumps the
 * per-class failure counter (under rx_stats_mutex) and fails; otherwise pops
 * the first packet off rx_freePacketQueue, panicking if its flags say it was
 * not actually free.
 * NOTE(review): the switch header, case `break`s, queue_Remove, and the
 * return statements are among the lines missing from this listing. */
222 static struct rx_packet * allocCBuf(int class)
226 extern void rxi_MorePacketsNoLock();
231 MUTEX_ENTER(&rx_freePktQ_lock);
234 if (rxi_OverQuota(class)) {
236 rxi_NeedMorePackets = TRUE;
237 MUTEX_ENTER(&rx_stats_mutex);
239 case RX_PACKET_CLASS_RECEIVE:
240 rx_stats.receivePktAllocFailures++;
242 case RX_PACKET_CLASS_SEND:
243 rx_stats.sendPktAllocFailures++;
245 case RX_PACKET_CLASS_SPECIAL:
246 rx_stats.specialPktAllocFailures++;
248 case RX_PACKET_CLASS_RECV_CBUF:
249 rx_stats.receiveCbufPktAllocFailures++;
251 case RX_PACKET_CLASS_SEND_CBUF:
252 rx_stats.sendCbufPktAllocFailures++;
255 MUTEX_EXIT(&rx_stats_mutex);
259 if (queue_IsEmpty(&rx_freePacketQueue)) {
261 rxi_NeedMorePackets = TRUE;
265 if (queue_IsEmpty(&rx_freePacketQueue)) {
266 rxi_MorePacketsNoLock(rx_initSendWindow);
271 c = queue_First(&rx_freePacketQueue, rx_packet);
273 if (c->header.flags != RX_FREE_PACKET)
274 osi_Panic("rxi_AllocPacket: packet not free\n");
280 MUTEX_EXIT(&rx_freePktQ_lock);
287 * Free a packet currently used as a continuation buffer
/* Return a packet that was being used as a continuation buffer to the free
 * queue.  Takes rx_freePktQ_lock around the unlocked free, then wakes any
 * threads blocked waiting for packets (wakeup call is among the lines missing
 * from this listing). */
289 void rxi_freeCBuf(struct rx_packet *c)
291 extern void rxi_PacketsUnWait();
295 MUTEX_ENTER(&rx_freePktQ_lock);
297 rxi_FreePacketNoLock(c);
298 /* Wakeup anyone waiting for packets */
301 MUTEX_EXIT(&rx_freePktQ_lock);
305 /* this one is kind of awful.
306 * In rxkad, the packet has been all shortened, and everything, ready for
307 * sending. All of a sudden, we discover we need some of that space back.
308 * This isn't terribly general, because it knows that the packets are only
309 * rounded up to the EBS (userdata + security header).
/* Reclaim `nb` bytes of slack in a packet's iovecs (used by rxkad after the
 * packet was shortened).  Grows iov_len by nb only when the buffer has room:
 * the first data buffer (localdata) is RX_FIRSTBUFFERSIZE, continuation
 * buffers are RX_CBUFFERSIZE.
 * NOTE(review): loop header, declarations of i/nb, and returns are missing
 * from this listing — see full source. */
311 int rxi_RoundUpPacket(p, nb)
312 struct rx_packet * p;
317 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
318 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
319 p->wirevec[i].iov_len += nb;
324 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
325 p->wirevec[i].iov_len += nb;
332 /* get sufficient space to store nb bytes of data (or more), and hook
333 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
334 * returns the number of bytes >0 which it failed to come up with.
335 * Don't need to worry about locking on packet, since only
336 * one thread can manipulate one at a time. Locking on continuation
337 * packets is handled by allocCBuf */
338 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
/* Attach enough continuation buffers to packet p to hold at least `nb` more
 * bytes.  Each allocCBuf() success contributes RX_CBUFFERSIZE bytes and one
 * iovec entry.  Returns <=0 on full success, else the positive number of
 * bytes it could not obtain (allocation failure path is among the missing
 * lines in this listing, as is the niovecs update/return). */
339 int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
343 for (i=p->niovecs; nb>0 && i<RX_MAXWVECS; i++) {
344 register struct rx_packet *cb;
345 if ((cb = allocCBuf(class))) {
346 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
347 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
348 nb -= RX_CBUFFERSIZE;
349 p->length += RX_CBUFFERSIZE;
358 /* Add more packet buffers */
/* Grow the global packet pool by `apackets` packets: one osi_Alloc slab,
 * zeroed, each packet's first two iovecs wired to its built-in header and
 * localdata buffers, then appended to rx_freePacketQueue under
 * rx_freePktQ_lock.
 * NOTE(review): rx_mallocedP is overwritten with the newest slab each call —
 * presumably the packets are chained via a field set in the missing lines;
 * confirm against full source before assuming earlier slabs leak. */
359 void rxi_MorePackets(int apackets)
361 extern void rxi_PacketsUnWait();
362 struct rx_packet *p, *e;
366 getme = apackets * sizeof(struct rx_packet);
367 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
369 PIN(p, getme); /* XXXXX */
370 bzero((char *)p, getme);
373 MUTEX_ENTER(&rx_freePktQ_lock);
375 for (e = p + apackets; p<e; p++) {
376 p->wirevec[0].iov_base = (char *) (p->wirehead);
377 p->wirevec[0].iov_len = RX_HEADER_SIZE;
378 p->wirevec[1].iov_base = (char *) (p->localdata);
379 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
380 p->header.flags = RX_FREE_PACKET;
383 queue_Append(&rx_freePacketQueue, p);
385 rx_nFreePackets += apackets;
386 rxi_NeedMorePackets = FALSE;
390 MUTEX_EXIT(&rx_freePktQ_lock);
395 /* Add more packet buffers */
/* Same as rxi_MorePackets but assumes the caller already holds
 * rx_freePktQ_lock.  Additionally over-allocates so that roughly 1/4 of the
 * new packets could be filled out to jumbogram size with continuation
 * buffers. */
396 void rxi_MorePacketsNoLock(int apackets)
398 extern void rxi_PacketsUnWait();
399 struct rx_packet *p, *e;
402 /* allocate enough packets that 1/4 of the packets will be able
403 * to hold maximal amounts of data */
404 apackets += (apackets/4)
405 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE);
406 getme = apackets * sizeof(struct rx_packet);
407 p = rx_mallocedP = (struct rx_packet *) osi_Alloc(getme);
409 bzero((char *)p, getme);
411 for (e = p + apackets; p<e; p++) {
412 p->wirevec[0].iov_base = (char *) (p->wirehead);
413 p->wirevec[0].iov_len = RX_HEADER_SIZE;
414 p->wirevec[1].iov_base = (char *) (p->localdata);
415 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
416 p->header.flags = RX_FREE_PACKET;
419 queue_Append(&rx_freePacketQueue, p);
421 rx_nFreePackets += apackets;
422 rxi_NeedMorePackets = FALSE;
/* Tear down the packet pool at shutdown.  Frees only rx_mallocedP, i.e. the
 * most recent slab — the MTUXXX comment below acknowledges this does not yet
 * free all slabs. */
427 void rxi_FreeAllPackets(void)
429 /* must be called at proper interrupt level, etcetera */
430 /* MTUXXX need to free all Packets */
431 osi_Free(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
432 UNPIN(rx_mallocedP, (rx_maxReceiveWindow+2) * sizeof(struct rx_packet));
435 /* Allocate more packets iff we need more continuation buffers */
436 /* In kernel, can't page in memory with interrupts disabled, so we
437 * don't use the event mechanism. */
/* Poll-style top-up of the packet pool: if an allocation failure set
 * rxi_NeedMorePackets, grow the pool now.  Used instead of the event
 * mechanism in kernel, per the comment above. */
438 void rx_CheckPackets()
440 if (rxi_NeedMorePackets) {
441 rxi_MorePackets(rx_initSendWindow);
445 /* In the packet freeing routine below, the assumption is that
446 we want all of the packets to be used equally frequently, so that we
447 don't get packet buffers paging out. It would be just as valid to
448 assume that we DO want them to page out if not many are being used.
449 In any event, we assume the former, and append the packets to the end
451 /* This explanation is bogus. The free list doesn't remain in any kind of
452 useful order for afs_int32: the packets in use get pretty much randomly scattered
453 across all the pages. In order to permit unused {packets,bufs} to page out, they
454 must be stored so that packets which are adjacent in memory are adjacent in the
455 free list. An array springs rapidly to mind.
458 /* Actually free the packet p. */
/* Put packet p back on the free queue; caller must hold rx_freePktQ_lock.
 * Panics on double-free (RX_FREE_PACKET flag already set). */
459 void rxi_FreePacketNoLock(struct rx_packet *p)
461 dpf(("Free %x\n", p));
463 if (p->header.flags & RX_FREE_PACKET)
464 osi_Panic("rxi_FreePacketNoLock: packet already free\n");
466 p->header.flags = RX_FREE_PACKET;
467 queue_Append(&rx_freePacketQueue, p);
/* Release the continuation buffers attached to p starting at iovec `first`
 * (currently must be 1; vec 1 must be p's own localdata, so freeing starts at
 * vec 2).  Each continuation iovec is mapped back to its owning packet via
 * RX_CBUF_TO_PACKET and freed.  Caller holds rx_freePktQ_lock.
 * NOTE(review): the NULL check guarding the panic at line 484, the niovecs
 * reset, and the return are among the lines missing from this listing. */
470 int rxi_FreeDataBufsNoLock(p, first)
471 struct rx_packet * p;
474 struct iovec *iov, *end;
476 if (first != 1) /* MTUXXX */
477 osi_Panic("FreeDataBufs 1: first must be 1");
478 iov = &p->wirevec[1];
479 end = iov + (p->niovecs-1);
480 if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */
481 osi_Panic("FreeDataBufs 2: vec 1 must be localdata");
482 for (iov++ ; iov < end ; iov++) {
484 osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL");
485 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
493 int rxi_nBadIovecs = 0;
495 /* rxi_RestoreDataBufs
497 * Restore the correct sizes to the iovecs. Called when reusing a packet
498 * for reading off the wire.
/* Restore the correct sizes to the iovecs. Called when reusing a packet
 * for reading off the wire: resets vec 0/1 to the built-in header and
 * localdata buffers and every continuation vec back to RX_CBUFFERSIZE.
 * A NULL continuation base (checked at line 511) is handled in lines missing
 * from this listing — presumably counted via rxi_nBadIovecs; confirm. */
500 void rxi_RestoreDataBufs(struct rx_packet *p)
503 struct iovec *iov = &p->wirevec[2];
505 p->wirevec[0].iov_base = (char *) (p->wirehead);
506 p->wirevec[0].iov_len = RX_HEADER_SIZE;
507 p->wirevec[1].iov_base = (char *) (p->localdata);
508 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
510 for (i=2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
511 if (!iov->iov_base) {
516 iov->iov_len = RX_CBUFFERSIZE;
/* Free the continuation buffers of p that hold no message data.  Walks past
 * the vecs still covered by p->length, then frees the remainder under
 * rx_freePktQ_lock, remapping each cbuf to its owning packet with
 * RX_CBUF_TO_PACKET.
 * NOTE(review): the `first != 1` check's if-line, NULL-base guards, the
 * niovecs adjustment, and the return are among the lines missing here. */
520 int rxi_TrimDataBufs(p, first)
521 struct rx_packet * p;
524 extern void rxi_PacketsUnWait();
526 struct iovec *iov, *end;
530 osi_Panic("TrimDataBufs 1: first must be 1");
532 /* Skip over continuation buffers containing message data */
533 iov = &p->wirevec[2];
534 end = iov + (p->niovecs-2);
535 length = p->length - p->wirevec[1].iov_len;
536 for (; iov < end && length > 0 ; iov++) {
538 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
539 length -= iov->iov_len;
542 /* iov now points to the first empty data buffer. */
547 MUTEX_ENTER(&rx_freePktQ_lock);
549 for (; iov < end ; iov++) {
551 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
552 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
557 MUTEX_EXIT(&rx_freePktQ_lock);
563 /* Free the packet p. P is assumed not to be on any queue, i.e.
564 * remove it yourself first if you call this routine. */
/* Free packet p (must already be off any queue): releases its continuation
 * buffers and the packet itself under rx_freePktQ_lock, then wakes packet
 * waiters (wakeup line missing from this listing). */
565 void rxi_FreePacket(struct rx_packet *p)
567 extern void rxi_PacketsUnWait();
571 MUTEX_ENTER(&rx_freePktQ_lock);
573 rxi_FreeDataBufsNoLock(p,1);
574 rxi_FreePacketNoLock(p);
575 /* Wakeup anyone waiting for packets */
578 MUTEX_EXIT(&rx_freePktQ_lock);
583 /* rxi_AllocPacket sets up p->length so it reflects the number of
584 * bytes in the packet at this point, **not including** the header.
585 * The header is absolutely necessary, besides, this is the way the
586 * length field is usually used */
/* Allocate a packet of the given class; caller holds rx_freePktQ_lock.
 * Over-quota requests fail with a per-class stats bump (mirrors allocCBuf
 * above).  Otherwise pops the head of rx_freePacketQueue, re-points iovecs
 * 0/1 at the packet's own header/localdata buffers (rx_FlushWrite may have
 * fiddled them), and sets p->length to the data capacity, excluding the wire
 * header.
 * NOTE(review): the switch header, case `break`s, queue_Remove, niovecs
 * setup, and the final return are among the lines missing from this listing. */
587 struct rx_packet *rxi_AllocPacketNoLock(class)
590 register struct rx_packet *p;
593 if (rxi_OverQuota(class)) {
594 rxi_NeedMorePackets = TRUE;
595 MUTEX_ENTER(&rx_stats_mutex);
597 case RX_PACKET_CLASS_RECEIVE:
598 rx_stats.receivePktAllocFailures++;
600 case RX_PACKET_CLASS_SEND:
601 rx_stats.sendPktAllocFailures++;
603 case RX_PACKET_CLASS_SPECIAL:
604 rx_stats.specialPktAllocFailures++;
606 case RX_PACKET_CLASS_RECV_CBUF:
607 rx_stats.receiveCbufPktAllocFailures++;
609 case RX_PACKET_CLASS_SEND_CBUF:
610 rx_stats.sendCbufPktAllocFailures++;
613 MUTEX_EXIT(&rx_stats_mutex);
614 return (struct rx_packet *) 0;
618 MUTEX_ENTER(&rx_stats_mutex);
619 rx_stats.packetRequests++;
620 MUTEX_EXIT(&rx_stats_mutex);
623 if (queue_IsEmpty(&rx_freePacketQueue))
624 osi_Panic("rxi_AllocPacket error");
626 if (queue_IsEmpty(&rx_freePacketQueue))
627 rxi_MorePacketsNoLock(rx_initSendWindow);
631 p = queue_First(&rx_freePacketQueue, rx_packet);
632 if (p->header.flags != RX_FREE_PACKET)
633 osi_Panic("rxi_AllocPacket: packet not free\n");
635 dpf(("Alloc %x, class %d\n", p, class));
640 /* have to do this here because rx_FlushWrite fiddles with the iovs in
641 * order to truncate outbound packets. In the near future, may need
642 * to allocate bufs from a static pool here, and/or in AllocSendPacket
644 p->wirevec[0].iov_base = (char *) (p->wirehead);
645 p->wirevec[0].iov_len = RX_HEADER_SIZE;
646 p->wirevec[1].iov_base = (char *) (p->localdata);
647 p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE;
649 p->length = RX_FIRSTBUFFERSIZE;
/* Locked wrapper around rxi_AllocPacketNoLock: takes rx_freePktQ_lock for
 * the duration of the allocation (return of p is in a line missing from this
 * listing). */
653 struct rx_packet *rxi_AllocPacket(class)
656 register struct rx_packet *p;
658 MUTEX_ENTER(&rx_freePktQ_lock);
659 p = rxi_AllocPacketNoLock(class);
660 MUTEX_EXIT(&rx_freePktQ_lock);
664 /* This guy comes up with as many buffers as it {takes,can get} given
665 * the MTU for this call. It also sets the packet length before
666 * returning. caution: this is often called at NETPRI
667 * Called with call locked.
/* Allocate a send packet sized for this call: capacity is capped by the
 * call's MTU minus wire header (`mud`), with `delta` reserved for the
 * security header + max trailer.  Grows the packet with continuation bufs if
 * `want` exceeds the base size.  If no packet is available and the call has
 * no error, blocks (releasing call->lock, setting RX_CALL_WAIT_PACKETS and
 * rx_waitingForPackets) until packets are returned, then retries.
 * NOTE(review): the trimming branch for p->length > mud, the delta
 * adjustment, and the loop close/return are among the missing lines. */
669 struct rx_packet *rxi_AllocSendPacket(call, want)
670 register struct rx_call *call;
673 register struct rx_packet *p = (struct rx_packet *) 0;
675 register unsigned delta;
678 mud = call->MTU - RX_HEADER_SIZE;
679 delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
680 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
682 while (!(call->error)) {
683 MUTEX_ENTER(&rx_freePktQ_lock);
684 /* if an error occurred, or we get the packet we want, we're done */
685 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
686 MUTEX_EXIT(&rx_freePktQ_lock);
689 want = MIN(want, mud);
691 if ((unsigned) want > p->length)
692 (void) rxi_AllocDataBuf(p, (want - p->length),
693 RX_PACKET_CLASS_SEND_CBUF);
695 if ((unsigned) p->length > mud)
698 if (delta >= p->length) {
707 /* no error occurred, and we didn't get a packet, so we sleep.
708 * At this point, we assume that packets will be returned
709 * sooner or later, as packets are acknowledged, and so we
712 call->flags |= RX_CALL_WAIT_PACKETS;
713 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
714 MUTEX_EXIT(&call->lock);
715 rx_waitingForPackets = 1;
717 #ifdef RX_ENABLE_LOCKS
718 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
720 osi_rxSleep(&rx_waitingForPackets);
722 MUTEX_EXIT(&rx_freePktQ_lock);
723 MUTEX_ENTER(&call->lock);
724 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
725 call->flags &= ~RX_CALL_WAIT_PACKETS;
734 /* count the number of used FDs */
/* Count file descriptors in [0, amax) that are open, by probing each with
 * fstat (return of `count` is in a line missing from this listing).
 * A macro fallback (line 751 below) substitutes `amax` on platforms where
 * this can't be done. */
735 static int CountFDs(amax)
738 register int i, code;
742 for(i=0;i<amax;i++) {
743 code = fstat(i, &tstat);
744 if (code == 0) count++;
751 #define CountFDs(amax) amax
755 #if !defined(KERNEL) || defined(UKERNEL)
757 /* This function reads a single packet from the interface into the
758 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
759 * (host,port) of the sender are stored in the supplied variables, and
760 * the data length of the packet is stored in the packet structure.
761 * The header is decoded. */
/* Read one packet from `socket` into *p via recvmsg, using the packet's
 * iovec chain directly as the scatter list.  Grows p with continuation bufs
 * up to the advertised rx_maxJumboRecvSize before reading, and temporarily
 * extends the final iovec by RX_EXTRABUFFERSIZE so an oversized datagram
 * can't overrun (the rx header has no length field).  On success, decodes
 * the header, reports (host,port), charges bytesReceived to an existing peer
 * (never creates one — see comment below), and trims unused cbufs.
 * Returns 0 for bogus/would-block packets; the success return and several
 * error-path lines are missing from this listing. */
762 int rxi_ReadPacket(socket, p, host, port)
764 register struct rx_packet *p;
768 struct sockaddr_in from;
771 register afs_int32 tlen, savelen;
773 rx_computelen(p, tlen);
774 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
776 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
777 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
778 * it once in order to avoid races. */
781 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
789 /* Extend the last iovec for padding, it's just to make sure that the
790 * read doesn't return more data than we expect, and is done to get around
791 * our problems caused by the lack of a length field in the rx header.
792 * Use the extra buffer that follows the localdata in each packet
794 savelen = p->wirevec[p->niovecs].iov_len;
795 p->wirevec[p->niovecs].iov_len += RX_EXTRABUFFERSIZE;
797 bzero((char *)&msg, sizeof(msg));
798 msg.msg_name = (char *) &from;
799 msg.msg_namelen = sizeof(struct sockaddr_in);
800 msg.msg_iov = p->wirevec;
801 msg.msg_iovlen = p->niovecs;
802 nbytes = rxi_Recvmsg(socket, &msg, 0);
804 /* restore the vec to its correct state */
805 p->wirevec[p->niovecs].iov_len = savelen;
807 p->length = (nbytes - RX_HEADER_SIZE);
808 if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
810 rxi_MorePackets(rx_initSendWindow);
812 else if (nbytes < 0 && errno == EWOULDBLOCK) {
813 MUTEX_ENTER(&rx_stats_mutex);
814 rx_stats.noPacketOnRead++;
815 MUTEX_EXIT(&rx_stats_mutex);
819 MUTEX_ENTER(&rx_stats_mutex);
820 rx_stats.bogusPacketOnRead++;
821 rx_stats.bogusHost = from.sin_addr.s_addr;
822 MUTEX_EXIT(&rx_stats_mutex);
823 dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr,
824 from.sin_port,nbytes));
829 /* Extract packet header. */
830 rxi_DecodePacketHeader(p);
832 *host = from.sin_addr.s_addr;
833 *port = from.sin_port;
834 if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
835 struct rx_peer *peer;
836 MUTEX_ENTER(&rx_stats_mutex);
837 rx_stats.packetsRead[p->header.type-1]++;
838 MUTEX_EXIT(&rx_stats_mutex);
840 * Try to look up this peer structure. If it doesn't exist,
841 * don't create a new one -
842 * we don't keep count of the bytes sent/received if a peer
843 * structure doesn't already exist.
845 * The peer/connection cleanup code assumes that there is 1 peer
846 * per connection. If we actually created a peer structure here
847 * and this packet was an rxdebug packet, the peer structure would
848 * never be cleaned up.
850 peer = rxi_FindPeer(*host, *port, 0, 0);
852 MUTEX_ENTER(&peer->peer_lock);
853 hadd32(peer->bytesReceived, p->length);
854 MUTEX_EXIT(&peer->peer_lock);
858 /* Free any empty packet buffers at the end of this packet */
859 rxi_TrimDataBufs(p, 1);
865 #endif /* !KERNEL || UKERNEL */
867 /* This function splits off the first packet in a jumbo packet.
868 * As of AFS 3.5, jumbograms contain more than one fixed size
869 * packet, and the RX_JUMBO_PACKET flag is set in all but the
870 * last packet header. All packets (except the last) are padded to
871 * fall on RX_CBUFFERSIZE boundaries.
872 * HACK: We store the length of the first n-1 packets in the
873 * last two pad bytes. */
/* Split the first fixed-size packet off a jumbogram.  Each packet except the
 * last is RX_JUMBOBUFFERSIZE bytes, followed by an abbreviated 4-byte jumbo
 * header (flags + cksum) that seeds the next packet's header; serial and seq
 * each advance by 1.  The continuation buffer at wirevec[2] doubles as the
 * next packet's storage (via RX_CBUF_TO_PACKET), so iovecs shift down by
 * one.  Returns the split-off packet; the bogus-length/niovecs early
 * returns and the final return are among the lines missing here. */
875 struct rx_packet *rxi_SplitJumboPacket(p, host, port, first)
876 register struct rx_packet *p;
881 struct rx_packet *np;
882 struct rx_jumboHeader *jp;
888 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
889 * bytes in length. All but the first packet are preceded by
890 * an abbreviated four byte header. The length of the last packet
891 * is calculated from the size of the jumbogram. */
892 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
894 if ((int)p->length < length) {
895 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
898 niov = p->niovecs - 2;
900 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
903 iov = &p->wirevec[2];
904 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
906 /* Get a pointer to the abbreviated packet header */
907 jp = (struct rx_jumboHeader *)
908 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
910 /* Set up the iovecs for the next packet */
911 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
912 np->wirevec[0].iov_len = sizeof(struct rx_header);
913 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
914 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
915 np->niovecs = niov+1;
916 for (i = 2 , iov++ ; i <= niov ; i++ , iov++) {
917 np->wirevec[i] = *iov;
919 np->length = p->length - length;
920 p->length = RX_JUMBOBUFFERSIZE;
923 /* Convert the jumbo packet header to host byte order */
924 temp = ntohl(*(afs_uint32 *)jp);
925 jp->flags = (u_char)(temp >> 24);
926 jp->cksum = (u_short)(temp);
928 /* Fill in the packet header */
929 np->header = p->header;
930 np->header.serial = p->header.serial + 1;
931 np->header.seq = p->header.seq + 1;
932 np->header.flags = jp->flags;
933 np->header.spare = jp->cksum;
939 /* Send a udp datagram */
/* Send a UDP datagram described by the iovec array `dvec` to `addr` via
 * sendmsg (rxi_Sendmsg).  NOTE(review): the msg_name/msg_iov assignments and
 * the return value are in lines missing from this listing; the return of
 * rxi_Sendmsg is ignored as shown — confirm against full source. */
940 int osi_NetSend(socket, addr, dvec, nvecs, length, istack)
950 memset(&msg, 0, sizeof(msg));
952 msg.msg_iovlen = nvecs;
954 msg.msg_namelen = sizeof(struct sockaddr_in);
956 rxi_Sendmsg(socket, &msg, 0);
960 #elif !defined(UKERNEL)
961 /* osi_NetSend is defined in afs/afs_osinet.c
962 * message receipt is done in rxk_input or rx_put.
967 * Copy an mblock to the contiguous area pointed to by cp.
968 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
969 * but it doesn't really.
970 * Returns the number of bytes not transferred.
971 * The message is NOT changed.
/* Copy a STREAMS mblock chain into the contiguous buffer cp.  Per the
 * comment above, the <off>/<len> skip semantics are not really honored.
 * Walks b_cont links, copying only M_DATA blocks; returns the number of
 * bytes not transferred (bookkeeping/return lines missing from this
 * listing). */
973 static int cpytoc(mp, off, len, cp)
975 register int off, len;
980 for (;mp && len > 0; mp = mp->b_cont) {
981 if (mp->b_datap->db_type != M_DATA) {
984 n = MIN(len, (mp->b_wptr - mp->b_rptr));
985 bcopy((char *)mp->b_rptr, cp, n);
993 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
994 * but it doesn't really.
995 * This sucks, anyway, do it like m_cpy.... below
/* Copy a STREAMS mblock chain into an iovec array (STREAMS analogue of
 * m_cpytoiovec below).  Like cpytoc, the <off>/<len> skip semantics are
 * acknowledged above as not really implemented.
 * NOTE(review): the per-iovec advance logic (updates of m/o/i and loop
 * close) is largely missing from this listing — too incomplete to assess. */
997 static int cpytoiovec(mp, off, len, iovs, niovs)
1000 register struct iovec *iovs;
1002 register int m,n,o,t,i;
1004 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1005 if (mp->b_datap->db_type != M_DATA) {
1008 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1014 t = iovs[i].iov_len;
1017 bcopy((char *)mp->b_rptr, iovs[i].iov_base + o, m);
1026 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1027 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1029 #if !defined(AFS_LINUX20_ENV)
/* BSD-mbuf version: copy `len` bytes starting `off` bytes into mbuf chain
 * `m` out into the iovec array.  First skips whole mbufs consumed by `off`,
 * then copies MIN(l1, l2, len)-sized chunks, advancing the mbuf pointer/
 * remainder (p1/l1) and the iovec pointer/remainder (p2/l2) as each runs
 * out.  Panics on NULL/negative arguments.
 * NOTE(review): the copy call itself, the l1/l2/len updates, and the return
 * are among the lines missing from this listing. */
1030 static int m_cpytoiovec(m, off, len, iovs, niovs)
1032 int off, len, niovs;
1033 struct iovec iovs[];
1036 unsigned int l1, l2, i, t;
1038 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1039 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
1042 if (m->m_len <= off) {
1052 p1 = mtod(m, caddr_t)+off;
1053 l1 = m->m_len - off;
1055 p2 = iovs[0].iov_base;
1056 l2 = iovs[0].iov_len;
1059 t = MIN(l1, MIN(l2, (unsigned int)len));
1068 p1 = mtod(m, caddr_t);
1074 p2 = iovs[i].iov_base;
1075 l2 = iovs[i].iov_len;
1083 #endif /* AFS_SUN5_ENV */
1085 #if !defined(AFS_LINUX20_ENV)
/* Copy `data_len` bytes (skipping `hdr_len`) from the mbuf chain `amb` into
 * the packet's iovec chain via m_cpytoiovec; `free` and the handling of
 * `code`/return are in lines missing from this listing. */
1086 int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1093 struct rx_packet *phandle;
1094 int hdr_len, data_len;
1098 code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs);
1104 #endif /*KERNEL && !UKERNEL*/
1107 /* send a response to a debug packet */
/* Handle an incoming rxdebug request packet and send the reply in-place:
 * reads a struct rx_debugIn from `ap`, dispatches on tin.type, overwrites
 * ap's payload with the requested data (growing ap with cbufs when the reply
 * is larger than the request), and transmits via rxi_SendDebugPacket.
 * Supported types: GETSTATS (basic server stats), GETCONN/GETALLCONN (the
 * tin.index'th interesting connection), GETPEER (the tin.index'th peer),
 * RXSTATS (the raw rx_stats table, htonl'd word by word); unknown types get
 * an RX_DEBUGI_BADTYPE error reply.  Reply fields are converted to network
 * byte order individually.
 * NOTE(review): the switch(tin.type) header, case `break`s/returns, and
 * various loop bodies are among the lines missing from this listing. */
1109 struct rx_packet *rxi_ReceiveDebugPacket(ap, asocket, ahost, aport, istack)
1113 register struct rx_packet *ap;
1116 struct rx_debugIn tin;
1118 struct rx_serverQueueEntry *np, *nqe;
1120 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1121 /* all done with packet, now set length to the truth, so we can
1122 * reuse this packet */
1123 rx_computelen(ap, ap->length);
1125 tin.type = ntohl(tin.type);
1126 tin.index = ntohl(tin.index);
1128 case RX_DEBUGI_GETSTATS: {
1129 struct rx_debugStats tstat;
1131 /* get basic stats */
1132 bzero ((char *)&tstat, sizeof(tstat)); /* make sure spares are zero */
1133 tstat.version = RX_DEBUGI_VERSION;
1134 #ifndef RX_ENABLE_LOCKS
1135 tstat.waitingForPackets = rx_waitingForPackets;
1137 tstat.nFreePackets = htonl(rx_nFreePackets);
1138 tstat.callsExecuted = htonl(rxi_nCalls);
1139 tstat.packetReclaims = htonl(rx_packetReclaims);
1140 tstat.usedFDs = CountFDs(64);
1141 tstat.nWaiting = htonl(rx_nWaiting);
1142 queue_Count( &rx_idleServerQueue, np, nqe,
1143 rx_serverQueueEntry, tstat.idleThreads);
1144 tstat.idleThreads = htonl(tstat.idleThreads);
1145 tl = sizeof(struct rx_debugStats) - ap->length;
1147 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1150 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat);
1151 ap->length = sizeof(struct rx_debugStats);
1152 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1153 rx_computelen(ap, ap->length);
1158 case RX_DEBUGI_GETALLCONN:
1159 case RX_DEBUGI_GETCONN: {
1161 register struct rx_connection *tc;
1162 struct rx_call *tcall;
1163 struct rx_debugConn tconn;
1164 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1167 tl = sizeof(struct rx_debugConn) - ap->length;
1169 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1173 bzero ((char *)&tconn, sizeof(tconn)); /* make sure spares are zero */
1174 /* get N'th (maybe) "interesting" connection info */
1175 for(i=0;i<rx_hashTableSize;i++) {
1176 #if !defined(KERNEL)
1177 /* the time complexity of the algorithm used here
1178 * exponentially increses with the number of connections.
1180 #ifdef AFS_PTHREAD_ENV
1183 (void) IOMGR_Poll();
1186 MUTEX_ENTER(&rx_connHashTable_lock);
1187 /* We might be slightly out of step since we are not
1188 * locking each call, but this is only debugging output.
1190 for(tc=rx_connHashTable[i]; tc; tc=tc->next) {
1191 if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) {
1192 tconn.host = tc->peer->host;
1193 tconn.port = tc->peer->port;
1194 tconn.cid = htonl(tc->cid);
1195 tconn.epoch = htonl(tc->epoch);
1196 tconn.serial = htonl(tc->serial);
1197 for(j=0;j<RX_MAXCALLS;j++) {
1198 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1199 if ((tcall=tc->call[j])) {
1200 tconn.callState[j] = tcall->state;
1201 tconn.callMode[j] = tcall->mode;
1202 tconn.callFlags[j] = tcall->flags;
1203 if (queue_IsNotEmpty(&tcall->rq))
1204 tconn.callOther[j] |= RX_OTHER_IN;
1205 if (queue_IsNotEmpty(&tcall->tq))
1206 tconn.callOther[j] |= RX_OTHER_OUT;
1208 else tconn.callState[j] = RX_STATE_NOTINIT;
1211 tconn.natMTU = htonl(tc->peer->natMTU);
1212 tconn.error = htonl(tc->error);
1213 tconn.flags = tc->flags;
1214 tconn.type = tc->type;
1215 tconn.securityIndex = tc->securityIndex;
1216 if (tc->securityObject) {
1217 RXS_GetStats (tc->securityObject, tc,
1219 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1220 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1223 DOHTONL(packetsReceived);
1224 DOHTONL(packetsSent);
1225 DOHTONL(bytesReceived);
1228 i<sizeof(tconn.secStats.spares)/sizeof(short);
1232 i<sizeof(tconn.secStats.sparel)/sizeof(afs_int32);
1237 MUTEX_EXIT(&rx_connHashTable_lock);
1238 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char*)&tconn);
1240 ap->length = sizeof(struct rx_debugConn);
1241 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1246 MUTEX_EXIT(&rx_connHashTable_lock);
1248 /* if we make it here, there are no interesting packets */
1249 tconn.cid = htonl(0xffffffff); /* means end */
1250 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn);
1252 ap->length = sizeof(struct rx_debugConn);
1253 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1259 * Pass back all the peer structures we have available
1262 case RX_DEBUGI_GETPEER: {
1264 register struct rx_peer *tp;
1265 struct rx_debugPeer tpeer;
1268 tl = sizeof(struct rx_debugPeer) - ap->length;
1270 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1274 bzero ((char *)&tpeer, sizeof(tpeer));
1275 for(i=0;i<rx_hashTableSize;i++) {
1276 #if !defined(KERNEL)
1277 /* the time complexity of the algorithm used here
1278 * exponentially increses with the number of peers.
1280 * Yielding after processing each hash table entry
1281 * and dropping rx_peerHashTable_lock.
1282 * also increases the risk that we will miss a new
1283 * entry - but we are willing to live with this
1284 * limitation since this is meant for debugging only
1286 #ifdef AFS_PTHREAD_ENV
1289 (void) IOMGR_Poll();
1292 MUTEX_ENTER(&rx_peerHashTable_lock);
1293 for(tp=rx_peerHashTable[i]; tp; tp=tp->next) {
1294 if (tin.index-- <= 0) {
1295 tpeer.host = tp->host;
1296 tpeer.port = tp->port;
1297 tpeer.ifMTU = htons(tp->ifMTU);
1298 tpeer.idleWhen = htonl(tp->idleWhen);
1299 tpeer.refCount = htons(tp->refCount);
1300 tpeer.burstSize = tp->burstSize;
1301 tpeer.burst = tp->burst;
1302 tpeer.burstWait.sec = htonl(tp->burstWait.sec);
1303 tpeer.burstWait.usec = htonl(tp->burstWait.usec);
1304 tpeer.rtt = htonl(tp->rtt);
1305 tpeer.rtt_dev = htonl(tp->rtt_dev);
1306 tpeer.timeout.sec = htonl(tp->timeout.sec);
1307 tpeer.timeout.usec = htonl(tp->timeout.usec);
1308 tpeer.nSent = htonl(tp->nSent);
1309 tpeer.reSends = htonl(tp->reSends);
1310 tpeer.inPacketSkew = htonl(tp->inPacketSkew);
1311 tpeer.outPacketSkew = htonl(tp->outPacketSkew);
1312 tpeer.rateFlag = htonl(tp->rateFlag);
1313 tpeer.natMTU = htons(tp->natMTU);
1314 tpeer.maxMTU = htons(tp->maxMTU);
1315 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
1316 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
1317 tpeer.MTU = htons(tp->MTU);
1318 tpeer.cwind = htons(tp->cwind);
1319 tpeer.nDgramPackets = htons(tp->nDgramPackets);
1320 tpeer.congestSeq = htons(tp->congestSeq);
1321 tpeer.bytesSent.high = htonl(tp->bytesSent.high);
1322 tpeer.bytesSent.low = htonl(tp->bytesSent.low);
1323 tpeer.bytesReceived.high = htonl(tp->bytesReceived.high);
1324 tpeer.bytesReceived.low = htonl(tp->bytesReceived.low);
1326 MUTEX_EXIT(&rx_peerHashTable_lock);
1327 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char*)&tpeer);
1329 ap->length = sizeof(struct rx_debugPeer);
1330 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1335 MUTEX_EXIT(&rx_peerHashTable_lock);
1337 /* if we make it here, there are no interesting packets */
1338 tpeer.host = htonl(0xffffffff); /* means end */
1339 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer);
1341 ap->length = sizeof(struct rx_debugPeer);
1342 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1347 case RX_DEBUGI_RXSTATS: {
1351 tl = sizeof(rx_stats) - ap->length;
1353 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1357 /* Since its all int32s convert to network order with a loop. */
1358 MUTEX_ENTER(&rx_stats_mutex);
1359 s = (afs_int32 *)&rx_stats;
1360 for (i=0; i<sizeof(rx_stats)/sizeof(afs_int32); i++,s++)
1361 rx_PutInt32(ap, i*sizeof(afs_int32), htonl(*s));
1364 ap->length = sizeof(rx_stats);
1365 MUTEX_EXIT(&rx_stats_mutex);
1366 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1372 /* error response packet */
1373 tin.type = htonl(RX_DEBUGI_BADTYPE);
1374 tin.index = tin.type;
1375 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1377 ap->length = sizeof(struct rx_debugIn);
1378 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
/* Answer an Rx version-probe: overwrite the payload of the received
 * packet with this build's version string and send it straight back.
 * ap                  - received packet, reused in place for the reply
 * asocket/ahost/aport - reply destination; istack is passed through to
 *                       the kernel network-send path
 * NOTE(review): extraction gaps - body braces, length setup and the
 * return statement are not visible in this view. */
1385 struct rx_packet *rxi_ReceiveVersionPacket(ap, asocket, ahost, aport, istack)
1389 register struct rx_packet *ap;
/* Copies a fixed 65 bytes starting 4 chars into cml_version_number
 * (presumably skipping an SCCS-style "@(#)" tag) - TODO confirm the
 * version string is long enough that this fixed-length read stays in
 * bounds. */
1393 rx_packetwrite(ap, 0, 65, cml_version_number+4);
1396 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1402 /* send a debug packet back to the sender */
/* Transmit a debug/version reply to (ahost, aport) over asocket.
 * Temporarily shrinks the packet's iovec chain so that exactly
 * apacket->length payload bytes are handed to osi_NetSend, then
 * restores the original iovec lengths afterwards. Drops the AFS
 * global lock (if held) around the network send. */
1403 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
1404 afs_int32 ahost, short aport, afs_int32 istack)
1406 struct sockaddr_in taddr;
/* Remember whether we held the global lock so we can restore it. */
1412 int waslocked = ISAFS_GLOCK();
/* ahost/aport are already in network byte order (taken from the wire). */
1415 taddr.sin_family = AF_INET;
1416 taddr.sin_port = aport;
1417 taddr.sin_addr.s_addr = ahost;
1420 /* We need to trim the niovecs. */
/* Walk the payload iovecs (index 0 is the wire header) until the
 * remaining byte count fits in the current iovec; truncate that one
 * and cap niovecs. savelen/saven stash the original values so they
 * can be restored after the send.
 * NOTE(review): the loop-exit (break) line is not visible in this
 * extraction - confirm against the full source. */
1421 nbytes = apacket->length;
1422 for (i=1; i < apacket->niovecs; i++) {
1423 if (nbytes <= apacket->wirevec[i].iov_len) {
1424 savelen = apacket->wirevec[i].iov_len;
1425 saven = apacket->niovecs;
1426 apacket->wirevec[i].iov_len = nbytes;
1427 apacket->niovecs = i+1; /* so condition fails because i == niovecs */
1429 else nbytes -= apacket->wirevec[i].iov_len;
/* Never hold the AFS global lock across a potentially blocking send. */
1433 if (waslocked) AFS_GUNLOCK();
1435 /* debug packets are not reliably delivered, hence the cast below. */
1436 (void) osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
1437 apacket->length+RX_HEADER_SIZE, istack);
1439 if (waslocked) AFS_GLOCK();
/* Undo the truncation so the caller's packet is unchanged. */
1442 if (saven) { /* means we truncated the packet above. */
1443 apacket->wirevec[i-1].iov_len = savelen;
1444 apacket->niovecs = saven;
1449 /* Send the packet to appropriate destination for the specified
1450 * connection. The header is first encoded and placed in the packet.
/* conn  - connection whose peer receives the packet (also supplies the
 *         serial number and, for server connections, the socket)
 * p     - packet to send; its header.serial is stamped here and the
 *         wire header re-encoded
 * Side effects: bumps conn->serial, updates rx_stats and the peer's
 * bytesSent counter; on send failure, reschedules the packet's
 * retryTime for a near-immediate resend. */
1452 void rxi_SendPacket(struct rx_connection * conn, struct rx_packet *p,
1458 struct sockaddr_in addr;
1459 register struct rx_peer *peer = conn->peer;
1462 char deliveryType = 'S';
1464 /* The address we're sending the packet to */
/* peer->port and peer->host are kept in network byte order. */
1465 addr.sin_family = AF_INET;
1466 addr.sin_port = peer->port;
1467 addr.sin_addr.s_addr = peer->host;
1469 /* This stuff should be revamped, I think, so that most, if not
1470 * all, of the header stuff is always added here. We could
1471 * probably do away with the encode/decode routines. XXXXX */
1473 /* Stamp each packet with a unique serial number. The serial
1474 * number is maintained on a connection basis because some types
1475 * of security may be based on the serial number of the packet,
1476 * and security is handled on a per authenticated-connection
1478 /* Pre-increment, to guarantee no zero serial number; a zero
1479 * serial number means the packet was never sent. */
1480 MUTEX_ENTER(&conn->conn_data_lock);
1481 p->header.serial = ++conn->serial;
1482 MUTEX_EXIT(&conn->conn_data_lock);
1483 /* This is so we can adjust retransmit time-outs better in the face of
1484 * rapidly changing round-trip times. RTO estimation is not a la Karn.
/* firstSerial records the serial of the first transmission only;
 * retransmissions keep the original value. */
1486 if (p->firstSerial == 0) {
1487 p->firstSerial = p->header.serial;
1491 /* If an output tracer function is defined, call it with the packet and
1492 * network address. Note this function may modify its arguments. */
1493 if (rx_almostSent) {
1494 int drop = (*rx_almostSent) (p, &addr);
1495 /* drop packet if return value is non-zero? */
1496 if (drop) deliveryType = 'D'; /* Drop the packet */
1500 /* Get network byte order header */
1501 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1502 * touch ALL the fields */
1504 /* Send the packet out on the same socket that related packets are being
/* Client connections always use the global rx_socket; server
 * connections use their service's socket. */
1506 socket = (conn->type == RX_CLIENT_CONNECTION
1507 ? rx_socket : conn->service->socket);
1510 /* Possibly drop this packet, for testing purposes */
1511 if ((deliveryType == 'D') ||
1512 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1513 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1514 deliveryType = 'D'; /* Drop the packet */
1517 deliveryType = 'S'; /* Send the packet */
1518 #endif /* RXDEBUG */
1520 /* Loop until the packet is sent. We'd prefer just to use a
1521 * blocking socket, but unfortunately the interface doesn't
1522 * allow us to have the socket block in send mode, and not
1523 * block in receive mode */
/* Drop the AFS global lock across the (possibly blocking) send. */
1526 waslocked = ISAFS_GLOCK();
1527 if (waslocked) AFS_GUNLOCK();
1529 if (osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
1530 p->length+RX_HEADER_SIZE, istack)){
1531 /* send failed, so let's hurry up the resend, eh? */
1532 MUTEX_ENTER(&rx_stats_mutex);
1533 rx_stats.netSendFailures++;
1534 MUTEX_EXIT(&rx_stats_mutex);
/* Retry ~10ms (plus exponential backoff) after the failed attempt. */
1535 p->retryTime = p->timeSent; /* resend it very soon */
1536 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1539 if (waslocked) AFS_GLOCK();
/* NOTE(review): "%0.3d" looks like it was meant to be "%03d" for the
 * milliseconds field, and header.serial is printed twice - confirm
 * against the full source before changing a debug-only format. */
1544 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1545 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1546 peer->host, peer->port, p->header.serial, p->header.epoch,
1547 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1548 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1550 MUTEX_ENTER(&rx_stats_mutex);
1551 rx_stats.packetsSent[p->header.type-1]++;
1552 MUTEX_EXIT(&rx_stats_mutex);
1553 MUTEX_ENTER(&peer->peer_lock);
1554 hadd32(peer->bytesSent, p->length);
1555 MUTEX_EXIT(&peer->peer_lock);
1558 /* Send a list of packets to appropriate destination for the specified
1559 * connection. The headers are first encoded and placed in the packets.
/* Sends `len` packets from `list` as a single AFS 3.5 jumbogram: one
 * wire header (taken from list[0]) followed by each packet's data
 * buffer, with a jumbo sub-header between consecutive packets.
 * Reserves `len` consecutive serial numbers from the connection and
 * stamps one per packet. Side effects mirror rxi_SendPacket. */
1561 void rxi_SendPacketList(struct rx_connection * conn,
1562 struct rx_packet **list,
1566 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1569 struct sockaddr_in addr;
1570 register struct rx_peer *peer = conn->peer;
1572 struct rx_packet *p = NULL;
/* Scratch iovec array describing the whole jumbogram on the wire. */
1573 struct iovec wirevec[RX_MAXIOVECS];
1577 struct rx_jumboHeader *jp;
1579 char deliveryType = 'S';
1581 /* The address we're sending the packet to */
1582 addr.sin_family = AF_INET;
1583 addr.sin_port = peer->port;
1584 addr.sin_addr.s_addr = peer->host;
/* One iovec for the header plus one per packet must fit. */
1586 if (len+1 > RX_MAXIOVECS) {
1587 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
1591 * Stamp the packets in this jumbogram with consecutive serial numbers
/* Reserve the whole serial range under the lock, then stamp packets
 * lock-free below. */
1593 MUTEX_ENTER(&conn->conn_data_lock);
1594 serial = conn->serial;
1595 conn->serial += len;
1596 MUTEX_EXIT(&conn->conn_data_lock);
1599 /* This stuff should be revamped, I think, so that most, if not
1600 * all, of the header stuff is always added here. We could
1601 * probably do away with the encode/decode routines. XXXXX */
/* iovec 0 is the single Rx wire header, taken from the first packet. */
1604 length = RX_HEADER_SIZE;
1605 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
1606 wirevec[0].iov_len = RX_HEADER_SIZE;
1607 for (i = 0 ; i < len ; i++) {
1610 /* The whole 3.5 jumbogram scheme relies on packets fitting
1611 * in a single packet buffer. */
1612 if (p->niovecs > 2) {
1613 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
1616 /* Set the RX_JUMBO_PACKET flags in all but the last packets
/* Non-final packets must be exactly RX_JUMBOBUFFERSIZE and carry a
 * trailing jumbo sub-header; the final packet uses its own length. */
1619 if (p->length != RX_JUMBOBUFFERSIZE) {
1620 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
1622 p->header.flags |= RX_JUMBO_PACKET;
1623 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1624 wirevec[i+1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1626 wirevec[i+1].iov_len = p->length;
1627 length += p->length;
1629 wirevec[i+1].iov_base = (char *)(&p->localdata[0]);
1631 /* Convert jumbo packet header to network byte order */
/* The jumbo sub-header packs flags (high byte) and spare into one
 * 32-bit word; jp points into the PREVIOUS packet's buffer. */
1632 temp = (afs_uint32)(p->header.flags) << 24;
1633 temp |= (afs_uint32)(p->header.spare);
1634 *(afs_uint32 *)jp = htonl(temp);
/* Position jp for the next iteration: just past this packet's data. */
1636 jp = (struct rx_jumboHeader *)
1637 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
1639 /* Stamp each packet with a unique serial number. The serial
1640 * number is maintained on a connection basis because some types
1641 * of security may be based on the serial number of the packet,
1642 * and security is handled on a per authenticated-connection
1644 /* Pre-increment, to guarantee no zero serial number; a zero
1645 * serial number means the packet was never sent. */
1646 p->header.serial = ++serial;
1647 /* This is so we can adjust retransmit time-outs better in the face of
1648 * rapidly changing round-trip times. RTO estimation is not a la Karn.
1650 if (p->firstSerial == 0) {
1651 p->firstSerial = p->header.serial;
1655 /* If an output tracer function is defined, call it with the packet and
1656 * network address. Note this function may modify its arguments. */
1657 if (rx_almostSent) {
1658 int drop = (*rx_almostSent) (p, &addr);
1659 /* drop packet if return value is non-zero? */
1660 if (drop) deliveryType = 'D'; /* Drop the packet */
1664 /* Get network byte order header */
1665 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
1666 * touch ALL the fields */
1669 /* Send the packet out on the same socket that related packets are being
1671 socket = (conn->type == RX_CLIENT_CONNECTION
1672 ? rx_socket : conn->service->socket);
1675 /* Possibly drop this packet, for testing purposes */
1676 if ((deliveryType == 'D') ||
1677 ((rx_intentionallyDroppedPacketsPer100 > 0) &&
1678 (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
1679 deliveryType = 'D'; /* Drop the packet */
1682 deliveryType = 'S'; /* Send the packet */
1683 #endif /* RXDEBUG */
1685 /* Loop until the packet is sent. We'd prefer just to use a
1686 * blocking socket, but unfortunately the interface doesn't
1687 * allow us to have the socket block in send mode, and not
1688 * block in receive mode */
1690 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
/* Solaris kernel: only drop the global lock for non-interrupt sends. */
1691 waslocked = ISAFS_GLOCK();
1692 if (!istack && waslocked) AFS_GUNLOCK();
1694 if (osi_NetSend(socket, &addr, &wirevec[0], len+1, length, istack)){
1695 /* send failed, so let's hurry up the resend, eh? */
1696 MUTEX_ENTER(&rx_stats_mutex);
1697 rx_stats.netSendFailures++;
1698 MUTEX_EXIT(&rx_stats_mutex);
/* Reschedule every packet in the failed jumbogram for a fast resend. */
1699 for (i = 0 ; i < len ; i++) {
1701 p->retryTime = p->timeSent; /* resend it very soon */
1702 clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
1705 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
1706 if (!istack && waslocked) AFS_GLOCK();
/* NOTE(review): same "%0.3d" / duplicated-serial quirk as in
 * rxi_SendPacket's dpf - debug-only output. */
1711 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d len %d",
1712 deliveryType, p->header.serial, rx_packetTypes[p->header.type-1],
1713 peer->host, peer->port, p->header.serial, p->header.epoch,
1714 p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1715 p, p->retryTime.sec, p->retryTime.usec/1000, p->length));
1717 MUTEX_ENTER(&rx_stats_mutex);
1718 rx_stats.packetsSent[p->header.type-1]++;
1719 MUTEX_EXIT(&rx_stats_mutex);
1720 MUTEX_ENTER(&peer->peer_lock);
1721 hadd32(peer->bytesSent, p->length);
1722 MUTEX_EXIT(&peer->peer_lock);
1726 /* Send a "special" packet to the peer connection. If call is
1727 * specified, then the packet is directed to a specific call channel
1728 * associated with the connection, otherwise it is directed to the
1729 * connection only. Uses optionalPacket if it is supplied, rather than
1730 * allocating a new packet buffer. Nbytes is the length of the data
1731 * portion of the packet. If data is non-null, nbytes of data are
1732 * copied into the packet. Type is the type of the packet, as defined
1733 * in rx.h. Bug: there's a lot of duplication between this and other
1734 * routines. This needs to be cleaned up. */
/* Returns optionalPacket if one was supplied (caller keeps ownership);
 * a freshly allocated packet is freed here after sending. */
1736 rxi_SendSpecial(call, conn, optionalPacket, type, data, nbytes, istack)
1737 register struct rx_call *call;
1738 register struct rx_connection *conn;
1739 struct rx_packet *optionalPacket;
1744 /* Some of the following stuff should be common code for all
1745 * packet sends (it's repeated elsewhere) */
1746 register struct rx_packet *p;
1748 int savelen = 0, saven = 0;
1749 int channel, callNumber;
/* With a call, target its channel/callNumber; the connection-only
 * case (no call) is handled on lines not visible in this view. */
1751 channel = call->channel;
1752 callNumber = *call->callNumber;
1753 /* BUSY packets refer to the next call on this connection */
1754 if (type == RX_PACKET_TYPE_BUSY) {
/* Allocate a packet only when the caller didn't supply one. */
1763 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1764 if (!p) osi_Panic("rxi_SendSpecial failure");
/* Fill in the header fields that identify this connection/call. */
1771 p->header.serviceId = conn->serviceId;
1772 p->header.securityIndex = conn->securityIndex;
1773 p->header.cid = (conn->cid | channel);
1774 p->header.callNumber = callNumber;
1776 p->header.epoch = conn->epoch;
1777 p->header.type = type;
1778 p->header.flags = 0;
1779 if (conn->type == RX_CLIENT_CONNECTION)
1780 p->header.flags |= RX_CLIENT_INITIATED;
1782 rx_packetwrite(p, 0, nbytes, data);
/* Trim the iovec chain to exactly nbytes of payload, remembering the
 * original values so optionalPacket can be restored afterwards (same
 * pattern as rxi_SendDebugPacket). */
1784 for (i=1; i < p->niovecs; i++) {
1785 if (nbytes <= p->wirevec[i].iov_len) {
1786 savelen = p->wirevec[i].iov_len;
1788 p->wirevec[i].iov_len = nbytes;
1789 p->niovecs = i+1; /* so condition fails because i == niovecs */
1791 else nbytes -= p->wirevec[i].iov_len;
1794 if (call) rxi_Send(call, p, istack);
1795 else rxi_SendPacket(conn, p, istack);
1796 if (saven) { /* means we truncated the packet above. We probably don't */
1797 /* really need to do this, but it seems safer this way, given that */
1798 /* sneaky optionalPacket... */
1799 p->wirevec[i-1].iov_len = savelen;
1802 if (!optionalPacket) rxi_FreePacket(p);
1803 return optionalPacket;
1807 /* Encode the packet's header (from the struct header in the packet to
1808 * the net byte order representation in the wire representation of the
1809 * packet, which is what is actually sent out on the wire) */
/* Serializes p->header into the first iovec as seven 32-bit words in
 * network byte order. Inverse of rxi_DecodePacketHeader. */
1810 void rxi_EncodePacketHeader(p)
1811 register struct rx_packet *p;
1813 register afs_uint32 *buf = (afs_uint32 *)(p->wirevec[0].iov_base); /* MTUXXX */
/* Clear the header area first so reserved bits go out as zero. */
1815 bzero((char *)buf, RX_HEADER_SIZE);
1816 *buf++ = htonl(p->header.epoch);
1817 *buf++ = htonl(p->header.cid);
1818 *buf++ = htonl(p->header.callNumber);
1819 *buf++ = htonl(p->header.seq);
1820 *buf++ = htonl(p->header.serial);
/* Word 6 packs type|flags|userStatus|securityIndex, one byte each. */
1821 *buf++ = htonl( (((afs_uint32)p->header.type)<<24)
1822 | (((afs_uint32)p->header.flags)<<16)
1823 | (p->header.userStatus<<8) | p->header.securityIndex);
1824 /* Note: top 16 bits of this next word were reserved */
/* Word 7: spare in the high half, serviceId in the low half. */
1825 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId&0xffff));
1828 /* Decode the packet's header (from net byte order to a struct header) */
/* Parses the seven 32-bit network-order words in the first iovec into
 * p->header. Inverse of rxi_EncodePacketHeader. */
1829 void rxi_DecodePacketHeader(p)
1830 register struct rx_packet *p;
1832 register afs_uint32 *buf = (afs_uint32*)(p->wirevec[0].iov_base); /* MTUXXX */
1835 p->header.epoch = ntohl(*buf++);
1836 p->header.cid = ntohl(*buf++);
1837 p->header.callNumber = ntohl(*buf++);
1838 p->header.seq = ntohl(*buf++);
1839 p->header.serial = ntohl(*buf++);
/* Unpack type|flags|userStatus|securityIndex from one word; the
 * assignments rely on narrowing to byte-sized struct fields. */
1840 temp = ntohl(*buf++);
1841 /* C will truncate byte fields to bytes for me */
1842 p->header.type = temp>>24;
1843 p->header.flags = temp>>16;
1844 p->header.userStatus = temp>>8;
1845 p->header.securityIndex = temp>>0;
1846 temp = ntohl(*buf++);
1847 p->header.serviceId = (temp&0xffff);
1848 p->header.spare = temp>>16;
1849 /* Note: top 16 bits of this last word are the security checksum */
/* Prepare a data packet for first transmission on `call`: fill in the
 * header from the call/connection state, reset transmit-time tracking,
 * reconcile the iovec lengths with p->length (freeing surplus buffers),
 * and let the security object wrap the packet.
 * last - non-zero marks this as the call's final data packet
 *        (sets RX_LAST_PACKET). */
1852 void rxi_PrepareSendPacket(call, p, last)
1853 register struct rx_call *call;
1854 register struct rx_packet *p;
1857 register struct rx_connection *conn = call->conn;
1859 ssize_t len; /* len must be a signed type; it can go negative */
1862 p->header.cid = (conn->cid | call->channel);
1863 p->header.serviceId = conn->serviceId;
1864 p->header.securityIndex = conn->securityIndex;
1865 p->header.callNumber = *call->callNumber;
/* tnext is the next sequence number to transmit on this call. */
1866 p->header.seq = call->tnext++;
1867 p->header.epoch = conn->epoch;
1868 p->header.type = RX_PACKET_TYPE_DATA;
1869 p->header.flags = 0;
1870 p->header.spare = 0;
1871 if (conn->type == RX_CLIENT_CONNECTION)
1872 p->header.flags |= RX_CLIENT_INITIATED;
1875 p->header.flags |= RX_LAST_PACKET;
1877 clock_Zero(&p->retryTime); /* Never yet transmitted */
1878 clock_Zero(&p->firstSent); /* Never yet transmitted */
1879 p->header.serial = 0; /* Another way of saying never transmitted... */
1882 /* Now that we're sure this is the last data on the call, make sure
1883 * that the "length" and the sum of the iov_lens matches. */
/* Include the security header so the wrapped packet still fits. */
1884 len = p->length + call->conn->securityHeaderSize;
/* Consume iovecs until len goes non-positive; i ends at the first
 * iovec NOT needed (len <= 0 means the data fits in iovecs 1..i-1). */
1886 for (i=1; i < p->niovecs && len > 0; i++) {
1887 len -= p->wirevec[i].iov_len;
/* Running out of iovecs before len is exhausted is a caller bug. */
1890 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
1893 /* Free any extra elements in the wirevec */
/* MAX(2,i): always keep the header iovec plus at least one data iovec. */
1894 for (j = MAX(2,i) ; j < p->niovecs ; j++) {
1895 rxi_freeCBuf(RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
/* len is <= 0 here; adding it trims the last used iovec to size. */
1898 p->wirevec[i-1].iov_len += len;
/* Security wrap (checksum/encryption) happens last, on the final shape. */
1900 RXS_PreparePacket(conn->securityObject, call, p);
1903 /* Given an interface MTU size, calculate an adjusted MTU size that
1904 * will make efficient use of the RX buffers when the peer is sending
1905 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
/* Rounds mtu down to header + a whole number of jumbo buffer slots.
 * NOTE(review): the lines between the adjMTU comparison and the frags
 * computation (early return, mtu -= adjMTU) are not visible in this
 * extraction - confirm against the full source. */
1906 int rxi_AdjustIfMTU(int mtu)
1911 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
/* MTUs at or below one full slot are usable as-is. */
1912 if (mtu <= adjMTU) {
/* Number of additional whole jumbo slots that fit in the remainder. */
1919 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
1920 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
1923 /* Given an interface MTU size, and the peer's advertised max receive
1924 * size, calculate an adjisted maxMTU size that makes efficient use
1925 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
1926 int rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
1928 int maxMTU = mtu * rxi_nSendFrags;
1929 maxMTU = MIN(maxMTU, peerMaxMTU);
1930 return rxi_AdjustIfMTU(maxMTU);
1933 /* Given a packet size, figure out how many datagram packet will fit.
1934 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
1935 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
1936 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
/* frags - desired fragment count; mtu - per-fragment MTU.
 * Budgets the total wire size (UDP headers included, capped at
 * RX_MAX_PACKET_SIZE) and converts it to a jumbo packet count. */
1937 int rxi_AdjustDgramPackets(int frags, int mtu)
/* MTUs too small to hold even one jumbo buffer get the minimum
 * (the return on the dropped line is not visible in this view). */
1940 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
/* Total payload across frags datagrams; each fragment after the first
 * costs an extra UDP header on the wire. */
1943 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
1944 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
1945 /* subtract the size of the first and last packets */
1946 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
/* First + last packets account for the constant 2; the remainder
 * divides into whole middle-buffer slots. */
1950 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));