/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

#include <afsconfig.h>
#ifdef KERNEL
#include "afs/param.h"
#else
#include <afs/param.h>
#endif

#ifdef KERNEL
#if defined(UKERNEL)
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#ifndef AFS_LINUX20_ENV
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#if defined(AFS_OBSD_ENV)
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include <sys/sysmacros.h>
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#include "sys/types.h"
#if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
#define EWOULDBLOCK WSAEWOULDBLOCK
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_xmit_nt.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include "rx_packet.h"
#include "rx_globals.h"
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */
struct rx_packet *rx_mallocedP = 0;
extern char cml_version_number[];
extern int (*rx_almostSent) ();

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport,
				afs_int32 istack);

static int rxi_FreeDataBufsToQueue(struct rx_packet *p,
				   afs_uint32 first,
				   struct rx_queue * q);
static void
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global);
/* some rules about packets:
 * 1. When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */

/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    return
		*((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
				 (offset - l)));
	}
	l += packet->wirevec[i].iov_len;
    }

    return 0;
}

/*
 * all packet buffers (iov_base) are integral multiples of the word size.
 * offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
{
    unsigned int i;
    size_t l;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
			     (offset - l))) = data;
	    return 0;
	}
	l += packet->wirevec[i].iov_len;
    }

    return 0;
}
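/*
 * Usage sketch (illustrative, not part of the original source): reading and
 * writing a word-aligned 32-bit field through the slow paths above.  The
 * fast-path macros normally handle offsets inside the first iovec and only
 * fall back to these routines; the packet here is assumed to come from
 * rxi_AllocPacket.
 */
#if 0
static void
example_SlowInt32(struct rx_packet *p)
{
    size_t off = 3 * sizeof(afs_int32);	/* must be word aligned */
    afs_int32 v;

    rx_SlowPutInt32(p, off, htonl(42));	/* store in network order */
    v = ntohl(rx_SlowGetInt32(p, off));	/* read it back */
    osi_Assert(v == 42);
}
#endif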
/*
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
 *
 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
 */
afs_int32
rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
		  char *out)
{
    unsigned int i, j, l, r;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((resid > 0) && (i < packet->niovecs)) {
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
	out += j;
	resid -= j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (resid ? (r - resid) : r);
}

/*
 * all packet buffers (iov_base) are integral multiples of the
 * word size.
 * offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
{
    unsigned int i, j, l, r;
    char *b;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    break;
	}
	l += packet->wirevec[i].iov_len;
    }

    /* i is the iovec which contains the first little bit of data in which we
     * are interested.  l is the total length of everything prior to this iovec.
     * j is the number of bytes we can safely copy out of this iovec.
     * offset only applies to the first iovec.
     */
    r = resid;
    while ((resid > 0) && (i < RX_MAXWVECS)) {
	if (i >= packet->niovecs)
	    if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0)	/* ++niovecs as a side-effect */
		break;

	b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
	j = MIN(resid, packet->wirevec[i].iov_len - (offset - l));
	memcpy(b, in, j);
	in += j;
	resid -= j;
	l += packet->wirevec[i].iov_len;
	offset = l;
	i++;
    }

    return (resid ? (r - resid) : r);
}
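/*
 * Usage sketch (illustrative): round-tripping an opaque buffer through a
 * packet with the slow read/write paths.  Both routines return the number
 * of bytes actually transferred, which may be short if the packet cannot
 * grow any further.
 */
#if 0
static int
example_ReadWrite(struct rx_packet *p)
{
    char out[512], in[512];
    afs_int32 nw, nr;

    memset(out, 0xab, sizeof(out));
    nw = rx_SlowWritePacket(p, 0, sizeof(out), out);
    nr = rx_SlowReadPacket(p, 0, nw, in);
    return (nr == nw) && (memcmp(in, out, nr) == 0);
}
#endif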
int
rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q)
{
    register struct rx_packet *p, *np;

    num_pkts = AllocPacketBufs(class, num_pkts, q);

    for (queue_Scan(q, p, np, rx_packet)) {
	RX_PACKET_IOV_FULLINIT(p);
    }

    return num_pkts;
}

#ifdef RX_ENABLE_TSFPQ
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    register struct rx_packet *c;
    register struct rx_ts_info_t * rx_ts_info;
    int transfer, alloc;
    SPLVAR;

    RX_TS_INFO_GET(rx_ts_info);

    transfer = num_pkts - rx_ts_info->_FPQ.len;
    if (transfer > 0) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	if ((transfer + rx_TSFPQGlobSize) <= rx_nFreePackets) {
	    transfer += rx_TSFPQGlobSize;
	} else if (transfer <= rx_nFreePackets) {
	    transfer = rx_nFreePackets;
	} else {
	    /* alloc enough for us, plus a few globs for other threads */
	    alloc = transfer + (3 * rx_TSFPQGlobSize) - rx_nFreePackets;
	    rxi_MorePacketsNoLock(MAX(alloc, rx_initSendWindow));
	    transfer += rx_TSFPQGlobSize;
	}

	RX_TS_FPQ_GTOL2(rx_ts_info, transfer);

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }

    RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
static int
AllocPacketBufs(int class, int num_pkts, struct rx_queue * q)
{
    register struct rx_packet *c;
    register int i, overq = 0;
    SPLVAR;

    NETPRI;

    MUTEX_ENTER(&rx_freePktQ_lock);

#ifdef KERNEL
    for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
	 num_pkts--, overq++);

    if (overq) {
	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	switch (class) {
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	    break;
	}
	MUTEX_EXIT(&rx_stats_mutex);
    }

    if (rx_nFreePackets < num_pkts)
	num_pkts = rx_nFreePackets;

    if (!num_pkts) {
	rxi_NeedMorePackets = TRUE;
	goto done;
    }
#else /* KERNEL */
    if (rx_nFreePackets < num_pkts) {
	rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), rx_initSendWindow));
    }
#endif /* KERNEL */

    for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet);
	 i < num_pkts;
	 i++, c=queue_Next(c, rx_packet)) {
	RX_FPQ_MARK_USED(c);
    }

    queue_SplitBeforeAppend(&rx_freePacketQueue,q,c);

    rx_nFreePackets -= num_pkts;

#ifdef KERNEL
  done:
#endif /* KERNEL */
    MUTEX_EXIT(&rx_freePktQ_lock);

    USERPRI;
    return num_pkts;
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * Free a packet currently used as a continuation buffer
 */
#ifdef RX_ENABLE_TSFPQ
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct rx_queue * q)
{
    register struct rx_ts_info_t * rx_ts_info;
    register struct rx_packet *c, *nc;
    SPLVAR;

    osi_Assert(num_pkts >= 0);
    RX_TS_INFO_GET(rx_ts_info);

    if (!num_pkts) {
	for (queue_Scan(q, c, nc, rx_packet), num_pkts++) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);
	}
    } else {
	for (queue_Scan(q, c, nc, rx_packet)) {
	    rxi_FreeDataBufsTSFPQ(c, 2, 0);
	}
    }

    if (num_pkts) {
	RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
    }

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }

    return num_pkts;
}
#else /* RX_ENABLE_TSFPQ */
/* num_pkts=0 means queue length is unknown */
int
rxi_FreePackets(int num_pkts, struct rx_queue *q)
{
    struct rx_queue cbs;
    register struct rx_packet *p, *np;
    int qlen = 0;
    SPLVAR;

    osi_Assert(num_pkts >= 0);
    queue_Init(&cbs);

    if (!num_pkts) {
	for (queue_Scan(q, p, np, rx_packet), num_pkts++) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	    }
	    RX_FPQ_MARK_FREE(p);
	}
	if (!num_pkts)
	    return 0;
    } else {
	for (queue_Scan(q, p, np, rx_packet)) {
	    if (p->niovecs > 2) {
		qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
	    }
	    RX_FPQ_MARK_FREE(p);
	}
    }

    if (qlen) {
	queue_SpliceAppend(q, &cbs);
	qlen += num_pkts;
    } else
	qlen = num_pkts;

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    queue_SpliceAppend(&rx_freePacketQueue, q);
    rx_nFreePackets += qlen;

    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;

    return num_pkts;
}
#endif /* RX_ENABLE_TSFPQ */
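/*
 * Usage sketch (illustrative): batch allocation and release of packets
 * through an rx_queue, as the callers of rxi_AllocPackets do.  Note that
 * fewer packets than requested may be returned under quota pressure.
 */
#if 0
static void
example_BatchAlloc(void)
{
    struct rx_queue q;
    int n;

    queue_Init(&q);
    n = rxi_AllocPackets(RX_PACKET_CLASS_RECEIVE, 8, &q);
    /* ... use the n packets chained on q ... */
    rxi_FreePackets(n, &q);	/* passing 0 would mean "count the queue" */
}
#endif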
/* this one is kind of awful.
 * In rxkad, the packet has been all shortened, and everything, ready for
 * sending.  All of a sudden, we discover we need some of that space back.
 * This isn't terribly general, because it knows that the packets are only
 * rounded up to the EBS (userdata + security header).
 */
int
rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
{
    int i;

    i = p->niovecs - 1;
    if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
	if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	    return 0;
	}
    } else {
	if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
	    p->wirevec[i].iov_len += nb;
	    return 0;
	}
    }

    return 0;
}
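/*
 * Usage sketch (illustrative): a security layer reclaiming trailer space in
 * the style of rxkad.  The trailer size below is a hypothetical stand-in,
 * not the real rxkad value.
 */
#if 0
static void
example_ReclaimTrailer(struct rx_packet *p)
{
    unsigned int trailer = 8;	/* hypothetical security trailer bytes */

    /* extends the final iovec by `trailer` bytes when the room exists */
    (void)rxi_RoundUpPacket(p, trailer);
}
#endif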
/* get sufficient space to store nb bytes of data (or more), and hook
 * it into the supplied packet.  Return nbytes<=0 if successful, otherwise
 * returns the number of bytes >0 which it failed to come up with.
 * Don't need to worry about locking on packet, since only
 * one thread can manipulate one at a time.  Locking on continuation
 * packets is handled by AllocPacketBufs */
/* MTUXXX don't need to go through the for loop if we can trust niovecs */
int
rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
{
    int i, nv;
    struct rx_queue q;
    register struct rx_packet *cb, *ncb;

    /* compute the number of cbuf's we need */
    nv = nb / RX_CBUFFERSIZE;
    if ((nv * RX_CBUFFERSIZE) < nb)
	nv++;
    if ((nv + p->niovecs) > RX_MAXWVECS)
	nv = RX_MAXWVECS - p->niovecs;
    if (nv < 1)
	return nb;

    /* allocate buffers */
    queue_Init(&q);
    nv = AllocPacketBufs(class, nv, &q);

    /* setup packet iovs */
    for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) {
	queue_Remove(cb);
	p->wirevec[i].iov_base = (caddr_t) cb->localdata;
	p->wirevec[i].iov_len = RX_CBUFFERSIZE;
    }

    nb -= (nv * RX_CBUFFERSIZE);
    p->length += (nv * RX_CBUFFERSIZE);
    p->niovecs += nv;

    return nb;
}
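/*
 * Worked example (illustrative): with a hypothetical RX_CBUFFERSIZE of 1000,
 * a request for nb = 2500 bytes rounds up to nv = 3 continuation buffers
 * (3000 bytes), so rxi_AllocDataBuf returns 2500 - 3000 = -500: success,
 * with room to spare.  A positive return is the shortfall in bytes.
 */
#if 0
static int
example_GrowPacket(struct rx_packet *p)
{
    /* nonpositive result means the packet now holds >= 2500 data bytes */
    return rxi_AllocDataBuf(p, 2500, RX_PACKET_CLASS_SEND_CBUF) <= 0;
}
#endif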
/* Add more packet buffers */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;
    int getme;
    SPLVAR;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	p->niovecs = 2;

	RX_TS_FPQ_CHECKIN(rx_ts_info,p);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_NeedMorePackets = FALSE;
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_MorePackets(int apackets)
{
    struct rx_packet *p, *e;
    int getme;
    SPLVAR;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	p->flags |= RX_PKTFLAG_FREE;
	p->niovecs = 2;

	queue_Append(&rx_freePacketQueue, p);
    }
    rx_nFreePackets += apackets;
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
void
rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
{
    struct rx_packet *p, *e;
    register struct rx_ts_info_t * rx_ts_info;
    int getme;
    SPLVAR;

    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    PIN(p, getme);		/* XXXXX */
    memset((char *)p, 0, getme);
    RX_TS_INFO_GET(rx_ts_info);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	p->niovecs = 2;

	RX_TS_FPQ_CHECKIN(rx_ts_info,p);
    }

    rx_ts_info->_FPQ.delta += apackets;

    if (flush_global &&
	(num_keep_local < apackets)) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
	rxi_NeedMorePackets = FALSE;
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
}
#endif /* RX_ENABLE_TSFPQ */

/* Add more packet buffers */
void
rxi_MorePacketsNoLock(int apackets)
{
    struct rx_packet *p, *e;
    int getme;

    /* allocate enough packets that 1/4 of the packets will be able
     * to hold maximal amounts of data */
    apackets += (apackets / 4)
	* ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
    getme = apackets * sizeof(struct rx_packet);
    p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme);

    memset((char *)p, 0, getme);

    for (e = p + apackets; p < e; p++) {
	RX_PACKET_IOV_INIT(p);
	p->flags |= RX_PKTFLAG_FREE;
	p->niovecs = 2;

	queue_Append(&rx_freePacketQueue, p);
    }

    rx_nFreePackets += apackets;
#ifdef RX_ENABLE_TSFPQ
    /* TSFPQ patch also needs to keep track of total packets */
    MUTEX_ENTER(&rx_stats_mutex);
    rx_nPackets += apackets;
    RX_TS_FPQ_COMPUTE_LIMITS;
    MUTEX_EXIT(&rx_stats_mutex);
#endif /* RX_ENABLE_TSFPQ */
    rxi_NeedMorePackets = FALSE;
    rxi_PacketsUnWait();
}

void
rxi_FreeAllPackets(void)
{
    /* must be called at proper interrupt level, etcetera */
    /* MTUXXX need to free all Packets */
    osi_Free(rx_mallocedP,
	     (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
    UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet));
}
#ifdef RX_ENABLE_TSFPQ
void
rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
{
    register struct rx_ts_info_t * rx_ts_info;
    register int xfer;
    SPLVAR;

    RX_TS_INFO_GET(rx_ts_info);

    if (num_keep_local != rx_ts_info->_FPQ.len) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);
	if (num_keep_local < rx_ts_info->_FPQ.len) {
	    xfer = rx_ts_info->_FPQ.len - num_keep_local;
	    RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
	    rxi_PacketsUnWait();
	} else {
	    xfer = num_keep_local - rx_ts_info->_FPQ.len;
	    if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
		xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
	    if (rx_nFreePackets < xfer) {
		rxi_MorePacketsNoLock(xfer - rx_nFreePackets);
	    }
	    RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
	}
	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
}

void
rxi_FlushLocalPacketsTSFPQ(void)
{
    rxi_AdjustLocalPacketsTSFPQ(0, 0);
}
#endif /* RX_ENABLE_TSFPQ */

/* Allocate more packets iff we need more continuation buffers */
/* In kernel, can't page in memory with interrupts disabled, so we
 * don't use the event mechanism. */
void
rx_CheckPackets(void)
{
    if (rxi_NeedMorePackets) {
	rxi_MorePackets(rx_initSendWindow);
    }
}
/* In the packet freeing routine below, the assumption is that
   we want all of the packets to be used equally frequently, so that we
   don't get packet buffers paging out.  It would be just as valid to
   assume that we DO want them to page out if not many are being used.
   In any event, we assume the former, and append the packets to the end
   of the free list.  */
/* This explanation is bogus.  The free list doesn't remain in any kind of
   useful order for long: the packets in use get pretty much randomly scattered
   across all the pages.  In order to permit unused {packets,bufs} to page out, they
   must be stored so that packets which are adjacent in memory are adjacent in the
   free list.  An array springs rapidly to mind.
   */
/* Actually free the packet p. */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    register struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info,p);
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	RX_TS_FPQ_LTOG(rx_ts_info);
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_FreePacketNoLock(struct rx_packet *p)
{
    dpf(("Free %lx\n", (unsigned long)p));

    RX_FPQ_MARK_FREE(p);
    rx_nFreePackets++;
    queue_Append(&rx_freePacketQueue, p);
}
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
{
    register struct rx_ts_info_t * rx_ts_info;
    dpf(("Free %lx\n", (unsigned long)p));

    RX_TS_INFO_GET(rx_ts_info);
    RX_TS_FPQ_CHECKIN(rx_ts_info,p);

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
}
#endif /* RX_ENABLE_TSFPQ */
/*
 * free continuation buffers off a packet into a queue
 *
 * [IN] p      -- packet from which continuation buffers will be freed
 * [IN] first  -- iovec offset of first continuation buffer to free
 * [IN] q      -- queue into which continuation buffers will be chained
 *
 * returns:
 *   number of continuation buffers freed
 */
static int
rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct rx_queue * q)
{
    struct iovec *iov;
    struct rx_packet * cb;
    int count = 0;

    for (first = MAX(2, first); first < p->niovecs; first++, count++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
	cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
	RX_FPQ_MARK_FREE(cb);
	queue_Append(q, cb);
    }

    return count;
}

/*
 * free packet continuation buffers into the global free packet pool
 *
 * [IN] p      -- packet from which to free continuation buffers
 * [IN] first  -- iovec offset of first continuation buffer to free
 *
 * returns:
 *   zero always
 */
static int
rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
{
    struct iovec *iov;

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
    }
    p->length = 0;
    p->niovecs = 0;

    return 0;
}

#ifdef RX_ENABLE_TSFPQ
/*
 * free packet continuation buffers into the thread-local free pool
 *
 * [IN] p             -- packet from which continuation buffers will be freed
 * [IN] first         -- iovec offset of first continuation buffer to free
 * [IN] flush_global  -- if nonzero, we will flush overquota packets to the
 *                       global free pool before returning
 *
 * returns:
 *   nothing
 */
static void
rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
{
    struct iovec *iov;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    for (first = MAX(2, first); first < p->niovecs; first++) {
	iov = &p->wirevec[first];
	if (!iov->iov_base)
	    osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
	RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
    }
    p->length = 0;
    p->niovecs = 0;

    if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);

	/* Wakeup anyone waiting for packets */
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
}
#endif /* RX_ENABLE_TSFPQ */
int rxi_nBadIovecs = 0;

/* rxi_RestoreDataBufs
 *
 * Restore the correct sizes to the iovecs.  Called when reusing a packet
 * for reading off the wire.
 */
void
rxi_RestoreDataBufs(struct rx_packet *p)
{
    int i;
    struct iovec *iov = &p->wirevec[2];

    RX_PACKET_IOV_INIT(p);

    for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
	if (!iov->iov_base) {
	    rxi_nBadIovecs++;
	    p->niovecs = i;
	    break;
	}
	iov->iov_len = RX_CBUFFERSIZE;
    }
}
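/*
 * Usage sketch (illustrative): the receive loop reuses one packet buffer for
 * many reads, so the iovec lengths trimmed by the previous read must be
 * restored before handing the packet back to rxi_ReadPacket.
 */
#if 0
static void
example_ReusePacket(osi_socket sock, struct rx_packet *p)
{
    afs_uint32 host;
    u_short port;

    rxi_RestoreDataBufs(p);	/* undo trimming from the previous read */
    (void)rxi_ReadPacket(sock, p, &host, &port);
}
#endif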
#ifdef RX_ENABLE_TSFPQ
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;
    register struct rx_ts_info_t * rx_ts_info;
    SPLVAR;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov >= end)
	return;

    RX_TS_INFO_GET(rx_ts_info);
    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
	p->niovecs--;
    }
    if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
	NETPRI;
	MUTEX_ENTER(&rx_freePktQ_lock);

	RX_TS_FPQ_LTOG(rx_ts_info);
	rxi_PacketsUnWait();

	MUTEX_EXIT(&rx_freePktQ_lock);
	USERPRI;
    }
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_TrimDataBufs(struct rx_packet *p, int first)
{
    int length;
    struct iovec *iov, *end;
    SPLVAR;

    if (first != 1)
	osi_Panic("TrimDataBufs 1: first must be 1");

    /* Skip over continuation buffers containing message data */
    iov = &p->wirevec[2];
    end = iov + (p->niovecs - 2);
    length = p->length - p->wirevec[1].iov_len;
    for (; iov < end && length > 0; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
	length -= iov->iov_len;
    }

    /* iov now points to the first empty data buffer. */
    if (iov >= end)
	return;

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    for (; iov < end; iov++) {
	if (!iov->iov_base)
	    osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
	rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
	p->niovecs--;
    }
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
}
#endif /* RX_ENABLE_TSFPQ */
/* Free the packet p.  P is assumed not to be on any queue, i.e.
 * remove it yourself first if you call this routine. */
#ifdef RX_ENABLE_TSFPQ
void
rxi_FreePacket(struct rx_packet *p)
{
    rxi_FreeDataBufsTSFPQ(p, 2, 0);
    rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
}
#else /* RX_ENABLE_TSFPQ */
void
rxi_FreePacket(struct rx_packet *p)
{
    SPLVAR;

    NETPRI;
    MUTEX_ENTER(&rx_freePktQ_lock);

    rxi_FreeDataBufsNoLock(p, 2);
    rxi_FreePacketNoLock(p);
    /* Wakeup anyone waiting for packets */
    rxi_PacketsUnWait();

    MUTEX_EXIT(&rx_freePktQ_lock);
    USERPRI;
}
#endif /* RX_ENABLE_TSFPQ */
/* rxi_AllocPacket sets up p->length so it reflects the number of
 * bytes in the packet at this point, **not including** the header.
 * The header is absolutely necessary, besides, this is the way the
 * length field is usually used */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	switch (class) {
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	    break;
	}
	MUTEX_EXIT(&rx_stats_mutex);
	return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

    if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
#ifdef KERNEL
	if (queue_IsEmpty(&rx_freePacketQueue))
	    osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_initSendWindow);
#endif /* KERNEL */

	RX_TS_FPQ_GTOL(rx_ts_info);
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info,p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
    return p;
}
#else /* RX_ENABLE_TSFPQ */
struct rx_packet *
rxi_AllocPacketNoLock(int class)
{
    register struct rx_packet *p;

#ifdef KERNEL
    if (rxi_OverQuota(class)) {
	rxi_NeedMorePackets = TRUE;
	MUTEX_ENTER(&rx_stats_mutex);
	switch (class) {
	case RX_PACKET_CLASS_RECEIVE:
	    rx_stats.receivePktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND:
	    rx_stats.sendPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SPECIAL:
	    rx_stats.specialPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_RECV_CBUF:
	    rx_stats.receiveCbufPktAllocFailures++;
	    break;
	case RX_PACKET_CLASS_SEND_CBUF:
	    rx_stats.sendCbufPktAllocFailures++;
	    break;
	}
	MUTEX_EXIT(&rx_stats_mutex);
	return (struct rx_packet *)0;
    }
#endif /* KERNEL */

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

#ifdef KERNEL
    if (queue_IsEmpty(&rx_freePacketQueue))
	osi_Panic("rxi_AllocPacket error");
#else /* KERNEL */
    if (queue_IsEmpty(&rx_freePacketQueue))
	rxi_MorePacketsNoLock(rx_initSendWindow);
#endif /* KERNEL */

    rx_nFreePackets--;
    p = queue_First(&rx_freePacketQueue, rx_packet);
    queue_Remove(p);
    RX_FPQ_MARK_USED(p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */
#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacketTSFPQ(int class, int pull_global)
{
    register struct rx_packet *p;
    register struct rx_ts_info_t * rx_ts_info;

    RX_TS_INFO_GET(rx_ts_info);

    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetRequests++;
    MUTEX_EXIT(&rx_stats_mutex);

    if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) {
	MUTEX_ENTER(&rx_freePktQ_lock);

	if (queue_IsEmpty(&rx_freePacketQueue))
	    rxi_MorePacketsNoLock(rx_initSendWindow);

	RX_TS_FPQ_GTOL(rx_ts_info);

	MUTEX_EXIT(&rx_freePktQ_lock);
    } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) {
	return NULL;
    }

    RX_TS_FPQ_CHECKOUT(rx_ts_info,p);

    dpf(("Alloc %lx, class %d\n", (unsigned long)p, class));

    /* have to do this here because rx_FlushWrite fiddles with the iovs in
     * order to truncate outbound packets.  In the near future, may need
     * to allocate bufs from a static pool here, and/or in AllocSendPacket
     */
    RX_PACKET_IOV_FULLINIT(p);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */

#ifdef RX_ENABLE_TSFPQ
struct rx_packet *
rxi_AllocPacket(int class)
{
    register struct rx_packet *p;

    p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
    return p;
}
#else /* RX_ENABLE_TSFPQ */
struct rx_packet *
rxi_AllocPacket(int class)
{
    register struct rx_packet *p;

    MUTEX_ENTER(&rx_freePktQ_lock);
    p = rxi_AllocPacketNoLock(class);
    MUTEX_EXIT(&rx_freePktQ_lock);
    return p;
}
#endif /* RX_ENABLE_TSFPQ */
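/*
 * Usage sketch (illustrative): the basic allocate/use/free cycle.  A packet
 * obtained here is on no queue, so the caller owns it outright until it is
 * handed back with rxi_FreePacket.
 */
#if 0
static void
example_AllocFree(void)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);

    if (p) {
	/* p->length counts data bytes only, never the rx header */
	rxi_FreePacket(p);
    }
}
#endif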
/* This guy comes up with as many buffers as it {takes,can get} given
 * the MTU for this call.  It also sets the packet length before
 * returning.  caution: this is often called at NETPRI
 * Called with call locked.
 */
struct rx_packet *
rxi_AllocSendPacket(register struct rx_call *call, int want)
{
    register struct rx_packet *p = (struct rx_packet *)0;
    register int mud;
    register unsigned delta;
    SPLVAR;

    mud = call->MTU - RX_HEADER_SIZE;
    delta =
	rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
	rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));

#ifdef RX_ENABLE_TSFPQ
    if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
	want += delta;
	want = MIN(want, mud);

	if ((unsigned)want > p->length)
	    (void)rxi_AllocDataBuf(p, (want - p->length),
				   RX_PACKET_CLASS_SEND_CBUF);

	if ((unsigned)p->length > mud)
	    p->length = mud;

	if (delta >= p->length) {
	    rxi_FreePacket(p);
	    p = NULL;
	} else {
	    p->length -= delta;
	}
	return p;
    }
#endif /* RX_ENABLE_TSFPQ */

    while (!(call->error)) {
	MUTEX_ENTER(&rx_freePktQ_lock);
	/* if an error occurred, or we get the packet we want, we're done */
	if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
	    MUTEX_EXIT(&rx_freePktQ_lock);

	    want += delta;
	    want = MIN(want, mud);

	    if ((unsigned)want > p->length)
		(void)rxi_AllocDataBuf(p, (want - p->length),
				       RX_PACKET_CLASS_SEND_CBUF);

	    if ((unsigned)p->length > mud)
		p->length = mud;

	    if (delta >= p->length) {
		rxi_FreePacket(p);
		p = NULL;
	    } else {
		p->length -= delta;
	    }
	    break;
	}

	/* no error occurred, and we didn't get a packet, so we sleep.
	 * At this point, we assume that packets will be returned
	 * sooner or later, as packets are acknowledged, and so we
	 * just wait for them here.
	 */
	call->flags |= RX_CALL_WAIT_PACKETS;
	CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
	MUTEX_EXIT(&call->lock);
	rx_waitingForPackets = 1;

#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
#else
	osi_rxSleep(&rx_waitingForPackets);
#endif
	MUTEX_EXIT(&rx_freePktQ_lock);
	MUTEX_ENTER(&call->lock);
	CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
	call->flags &= ~RX_CALL_WAIT_PACKETS;
    }

    return p;
}
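/*
 * Worked example (illustrative): with a call MTU of 1444 and RX_HEADER_SIZE
 * of 28, mud is 1416; if a hypothetical security layer adds 4 header plus 4
 * trailer bytes (delta = 8), a request for want = 8192 is clamped to 1416
 * and then reduced by delta, leaving 1408 usable data bytes.
 */
#if 0
static int
example_SendSizing(struct rx_call *call)
{
    struct rx_packet *p = rxi_AllocSendPacket(call, 8192);
    int n = p ? p->length : 0;	/* usable data bytes after delta */

    if (p)
	rxi_FreePacket(p);
    return n;
}
#endif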
#ifndef KERNEL
#ifdef AFS_NT40_ENV
/* Windows does not use file descriptors. */
#define CountFDs(amax) 0
#else
/* count the number of used FDs */
static int
CountFDs(register int amax)
{
    struct stat tstat;
    register int i, code;
    register int count;

    count = 0;
    for (i = 0; i < amax; i++) {
	code = fstat(i, &tstat);
	if (code == 0)
	    count++;
    }
    return count;
}
#endif /* AFS_NT40_ENV */
#else /* KERNEL */
#define CountFDs(amax) amax
#endif /* KERNEL */

#if !defined(KERNEL) || defined(UKERNEL)

/* This function reads a single packet from the interface into the
 * supplied packet buffer (*p).  Return 0 if the packet is bogus.  The
 * (host,port) of the sender are stored in the supplied variables, and
 * the data length of the packet is stored in the packet structure.
 * The header is decoded. */
int
rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host,
	       u_short * port)
{
    struct sockaddr_in from;
    struct msghdr msg;
    int nbytes;
    afs_int32 rlen;
    register afs_int32 tlen, savelen;

    rx_computelen(p, tlen);
    rx_SetDataSize(p, tlen);	/* this is the size of the user data area */

    tlen += RX_HEADER_SIZE;	/* now this is the size of the entire packet */
    rlen = rx_maxJumboRecvSize;	/* this is what I am advertising.  Only check
				 * it once in order to avoid races.  */
    tlen = rlen - tlen;
    if (tlen > 0) {
	tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
	if (tlen > 0) {
	    tlen = rlen - tlen;
	} else
	    tlen = rlen;
    } else
	tlen = rlen;

    /* Extend the last iovec for padding, it's just to make sure that the
     * read doesn't return more data than we expect, and is done to get around
     * our problems caused by the lack of a length field in the rx header.
     * Use the extra buffer that follows the localdata in each packet
     * structure. */
    savelen = p->wirevec[p->niovecs - 1].iov_len;
    p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;

    memset((char *)&msg, 0, sizeof(msg));
    msg.msg_name = (char *)&from;
    msg.msg_namelen = sizeof(struct sockaddr_in);
    msg.msg_iov = p->wirevec;
    msg.msg_iovlen = p->niovecs;
    nbytes = rxi_Recvmsg(socket, &msg, 0);

    /* restore the vec to its correct state */
    p->wirevec[p->niovecs - 1].iov_len = savelen;

    p->length = (nbytes - RX_HEADER_SIZE);
    if ((nbytes > tlen) || (p->length & 0x8000)) {	/* Bogus packet */
	if (nbytes < 0 && errno == EWOULDBLOCK) {
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.noPacketOnRead++;
	    MUTEX_EXIT(&rx_stats_mutex);
	} else if (nbytes <= 0) {
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.bogusPacketOnRead++;
	    rx_stats.bogusHost = from.sin_addr.s_addr;
	    MUTEX_EXIT(&rx_stats_mutex);
	    dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr),
		 ntohs(from.sin_port), nbytes));
	}
	return 0;
    }
#ifdef RXDEBUG
    else if ((rx_intentionallyDroppedOnReadPer100 > 0)
	     && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;

	dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d",
	     p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
	     p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
	     p->length));
	rxi_TrimDataBufs(p, 1);
	return 0;
    }
#endif
    else {
	/* Extract packet header. */
	rxi_DecodePacketHeader(p);

	*host = from.sin_addr.s_addr;
	*port = from.sin_port;
	if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
	    struct rx_peer *peer;
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.packetsRead[p->header.type - 1]++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    /*
	     * Try to look up this peer structure.  If it doesn't exist,
	     * don't create a new one -
	     * we don't keep count of the bytes sent/received if a peer
	     * structure doesn't already exist.
	     *
	     * The peer/connection cleanup code assumes that there is 1 peer
	     * per connection.  If we actually created a peer structure here
	     * and this packet was an rxdebug packet, the peer structure would
	     * never be cleaned up.
	     */
	    peer = rxi_FindPeer(*host, *port, 0, 0);
	    /* Since this may not be associated with a connection,
	     * it may have no refCount, meaning we could race with
	     * ReapConnections
	     */
	    if (peer && (peer->refCount > 0)) {
		MUTEX_ENTER(&peer->peer_lock);
		hadd32(peer->bytesReceived, p->length);
		MUTEX_EXIT(&peer->peer_lock);
	    }
	}

	/* Free any empty packet buffers at the end of this packet */
	rxi_TrimDataBufs(p, 1);

	return 1;
    }
}

#endif /* !KERNEL || UKERNEL */
/* This function splits off the first packet in a jumbo packet.
 * As of AFS 3.5, jumbograms contain more than one fixed size
 * packet, and the RX_JUMBO_PACKET flag is set in all but the
 * last packet header.  All packets (except the last) are padded to
 * fall on RX_CBUFFERSIZE boundaries.
 * HACK: We store the length of the first n-1 packets in the
 * last two pad bytes. */

struct rx_packet *
rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port,
		     int first)
{
    struct rx_packet *np;
    struct rx_jumboHeader *jp;
    int niov, i;
    struct iovec *iov;
    int length;
    afs_uint32 temp;

    /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
     * bytes in length.  All but the first packet are preceded by
     * an abbreviated four byte header.  The length of the last packet
     * is calculated from the size of the jumbogram. */
    length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;

    if ((int)p->length < length) {
	dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
	return NULL;
    }
    niov = p->niovecs - 2;
    if (niov < 1) {
	dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
	return NULL;
    }
    iov = &p->wirevec[2];
    np = RX_CBUF_TO_PACKET(iov->iov_base, p);

    /* Get a pointer to the abbreviated packet header */
    jp = (struct rx_jumboHeader *)
	((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);

    /* Set up the iovecs for the next packet */
    np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
    np->wirevec[0].iov_len = sizeof(struct rx_header);
    np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
    np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
    np->niovecs = niov + 1;
    for (i = 2, iov++; i <= niov; i++, iov++) {
	np->wirevec[i] = *iov;
    }
    np->length = p->length - length;
    p->length = RX_JUMBOBUFFERSIZE;
    p->niovecs = 2;

    /* Convert the jumbo packet header to host byte order */
    temp = ntohl(*(afs_uint32 *) jp);
    jp->flags = (u_char) (temp >> 24);
    jp->cksum = (u_short) (temp);

    /* Fill in the packet header */
    np->header = p->header;
    np->header.serial = p->header.serial + 1;
    np->header.seq = p->header.seq + 1;
    np->header.flags = jp->flags;
    np->header.spare = jp->cksum;

    return np;
}
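/*
 * Layout sketch (illustrative): the four-byte abbreviated header preceding
 * each trailing packet of a jumbogram packs the flags byte into the top
 * eight bits and the 16-bit header checksum into the low bits, so decoding
 * is one ntohl plus shifts, exactly as above.
 */
#if 0
static void
example_DecodeJumboHeader(afs_uint32 wire, u_char *flags, u_short *cksum)
{
    afs_uint32 temp = ntohl(wire);

    *flags = (u_char) (temp >> 24);	/* rx header flags */
    *cksum = (u_short) (temp);		/* low 16 bits: header checksum */
}
#endif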
#if !defined(KERNEL)
/* Send a udp datagram */
int
osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
	    int length, int istack)
{
    struct msghdr msg;
    int ret;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = dvec;
    msg.msg_iovlen = nvecs;
    msg.msg_name = addr;
    msg.msg_namelen = sizeof(struct sockaddr_in);

    ret = rxi_Sendmsg(socket, &msg, 0);

    return ret;
}
#elif !defined(UKERNEL)
/*
 * message receipt is done in rxk_input or rx_put.
 */

#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
/*
 * Copy an mblock to the contiguous area pointed to by cp.
 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * Returns the number of bytes not transferred.
 * The message is NOT changed.
 */
static int
cpytoc(mblk_t * mp, register int off, register int len, register char *cp)
{
    register int n;

    for (; mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return -1;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	memcpy(cp, (char *)mp->b_rptr, n);
	cp += n;
	len -= n;
    }
    return (len);
}

/* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
 * but it doesn't really.
 * This sucks, anyway, do it like m_cpy.... below
 */
static int
cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs,
	   int niovs)
{
    register int m, n, o, t, i;

    for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
	if (mp->b_datap->db_type != M_DATA) {
	    return -1;
	}
	n = MIN(len, (mp->b_wptr - mp->b_rptr));
	len -= n;
	while (n) {
	    if (!t) {
		o = 0;
		i++;
		t = iovs[i].iov_len;
	    }
	    m = MIN(n, t);
	    memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
	    mp->b_rptr += m;
	    o += m;
	    t -= m;
	    n -= m;
	}
    }
    return (len);
}
1646 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1648 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1651 unsigned int l1, l2, i, t;
1653 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1654 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
1657 if (m->m_len <= off) {
1667 p1 = mtod(m, caddr_t) + off;
1668 l1 = m->m_len - off;
1670 p2 = iovs[0].iov_base;
1671 l2 = iovs[0].iov_len;
1674 t = MIN(l1, MIN(l2, (unsigned int)len));
1685 p1 = mtod(m, caddr_t);
1691 p2 = iovs[i].iov_base;
1692 l2 = iovs[i].iov_len;
1700 #endif /* AFS_SUN5_ENV */
1702 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1704 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1705 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1711 struct rx_packet *phandle;
1712 int hdr_len, data_len;
1717 m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1724 #endif /*KERNEL && !UKERNEL */
/* send a response to a debug packet */

struct rx_packet *
rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket,
		       afs_int32 ahost, short aport, int istack)
{
    struct rx_debugIn tin;
    afs_int32 tl;
    struct rx_serverQueueEntry *np, *nqe;

    /*
     * Only respond to client-initiated Rx debug packets,
     * and clear the client flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
    } else {
	return ap;
    }

    rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
    /* all done with packet, now set length to the truth, so we can
     * reuse this packet */
    rx_computelen(ap, ap->length);

    tin.type = ntohl(tin.type);
    tin.index = ntohl(tin.index);
    switch (tin.type) {
    case RX_DEBUGI_GETSTATS:{
	    struct rx_debugStats tstat;

	    /* get basic stats */
	    memset((char *)&tstat, 0, sizeof(tstat));	/* make sure spares are zero */
	    tstat.version = RX_DEBUGI_VERSION;
#ifndef RX_ENABLE_LOCKS
	    tstat.waitingForPackets = rx_waitingForPackets;
#endif
	    MUTEX_ENTER(&rx_serverPool_lock);
	    tstat.nFreePackets = htonl(rx_nFreePackets);
	    tstat.callsExecuted = htonl(rxi_nCalls);
	    tstat.packetReclaims = htonl(rx_packetReclaims);
	    tstat.usedFDs = CountFDs(64);
	    tstat.nWaiting = htonl(rx_nWaiting);
	    tstat.nWaited = htonl(rx_nWaited);
	    queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry,
			tstat.idleThreads);
	    MUTEX_EXIT(&rx_serverPool_lock);
	    tstat.idleThreads = htonl(tstat.idleThreads);
	    tl = sizeof(struct rx_debugStats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);

	    if (tl <= 0) {
		rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
			       (char *)&tstat);
		ap->length = sizeof(struct rx_debugStats);
		rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
		rx_computelen(ap, ap->length);
	    }
	    break;
	}
    case RX_DEBUGI_GETALLCONN:
    case RX_DEBUGI_GETCONN:{
	    int i, j;
	    register struct rx_connection *tc;
	    struct rx_call *tcall;
	    struct rx_debugConn tconn;
	    int all = (tin.type == RX_DEBUGI_GETALLCONN);

	    tl = sizeof(struct rx_debugConn) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    memset((char *)&tconn, 0, sizeof(tconn));	/* make sure spares are zero */
	    /* get N'th (maybe) "interesting" connection info */
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of connections.
		 */
#ifdef AFS_PTHREAD_ENV
		pthread_yield();
#else
		(void)IOMGR_Poll();
#endif
#endif
		MUTEX_ENTER(&rx_connHashTable_lock);
		/* We might be slightly out of step since we are not
		 * locking each call, but this is only debugging output.
		 */
		for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
		    if ((all || rxi_IsConnInteresting(tc))
			&& tin.index-- <= 0) {
			tconn.host = tc->peer->host;
			tconn.port = tc->peer->port;
			tconn.cid = htonl(tc->cid);
			tconn.epoch = htonl(tc->epoch);
			tconn.serial = htonl(tc->serial);
			for (j = 0; j < RX_MAXCALLS; j++) {
			    tconn.callNumber[j] = htonl(tc->callNumber[j]);
			    if ((tcall = tc->call[j])) {
				tconn.callState[j] = tcall->state;
				tconn.callMode[j] = tcall->mode;
				tconn.callFlags[j] = tcall->flags;
				if (queue_IsNotEmpty(&tcall->rq))
				    tconn.callOther[j] |= RX_OTHER_IN;
				if (queue_IsNotEmpty(&tcall->tq))
				    tconn.callOther[j] |= RX_OTHER_OUT;
			    } else
				tconn.callState[j] = RX_STATE_NOTINIT;
			}

			tconn.natMTU = htonl(tc->peer->natMTU);
			tconn.error = htonl(tc->error);
			tconn.flags = tc->flags;
			tconn.type = tc->type;
			tconn.securityIndex = tc->securityIndex;
			if (tc->securityObject) {
			    RXS_GetStats(tc->securityObject, tc,
					 &tconn.secStats);
#define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
#define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
			    DOHTONL(flags);
			    DOHTONL(expires);
			    DOHTONL(packetsReceived);
			    DOHTONL(packetsSent);
			    DOHTONL(bytesReceived);
			    DOHTONL(bytesSent);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.spares) /
				 sizeof(short); i++)
				DOHTONS(spares[i]);
			    for (i = 0;
				 i <
				 sizeof(tconn.secStats.sparel) /
				 sizeof(afs_int32); i++)
				DOHTONL(sparel[i]);
			}

			MUTEX_EXIT(&rx_connHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
				       (char *)&tconn);
			tl = ap->length;
			ap->length = sizeof(struct rx_debugConn);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			ap->length = tl;
			return ap;
		    }
		}
		MUTEX_EXIT(&rx_connHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tconn.cid = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
			   (char *)&tconn);
	    tl = ap->length;
	    ap->length = sizeof(struct rx_debugConn);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    ap->length = tl;
	    break;
	}
	/*
	 * Pass back all the peer structures we have available
	 */
    case RX_DEBUGI_GETPEER:{
	    int i;
	    register struct rx_peer *tp;
	    struct rx_debugPeer tpeer;

	    tl = sizeof(struct rx_debugPeer) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    memset((char *)&tpeer, 0, sizeof(tpeer));
	    for (i = 0; i < rx_hashTableSize; i++) {
#if !defined(KERNEL)
		/* the time complexity of the algorithm used here
		 * exponentially increases with the number of peers.
		 *
		 * Yielding after processing each hash table entry
		 * and dropping rx_peerHashTable_lock.
		 * also increases the risk that we will miss a new
		 * entry - but we are willing to live with this
		 * limitation since this is meant for debugging only
		 */
#ifdef AFS_PTHREAD_ENV
		pthread_yield();
#else
		(void)IOMGR_Poll();
#endif
#endif
		MUTEX_ENTER(&rx_peerHashTable_lock);
		for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
		    if (tin.index-- <= 0) {
			tpeer.host = tp->host;
			tpeer.port = tp->port;
			tpeer.ifMTU = htons(tp->ifMTU);
			tpeer.idleWhen = htonl(tp->idleWhen);
			tpeer.refCount = htons(tp->refCount);
			tpeer.burstSize = tp->burstSize;
			tpeer.burst = tp->burst;
			tpeer.burstWait.sec = htonl(tp->burstWait.sec);
			tpeer.burstWait.usec = htonl(tp->burstWait.usec);
			tpeer.rtt = htonl(tp->rtt);
			tpeer.rtt_dev = htonl(tp->rtt_dev);
			tpeer.timeout.sec = htonl(tp->timeout.sec);
			tpeer.timeout.usec = htonl(tp->timeout.usec);
			tpeer.nSent = htonl(tp->nSent);
			tpeer.reSends = htonl(tp->reSends);
			tpeer.inPacketSkew = htonl(tp->inPacketSkew);
			tpeer.outPacketSkew = htonl(tp->outPacketSkew);
			tpeer.rateFlag = htonl(tp->rateFlag);
			tpeer.natMTU = htons(tp->natMTU);
			tpeer.maxMTU = htons(tp->maxMTU);
			tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
			tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
			tpeer.MTU = htons(tp->MTU);
			tpeer.cwind = htons(tp->cwind);
			tpeer.nDgramPackets = htons(tp->nDgramPackets);
			tpeer.congestSeq = htons(tp->congestSeq);
			tpeer.bytesSent.high = htonl(tp->bytesSent.high);
			tpeer.bytesSent.low = htonl(tp->bytesSent.low);
			tpeer.bytesReceived.high =
			    htonl(tp->bytesReceived.high);
			tpeer.bytesReceived.low =
			    htonl(tp->bytesReceived.low);

			MUTEX_EXIT(&rx_peerHashTable_lock);
			rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
				       (char *)&tpeer);
			tl = ap->length;
			ap->length = sizeof(struct rx_debugPeer);
			rxi_SendDebugPacket(ap, asocket, ahost, aport,
					    istack);
			ap->length = tl;
			return ap;
		    }
		}
		MUTEX_EXIT(&rx_peerHashTable_lock);
	    }
	    /* if we make it here, there are no interesting packets */
	    tpeer.host = htonl(0xffffffff);	/* means end */
	    rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
			   (char *)&tpeer);
	    tl = ap->length;
	    ap->length = sizeof(struct rx_debugPeer);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    ap->length = tl;
	    break;
	}
    case RX_DEBUGI_RXSTATS:{
	    int i;
	    afs_int32 *s;

	    tl = sizeof(rx_stats) - ap->length;
	    if (tl > 0)
		tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
	    if (tl > 0)
		return ap;

	    /* Since its all int32s convert to network order with a loop. */
	    MUTEX_ENTER(&rx_stats_mutex);
	    s = (afs_int32 *) & rx_stats;
	    for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
		rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));

	    tl = ap->length;
	    ap->length = sizeof(rx_stats);
	    MUTEX_EXIT(&rx_stats_mutex);
	    rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	    ap->length = tl;
	    break;
	}

    default:
	/* error response packet */
	tin.type = htonl(RX_DEBUGI_BADTYPE);
	tin.index = tin.type;
	rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
	tl = ap->length;
	ap->length = sizeof(struct rx_debugIn);
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
	break;
    }
    return ap;
}
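/*
 * Usage sketch (illustrative): what a debugging client such as rxdebug puts
 * on the wire to reach the RX_DEBUGI_GETSTATS arm above.  Both fields travel
 * in network byte order; index only matters for GETCONN/GETPEER iteration.
 */
#if 0
static void
example_BuildDebugRequest(struct rx_debugIn *req)
{
    req->type = htonl(RX_DEBUGI_GETSTATS);
    req->index = htonl(0);
}
#endif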
struct rx_packet *
rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket,
			 afs_int32 ahost, short aport, int istack)
{
    afs_int32 tl;

    /*
     * Only respond to client-initiated version requests, and
     * clear that flag in the response.
     */
    if (ap->header.flags & RX_CLIENT_INITIATED) {
	char buf[66];

	ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
	rxi_EncodePacketHeader(ap);
	memset(buf, 0, sizeof(buf));
	strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
	rx_packetwrite(ap, 0, 65, buf);
	tl = ap->length;
	ap->length = 65;
	rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
	ap->length = tl;
    }

    return ap;
}
/* send a debug packet back to the sender */
static void
rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
		    afs_int32 ahost, short aport, afs_int32 istack)
{
    struct sockaddr_in taddr;
    int i;
    int nbytes;
    int saven = 0;
    size_t savelen = 0;
#ifdef KERNEL
    int waslocked = ISAFS_GLOCK();
#endif

    taddr.sin_family = AF_INET;
    taddr.sin_port = aport;
    taddr.sin_addr.s_addr = ahost;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
    taddr.sin_len = sizeof(struct sockaddr_in);
#endif

    /* We need to trim the niovecs. */
    nbytes = apacket->length;
    for (i = 1; i < apacket->niovecs; i++) {
	if (nbytes <= apacket->wirevec[i].iov_len) {
	    savelen = apacket->wirevec[i].iov_len;
	    saven = apacket->niovecs;
	    apacket->wirevec[i].iov_len = nbytes;
	    apacket->niovecs = i + 1;	/* so condition fails because i == niovecs */
	} else
	    nbytes -= apacket->wirevec[i].iov_len;
    }

#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	AFS_GLOCK();
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "before osi_NetSend()");
	AFS_GUNLOCK();
    }
#else
    if (waslocked)
	AFS_GUNLOCK();
#endif
#endif
    /* debug packets are not reliably delivered, hence the cast below. */
    (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
		      apacket->length + RX_HEADER_SIZE, istack);
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
    if (ICL_SETACTIVE(afs_iclSetp)) {
	AFS_GLOCK();
	afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		   "after osi_NetSend()");
	AFS_GUNLOCK();
    }
#else
    if (waslocked)
	AFS_GLOCK();
#endif
#endif

    if (saven) {		/* means we truncated the packet above. */
	apacket->wirevec[i - 1].iov_len = savelen;
	apacket->niovecs = saven;
    }
}
/* Send the packet to appropriate destination for the specified
 * call.  The header is first encoded and placed in the packet.
 */
void
rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
	       struct rx_packet *p, int istack)
{
#if defined(KERNEL)
    int waslocked;
#endif
    int code;
    struct sockaddr_in addr;
    register struct rx_peer *peer = conn->peer;
    osi_socket socket;
#ifdef RXDEBUG
    char deliveryType = 'S';
#endif
    /* The address we're sending the packet to */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = peer->port;
    addr.sin_addr.s_addr = peer->host;

    /* This stuff should be revamped, I think, so that most, if not
     * all, of the header stuff is always added here.  We could
     * probably do away with the encode/decode routines. XXXXX */

    /* Stamp each packet with a unique serial number.  The serial
     * number is maintained on a connection basis because some types
     * of security may be based on the serial number of the packet,
     * and security is handled on a per authenticated-connection
     * basis. */
    /* Pre-increment, to guarantee no zero serial number; a zero
     * serial number means the packet was never sent. */
    MUTEX_ENTER(&conn->conn_data_lock);
    p->header.serial = ++conn->serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    /* This is so we can adjust retransmit time-outs better in the face of
     * rapidly changing round-trip times.  RTO estimation is not a la Karn.
     */
    if (p->firstSerial == 0) {
	p->firstSerial = p->header.serial;
    }
#ifdef RXDEBUG
    /* If an output tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_almostSent) {
	int drop = (*rx_almostSent) (p, &addr);
	/* drop packet if return value is non-zero? */
	if (drop)
	    deliveryType = 'D';	/* Drop the packet */
    }
#endif

    /* Get network byte order header */
    rxi_EncodePacketHeader(p);	/* XXX in the event of rexmit, etc, don't need to
				 * touch ALL the fields */

    /* Send the packet out on the same socket that related packets are being
     * sent on */
    socket =
	(conn->type ==
	 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet,  for testing purposes */
    if ((deliveryType == 'D')
	|| ((rx_intentionallyDroppedPacketsPer100 > 0)
	    && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
	deliveryType = 'D';	/* Drop the packet */
    } else {
	deliveryType = 'S';	/* Send the packet */
#endif /* RXDEBUG */

	/* Loop until the packet is sent.  We'd prefer just to use a
	 * blocking socket, but unfortunately the interface doesn't
	 * allow us to have the socket block in send mode, and not
	 * block in receive mode */
#ifdef KERNEL
	waslocked = ISAFS_GLOCK();
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    AFS_GLOCK();
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "before osi_NetSend()");
	    AFS_GUNLOCK();
	}
#else
	if (waslocked)
	    AFS_GUNLOCK();
#endif
#endif
	if ((code =
	     osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
			 p->length + RX_HEADER_SIZE, istack)) != 0) {
	    /* send failed, so let's hurry up the resend, eh? */
	    MUTEX_ENTER(&rx_stats_mutex);
	    rx_stats.netSendFailures++;
	    MUTEX_EXIT(&rx_stats_mutex);
	    p->retryTime = p->timeSent;	/* resend it very soon */
	    clock_Addmsec(&(p->retryTime),
			  10 + (((afs_uint32) p->backoff) << 8));
	    /* Some systems are nice and tell us right away that we cannot
	     * reach this recipient by returning an error code.
	     * So, when this happens let's "down" the host NOW so
	     * we don't sit around waiting for this host to timeout later.
	     */
	    if (call &&
#ifdef AFS_NT40_ENV
		code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
#elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
		code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
		code == EHOSTUNREACH
#else
		0
#endif
		)
		call->lastReceiveTime = 0;
	}
#ifdef KERNEL
#ifdef RX_KERNEL_TRACE
	if (ICL_SETACTIVE(afs_iclSetp)) {
	    AFS_GLOCK();
	    afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
		       "after osi_NetSend()");
	    AFS_GUNLOCK();
	}
#else
	if (waslocked)
	    AFS_GLOCK();
#endif
#endif
#ifdef RXDEBUG
    }
    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
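/*
 * Worked example (illustrative): the retry delay applied on a send failure
 * above is 10 + (backoff << 8) milliseconds, so backoff values of 0, 1, and
 * 2 push the retry out by 10ms, 266ms, and 522ms past the send time.
 */
#if 0
static afs_uint32
example_RetryDelayMsec(afs_uint32 backoff)
{
    return 10 + (backoff << 8);	/* mirrors the clock_Addmsec call above */
}
#endif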
2273 /* Send a list of packets to appropriate destination for the specified
2274 * connection. The headers are first encoded and placed in the packets.
2277 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2278 struct rx_packet **list, int len, int istack)
2280 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2283 struct sockaddr_in addr;
2284 register struct rx_peer *peer = conn->peer;
2286 struct rx_packet *p = NULL;
2287 struct iovec wirevec[RX_MAXIOVECS];
2288 int i, length, code;
2291 struct rx_jumboHeader *jp;
2293 char deliveryType = 'S';
2295 /* The address we're sending the packet to */
2296 addr.sin_family = AF_INET;
2297 addr.sin_port = peer->port;
2298 addr.sin_addr.s_addr = peer->host;
2300 if (len + 1 > RX_MAXIOVECS) {
2301 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2305 * Stamp the packets in this jumbogram with consecutive serial numbers
2307 MUTEX_ENTER(&conn->conn_data_lock);
2308 serial = conn->serial;
2309 conn->serial += len;
2310 MUTEX_EXIT(&conn->conn_data_lock);
2313 /* This stuff should be revamped, I think, so that most, if not
2314 * all, of the header stuff is always added here. We could
2315 * probably do away with the encode/decode routines. XXXXX */
2318 length = RX_HEADER_SIZE;
2319 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2320 wirevec[0].iov_len = RX_HEADER_SIZE;
2321 for (i = 0; i < len; i++) {
2324 /* The whole 3.5 jumbogram scheme relies on packets fitting
2325 * in a single packet buffer. */
2326 if (p->niovecs > 2) {
2327 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2330 /* Set the RX_JUMBO_PACKET flags in all but the last packets
2333 if (p->length != RX_JUMBOBUFFERSIZE) {
2334 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2336 p->header.flags |= RX_JUMBO_PACKET;
2337 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2338 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2340 wirevec[i + 1].iov_len = p->length;
2341 length += p->length;
2343 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2345 /* Convert jumbo packet header to network byte order */
2346 temp = (afs_uint32) (p->header.flags) << 24;
2347 temp |= (afs_uint32) (p->header.spare);
2348 *(afs_uint32 *) jp = htonl(temp);
2350 jp = (struct rx_jumboHeader *)
2351 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2353 /* Stamp each packet with a unique serial number. The serial
2354 * number is maintained on a connection basis because some types
2355 * of security may be based on the serial number of the packet,
2356 * and security is handled on a per authenticated-connection
2358 /* Pre-increment, to guarantee no zero serial number; a zero
2359 * serial number means the packet was never sent. */
2360 p->header.serial = ++serial;
2361 /* This is so we can adjust retransmit time-outs better in the face of
2362 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2364 if (p->firstSerial == 0) {
2365 p->firstSerial = p->header.serial;
2368 /* If an output tracer function is defined, call it with the packet and
2369 * network address. Note this function may modify its arguments. */
2370 if (rx_almostSent) {
2371 int drop = (*rx_almostSent) (p, &addr);
2372 /* drop packet if return value is non-zero? */
2374 deliveryType = 'D'; /* Drop the packet */
2378 /* Get network byte order header */
2379 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2380 * touch ALL the fields */
2383 /* Send the packet out on the same socket that related packets are being
2387 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2390 /* Possibly drop this packet, for testing purposes */
2391 if ((deliveryType == 'D')
2392 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2393 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2394 deliveryType = 'D'; /* Drop the packet */
2396 deliveryType = 'S'; /* Send the packet */
2397 #endif /* RXDEBUG */

        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        waslocked = ISAFS_GLOCK();
        if (!istack && waslocked)
            AFS_GUNLOCK();
#endif
        if ((code =
             osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
                         istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            MUTEX_ENTER(&rx_stats_mutex);
            rx_stats.netSendFailures++;
            MUTEX_EXIT(&rx_stats_mutex);
            for (i = 0; i < len; i++) {
                p = list[i];
                p->retryTime = p->timeSent;     /* resend it very soon */
                clock_Addmsec(&(p->retryTime),
                              10 + (((afs_uint32) p->backoff) << 8));
            }
            /* Some systems are nice and tell us right away that we cannot
             * reach this recipient by returning an error code.
             * So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later.
             */
            if (call &&
#ifdef AFS_NT40_ENV
                code == -1 && WSAGetLastError() == WSAEHOSTUNREACH
#elif defined(AFS_LINUX20_ENV) && defined(KERNEL)
                code == -ENETUNREACH
#elif defined(AFS_DARWIN_ENV) && defined(KERNEL)
                code == EHOSTUNREACH
#else
                0
#endif
                )
                call->lastReceiveTime = 0;
        }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        if (!istack && waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    assert(p != NULL);

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
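
/* For reference, a sketch of the jumbogram wire layout assembled above
 * (sizes assume the standard RX_HEADER_SIZE == 28, RX_JUMBOBUFFERSIZE ==
 * 1412 and RX_JUMBOHEADERSIZE == 4 values; len == 3 shown):
 *
 *   +-----------+--------------+----+--------------+----+--------------+
 *   | RX header | data (pkt 0) | JH | data (pkt 1) | JH | data (pkt 2) |
 *   |    28     |     1412     |  4 |     1412     |  4 |  p->length   |
 *   +-----------+--------------+----+--------------+----+--------------+
 *
 * Each 4-byte jumbo header (JH) carries the flags and spare field of the
 * packet that follows it; the main RX header describes the first packet. */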

/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(register struct rx_call *call,
                register struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;

    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
        p->length = nbytes;
    else
        nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);

    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1; /* so condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {                /* means we truncated the packet above.  We probably don't
                                 * really need to do this, but it seems safer this way, given
                                 * that sneaky optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}
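
/* Illustrative usage sketch (not part of the original file): this is
 * roughly how connection-level aborts are pushed through rxi_SendSpecial
 * elsewhere in rx.  The guard macro RX_PACKET_EXAMPLES is hypothetical,
 * so this helper is never compiled in. */
#ifdef RX_PACKET_EXAMPLES
static void
example_SendConnAbort(struct rx_connection *conn, int istack)
{
    afs_int32 error = htonl(conn->error);
    /* No call and no optionalPacket: the packet is directed at the
     * connection itself, using a fresh RX_PACKET_CLASS_SPECIAL buffer
     * that rxi_SendSpecial allocates and frees on our behalf. */
    (void)rxi_SendSpecial((struct rx_call *)0, conn, (struct rx_packet *)0,
                          RX_PACKET_TYPE_ABORT, (char *)&error,
                          sizeof(error), istack);
}
#endif /* RX_PACKET_EXAMPLES */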

/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);     /* MTUXXX */

    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ = htonl((((afs_uint32) p->header.type) << 24)
                   | (((afs_uint32) p->header.flags) << 16)
                   | (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}
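
/* For reference, the wire layout written by rxi_EncodePacketHeader above:
 * seven 32-bit words (RX_HEADER_SIZE bytes total), each in network order:
 *
 *   word 0: epoch
 *   word 1: cid (connection id | channel)
 *   word 2: callNumber
 *   word 3: seq
 *   word 4: serial
 *   word 5: type<<24 | flags<<16 | userStatus<<8 | securityIndex
 *   word 6: spare<<16 | serviceId  (the top 16 bits double as the
 *                                   security checksum on the wire)
 */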

/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base);     /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;

    temp = ntohl(*buf);
    buf++;

    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf);
    buf++;

    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
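
/* Hypothetical sanity check (not in the original source): encoding a
 * header with rxi_EncodePacketHeader and decoding it back should be the
 * identity on every header field.  Compiled out by default. */
#ifdef RX_PACKET_EXAMPLES
static int
example_HeaderRoundTrip(struct rx_packet *p)
{
    struct rx_header saved = p->header;

    rxi_EncodePacketHeader(p);
    rxi_DecodePacketHeader(p);
    return (p->header.epoch == saved.epoch && p->header.cid == saved.cid
            && p->header.callNumber == saved.callNumber
            && p->header.seq == saved.seq && p->header.serial == saved.serial
            && p->header.type == saved.type && p->header.flags == saved.flags
            && p->header.userStatus == saved.userStatus
            && p->header.securityIndex == saved.securityIndex
            && p->header.serviceId == saved.serviceId);
}
#endif /* RX_PACKET_EXAMPLES */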

void
rxi_PrepareSendPacket(register struct rx_call *call,
                      register struct rx_packet *p, register int last)
{
    register struct rx_connection *conn = call->conn;
    register int i;
    ssize_t len;                /* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);  /* Never yet transmitted */
    clock_Zero(&p->firstSent);  /* Never yet transmitted */
    p->header.serial = 0;       /* Another way of saying never transmitted... */
    p->backoff = 0;

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens matches. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
    } else if (i < p->niovecs) {
        /* Free any extra elements in the wirevec */
#if defined(RX_ENABLE_TSFPQ)
        rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */ );
#else /* !RX_ENABLE_TSFPQ */
        MUTEX_ENTER(&rx_freePktQ_lock);
        rxi_FreeDataBufsNoLock(p, i);
        MUTEX_EXIT(&rx_freePktQ_lock);
#endif /* !RX_ENABLE_TSFPQ */
        p->niovecs = i;
    }
    if (len)
        p->wirevec[i - 1].iov_len += len;
    RXS_PreparePacket(conn->securityObject, call, p);
}
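
/* Worked example of the length reconciliation above (illustrative numbers):
 * with p->length == 100 and securityHeaderSize == 8, len starts at 108.
 * If wirevec[1].iov_len is 1412, the loop exits with i == 2 and
 * len == -1304; no extra iovecs need freeing, and the final
 * "iov_len += len" shrinks wirevec[1] to 108 bytes, so the sum of the
 * data iov_lens once again matches the packet length. */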

/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
        return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
        return adjMTU;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
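
/* Worked example, assuming the standard constants (RX_HEADER_SIZE == 28,
 * RX_JUMBOBUFFERSIZE == 1412, RX_JUMBOHEADERSIZE == 4): adjMTU is 1444.
 * An Ethernet-style mtu of 1500 leaves 1500 - 1444 = 56 bytes, not enough
 * for another whole 1416-byte fragment, so it rounds down to 1444.  An
 * mtu of 4000 leaves 2556 bytes, i.e. one more whole fragment, giving
 * 1444 + 1416 = 2860. */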

/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}

/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;

    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
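
/* Worked example, assuming UDP_HDR_SIZE == 28 (IP + UDP overhead) and an
 * RX_MAX_PACKET_SIZE large enough not to cap the result: frags == 4 and
 * mtu == 1444 give maxMTU = 4 * 1472 - 28 = 5860; subtracting the first
 * and last packet buffers (28 + 2 * 1412 + 4 = 2856) leaves 3004, which
 * holds two more 1416-byte middle buffers, for 2 + 2 = 4 packets. */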