/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

#include <afsconfig.h>
#ifdef KERNEL
#include "afs/param.h"
#else
#include <afs/param.h>
#endif

RCSID("$Header$");

#ifdef KERNEL
#if defined(UKERNEL)
#include "afs/sysincludes.h"
#include "afsincludes.h"
#include "rx/rx_kcommon.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#include "rx/rx_packet.h"
#else /* defined(UKERNEL) */
#ifdef RX_KERNEL_TRACE
#include "../rx/rx_kcommon.h"
#endif
#include "h/types.h"
#ifndef AFS_LINUX20_ENV
#include "h/systm.h"
#endif
#if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV)
#include "afs/sysincludes.h"
#endif
#if defined(AFS_OBSD_ENV)
#include "h/proc.h"
#endif
#include "h/socket.h"
#if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
#if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
#include "sys/mount.h"		/* it gets pulled in by something later anyway */
#endif
#include "h/mbuf.h"
#endif
#include "netinet/in.h"
#include "afs/afs_osi.h"
#include "rx_kmutex.h"
#include "rx/rx_clock.h"
#include "rx/rx_queue.h"
#ifdef AFS_SUN5_ENV
#include <sys/sysmacros.h>
#endif
#include "rx/rx_packet.h"
#endif /* defined(UKERNEL) */
#include "rx/rx_globals.h"
#else /* KERNEL */
#include "sys/types.h"
#include <sys/stat.h>
#include <errno.h>
#if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
#ifdef AFS_NT40_ENV
#include <winsock2.h>
#ifndef EWOULDBLOCK
#define EWOULDBLOCK WSAEWOULDBLOCK
#endif
#else
#include <sys/socket.h>
#include <netinet/in.h>
#endif /* AFS_NT40_ENV */
#include "rx_user.h"
#include "rx_xmit_nt.h"
#include <stdlib.h>
#else
#include <sys/socket.h>
#include <netinet/in.h>
#endif
#include "rx_clock.h"
#include "rx.h"
#include "rx_queue.h"
#ifdef AFS_SUN5_ENV
#include <sys/sysmacros.h>
#endif
#include "rx_packet.h"
#include "rx_globals.h"
#include <lwp.h>
#include <assert.h>
#ifdef HAVE_STRING_H
#include <string.h>
#else
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#endif /* KERNEL */

#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX_PACKET;
#endif /* RX_LOCKS_DB */

struct rx_packet *rx_mallocedP = 0;

extern char cml_version_number[];
extern int (*rx_almostSent) ();

static int AllocPacketBufs(int class, int num_pkts, struct rx_queue *q);

static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
				afs_int32 ahost, short aport,
				afs_int32 istack);

static int rxi_FreeDataBufsToQueue(struct rx_packet *p, int first,
				   struct rx_queue *q);

/* some rules about packets:
 * 1.  When a packet is allocated, the final iov_buf contains room for
 *     a security trailer, but iov_len masks that fact.  If the security
 *     package wants to add the trailer, it may do so, and then extend
 *     iov_len appropriately.  For this reason, packet's niovecs and
 *     iov_len fields should be accurate before calling PreparePacket.
 */

/* Preconditions:
 *	all packet buffers (iov_base) are integral multiples of
 *	the word size.
 *	offset is an integral multiple of the word size.
 */
afs_int32
rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
{
    unsigned int i;
    size_t l;
    for (l = 0, i = 1; i < packet->niovecs; i++) {
	if (l + packet->wirevec[i].iov_len > offset) {
	    return *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
				    (offset - l)));
	}
	l += packet->wirevec[i].iov_len;
    }

    return 0;
}

/* Preconditions:
 *	all packet buffers (iov_base) are integral multiples of the word size.
* offset is an integral multiple of the word size. */ afs_int32 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data) { unsigned int i; size_t l; for (l = 0, i = 1; i < packet->niovecs; i++) { if (l + packet->wirevec[i].iov_len > offset) { *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) + (offset - l))) = data; return 0; } l += packet->wirevec[i].iov_len; } return 0; } /* Preconditions: * all packet buffers (iov_base) are integral multiples of the * word size. * offset is an integral multiple of the word size. * Packet Invariants: * all buffers are contiguously arrayed in the iovec from 0..niovecs-1 */ afs_int32 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid, char *out) { unsigned int i, j, l, r; for (l = 0, i = 1; i < packet->niovecs; i++) { if (l + packet->wirevec[i].iov_len > offset) { break; } l += packet->wirevec[i].iov_len; } /* i is the iovec which contains the first little bit of data in which we * are interested. l is the total length of everything prior to this iovec. * j is the number of bytes we can safely copy out of this iovec. * offset only applies to the first iovec. */ r = resid; while ((resid > 0) && (i < packet->niovecs)) { j = MIN(resid, packet->wirevec[i].iov_len - (offset - l)); memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j); resid -= j; out += j; l += packet->wirevec[i].iov_len; offset = l; i++; } return (resid ? (r - resid) : r); } /* Preconditions: * all packet buffers (iov_base) are integral multiples of the * word size. * offset is an integral multiple of the word size. */ afs_int32 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in) { int i, j, l, r; char *b; for (l = 0, i = 1; i < packet->niovecs; i++) { if (l + packet->wirevec[i].iov_len > offset) { break; } l += packet->wirevec[i].iov_len; } /* i is the iovec which contains the first little bit of data in which we * are interested. l is the total length of everything prior to this iovec. * j is the number of bytes we can safely copy out of this iovec. * offset only applies to the first iovec. */ r = resid; while ((resid > 0) && (i < RX_MAXWVECS)) { if (i >= packet->niovecs) if (rxi_AllocDataBuf(packet, resid, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */ break; b = (char *)(packet->wirevec[i].iov_base) + (offset - l); j = MIN(resid, packet->wirevec[i].iov_len - (offset - l)); memcpy(b, in, j); resid -= j; in += j; l += packet->wirevec[i].iov_len; offset = l; i++; } return (resid ? 
(r - resid) : r); } int rxi_AllocPackets(int class, int num_pkts, struct rx_queue * q) { register struct rx_packet *p, *np; num_pkts = AllocPacketBufs(class, num_pkts, q); for (queue_Scan(q, p, np, rx_packet)) { RX_PACKET_IOV_FULLINIT(p); } return num_pkts; } #ifdef RX_ENABLE_TSFPQ static int AllocPacketBufs(int class, int num_pkts, struct rx_queue * q) { register struct rx_packet *c; register struct rx_ts_info_t * rx_ts_info; int transfer, alloc; SPLVAR; RX_TS_INFO_GET(rx_ts_info); transfer = num_pkts - rx_ts_info->_FPQ.len; if (transfer > 0) { NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); if ((transfer + rx_TSFPQGlobSize) <= rx_nFreePackets) { transfer += rx_TSFPQGlobSize; } else if (transfer <= rx_nFreePackets) { transfer = rx_nFreePackets; } else { /* alloc enough for us, plus a few globs for other threads */ alloc = transfer + (3 * rx_TSFPQGlobSize) - rx_nFreePackets; rxi_MorePacketsNoLock(MAX(alloc, rx_initSendWindow)); transfer += rx_TSFPQGlobSize; } RX_TS_FPQ_GTOL2(rx_ts_info, transfer); MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; } RX_TS_FPQ_CHECKOUT2(rx_ts_info, num_pkts, q); return num_pkts; } #else /* RX_ENABLE_TSFPQ */ static int AllocPacketBufs(int class, int num_pkts, struct rx_queue * q) { struct rx_packet *c; int i, overq = 0; SPLVAR; NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); #ifdef KERNEL for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts)); num_pkts--, overq++); if (overq) { rxi_NeedMorePackets = TRUE; MUTEX_ENTER(&rx_stats_mutex); switch (class) { case RX_PACKET_CLASS_RECEIVE: rx_stats.receivePktAllocFailures++; break; case RX_PACKET_CLASS_SEND: rx_stats.sendPktAllocFailures++; break; case RX_PACKET_CLASS_SPECIAL: rx_stats.specialPktAllocFailures++; break; case RX_PACKET_CLASS_RECV_CBUF: rx_stats.receiveCbufPktAllocFailures++; break; case RX_PACKET_CLASS_SEND_CBUF: rx_stats.sendCbufPktAllocFailures++; break; } MUTEX_EXIT(&rx_stats_mutex); } if (rx_nFreePackets < num_pkts) num_pkts = rx_nFreePackets; if (!num_pkts) { rxi_NeedMorePackets = TRUE; goto done; } #else /* KERNEL */ if (rx_nFreePackets < num_pkts) { rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), rx_initSendWindow)); } #endif /* KERNEL */ for (i=0, c=queue_First(&rx_freePacketQueue, rx_packet); i < num_pkts; i++, c=queue_Next(c, rx_packet)) { RX_FPQ_MARK_USED(c); } queue_SplitBeforeAppend(&rx_freePacketQueue,q,c); rx_nFreePackets -= num_pkts; #ifdef KERNEL done: #endif MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; return num_pkts; } #endif /* RX_ENABLE_TSFPQ */ /* * Free a packet currently used as a continuation buffer */ #ifdef RX_ENABLE_TSFPQ /* num_pkts=0 means queue length is unknown */ int rxi_FreePackets(int num_pkts, struct rx_queue * q) { register struct rx_ts_info_t * rx_ts_info; register struct rx_packet *c, *nc; SPLVAR; osi_Assert(num_pkts >= 0); RX_TS_INFO_GET(rx_ts_info); if (!num_pkts) { for (queue_Scan(q, c, nc, rx_packet), num_pkts++) { rxi_FreeDataBufsTSFPQ(c, 1, 0); } } else { for (queue_Scan(q, c, nc, rx_packet)) { rxi_FreeDataBufsTSFPQ(c, 1, 0); } } if (num_pkts) { RX_TS_FPQ_CHECKIN2(rx_ts_info, num_pkts, q); } if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) { NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); RX_TS_FPQ_LTOG(rx_ts_info); /* Wakeup anyone waiting for packets */ rxi_PacketsUnWait(); MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; } return num_pkts; } #else /* RX_ENABLE_TSFPQ */ /* num_pkts=0 means queue length is unknown */ int rxi_FreePackets(int num_pkts, struct rx_queue *q) { struct rx_queue cbs; register struct rx_packet *p, *np; int qlen = 0; SPLVAR; osi_Assert(num_pkts >= 0); queue_Init(&cbs); if 
(!num_pkts) { for (queue_Scan(q, p, np, rx_packet), num_pkts++) { if (p->niovecs > 2) { qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs); } RX_FPQ_MARK_FREE(p); } if (!num_pkts) return 0; } else { for (queue_Scan(q, p, np, rx_packet)) { if (p->niovecs > 2) { qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs); } RX_FPQ_MARK_FREE(p); } } if (qlen) { queue_SpliceAppend(q, &cbs); qlen += num_pkts; } else qlen = num_pkts; NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); queue_SpliceAppend(&rx_freePacketQueue, q); rx_nFreePackets += qlen; /* Wakeup anyone waiting for packets */ rxi_PacketsUnWait(); MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; return num_pkts; } #endif /* RX_ENABLE_TSFPQ */ /* this one is kind of awful. * In rxkad, the packet has been all shortened, and everything, ready for * sending. All of a sudden, we discover we need some of that space back. * This isn't terribly general, because it knows that the packets are only * rounded up to the EBS (userdata + security header). */ int rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb) { int i; i = p->niovecs - 1; if (p->wirevec[i].iov_base == (caddr_t) p->localdata) { if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) { p->wirevec[i].iov_len += nb; return 0; } } else { if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) { p->wirevec[i].iov_len += nb; return 0; } } return 0; } /* get sufficient space to store nb bytes of data (or more), and hook * it into the supplied packet. Return nbytes<=0 if successful, otherwise * returns the number of bytes >0 which it failed to come up with. * Don't need to worry about locking on packet, since only * one thread can manipulate one at a time. Locking on continution * packets is handled by AllocPacketBufs */ /* MTUXXX don't need to go throught the for loop if we can trust niovecs */ int rxi_AllocDataBuf(struct rx_packet *p, int nb, int class) { int i, nv; struct rx_queue q; register struct rx_packet *cb, *ncb; /* compute the number of cbuf's we need */ nv = nb / RX_CBUFFERSIZE; if ((nv * RX_CBUFFERSIZE) < nb) nv++; if ((nv + p->niovecs) > RX_MAXWVECS) nv = RX_MAXWVECS - p->niovecs; if (nv < 1) return nb; /* allocate buffers */ queue_Init(&q); nv = AllocPacketBufs(class, nv, &q); /* setup packet iovs */ for (i = p->niovecs, queue_Scan(&q, cb, ncb, rx_packet), i++) { queue_Remove(cb); p->wirevec[i].iov_base = (caddr_t) cb->localdata; p->wirevec[i].iov_len = RX_CBUFFERSIZE; } nb -= (nv * RX_CBUFFERSIZE); p->length += (nv * RX_CBUFFERSIZE); p->niovecs += nv; return nb; } /* Add more packet buffers */ #ifdef RX_ENABLE_TSFPQ void rxi_MorePackets(int apackets) { struct rx_packet *p, *e; register struct rx_ts_info_t * rx_ts_info; int getme; SPLVAR; getme = apackets * sizeof(struct rx_packet); p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme); PIN(p, getme); /* XXXXX */ memset((char *)p, 0, getme); RX_TS_INFO_GET(rx_ts_info); for (e = p + apackets; p < e; p++) { RX_PACKET_IOV_INIT(p); p->niovecs = 2; RX_TS_FPQ_CHECKIN(rx_ts_info,p); } rx_ts_info->_FPQ.delta += apackets; if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) { NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); RX_TS_FPQ_LTOG(rx_ts_info); rxi_NeedMorePackets = FALSE; rxi_PacketsUnWait(); MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; } } #else /* RX_ENABLE_TSFPQ */ void rxi_MorePackets(int apackets) { struct rx_packet *p, *e; int getme; SPLVAR; getme = apackets * sizeof(struct rx_packet); p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme); PIN(p, getme); /* XXXXX */ memset((char *)p, 0, getme); NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); for (e = p + apackets; p < e; p++) { 
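	/* Initialize each new packet's iovec array, mark the packet free,
	 * and chain it onto the global free packet queue; rx_freePktQ_lock
	 * is already held here. */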
RX_PACKET_IOV_INIT(p); p->flags |= RX_PKTFLAG_FREE; p->niovecs = 2; queue_Append(&rx_freePacketQueue, p); } rx_nFreePackets += apackets; rxi_NeedMorePackets = FALSE; rxi_PacketsUnWait(); MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; } #endif /* RX_ENABLE_TSFPQ */ #ifdef RX_ENABLE_TSFPQ void rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local) { struct rx_packet *p, *e; register struct rx_ts_info_t * rx_ts_info; int getme; SPLVAR; getme = apackets * sizeof(struct rx_packet); p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme); PIN(p, getme); /* XXXXX */ memset((char *)p, 0, getme); RX_TS_INFO_GET(rx_ts_info); for (e = p + apackets; p < e; p++) { RX_PACKET_IOV_INIT(p); p->niovecs = 2; RX_TS_FPQ_CHECKIN(rx_ts_info,p); } rx_ts_info->_FPQ.delta += apackets; if (flush_global && (num_keep_local < apackets)) { NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local)); rxi_NeedMorePackets = FALSE; rxi_PacketsUnWait(); MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; } } #endif /* RX_ENABLE_TSFPQ */ #ifndef KERNEL /* Add more packet buffers */ void rxi_MorePacketsNoLock(int apackets) { struct rx_packet *p, *e; int getme; /* allocate enough packets that 1/4 of the packets will be able * to hold maximal amounts of data */ apackets += (apackets / 4) * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE); getme = apackets * sizeof(struct rx_packet); p = rx_mallocedP = (struct rx_packet *)osi_Alloc(getme); memset((char *)p, 0, getme); for (e = p + apackets; p < e; p++) { RX_PACKET_IOV_INIT(p); p->flags |= RX_PKTFLAG_FREE; p->niovecs = 2; queue_Append(&rx_freePacketQueue, p); } rx_nFreePackets += apackets; #ifdef RX_ENABLE_TSFPQ /* TSFPQ patch also needs to keep track of total packets */ MUTEX_ENTER(&rx_stats_mutex); rx_nPackets += apackets; RX_TS_FPQ_COMPUTE_LIMITS; MUTEX_EXIT(&rx_stats_mutex); #endif /* RX_ENABLE_TSFPQ */ rxi_NeedMorePackets = FALSE; rxi_PacketsUnWait(); } #endif /* !KERNEL */ void rxi_FreeAllPackets(void) { /* must be called at proper interrupt level, etcetera */ /* MTUXXX need to free all Packets */ osi_Free(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet)); UNPIN(rx_mallocedP, (rx_maxReceiveWindow + 2) * sizeof(struct rx_packet)); } #ifdef RX_ENABLE_TSFPQ void rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit) { register struct rx_ts_info_t * rx_ts_info; register int xfer; SPLVAR; RX_TS_INFO_GET(rx_ts_info); if (num_keep_local != rx_ts_info->_FPQ.len) { NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); if (num_keep_local < rx_ts_info->_FPQ.len) { xfer = rx_ts_info->_FPQ.len - num_keep_local; RX_TS_FPQ_LTOG2(rx_ts_info, xfer); rxi_PacketsUnWait(); } else { xfer = num_keep_local - rx_ts_info->_FPQ.len; if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit) xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len; if (rx_nFreePackets < xfer) { rxi_MorePacketsNoLock(xfer - rx_nFreePackets); } RX_TS_FPQ_GTOL2(rx_ts_info, xfer); } MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; } } void rxi_FlushLocalPacketsTSFPQ(void) { rxi_AdjustLocalPacketsTSFPQ(0, 0); } #endif /* RX_ENABLE_TSFPQ */ /* Allocate more packets iff we need more continuation buffers */ /* In kernel, can't page in memory with interrupts disabled, so we * don't use the event mechanism. */ void rx_CheckPackets(void) { if (rxi_NeedMorePackets) { rxi_MorePackets(rx_initSendWindow); } } /* In the packet freeing routine below, the assumption is that we want all of the packets to be used equally frequently, so that we don't get packet buffers paging out. 
It would be just as valid to assume that we DO want them to page out if not many are being used. In any event, we assume the former, and append the packets to the end of the free list. */ /* This explanation is bogus. The free list doesn't remain in any kind of useful order for afs_int32: the packets in use get pretty much randomly scattered across all the pages. In order to permit unused {packets,bufs} to page out, they must be stored so that packets which are adjacent in memory are adjacent in the free list. An array springs rapidly to mind. */ /* Actually free the packet p. */ #ifdef RX_ENABLE_TSFPQ void rxi_FreePacketNoLock(struct rx_packet *p) { register struct rx_ts_info_t * rx_ts_info; dpf(("Free %lx\n", (unsigned long)p)); RX_TS_INFO_GET(rx_ts_info); RX_TS_FPQ_CHECKIN(rx_ts_info,p); if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) { RX_TS_FPQ_LTOG(rx_ts_info); } } #else /* RX_ENABLE_TSFPQ */ void rxi_FreePacketNoLock(struct rx_packet *p) { dpf(("Free %lx\n", (unsigned long)p)); RX_FPQ_MARK_FREE(p); rx_nFreePackets++; queue_Append(&rx_freePacketQueue, p); } #endif /* RX_ENABLE_TSFPQ */ #ifdef RX_ENABLE_TSFPQ void rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global) { register struct rx_ts_info_t * rx_ts_info; dpf(("Free %lx\n", (unsigned long)p)); RX_TS_INFO_GET(rx_ts_info); RX_TS_FPQ_CHECKIN(rx_ts_info,p); if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) { NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); RX_TS_FPQ_LTOG(rx_ts_info); /* Wakeup anyone waiting for packets */ rxi_PacketsUnWait(); MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; } } #endif /* RX_ENABLE_TSFPQ */ /* free continuation buffers off a packet into a queue of buffers */ static int rxi_FreeDataBufsToQueue(struct rx_packet *p, int first, struct rx_queue * q) { struct iovec *iov; struct rx_packet * cb; int count = 0; if (first < 2) first = 2; for (; first < p->niovecs; first++, count++) { iov = &p->wirevec[first]; if (!iov->iov_base) osi_Panic("rxi_PacketIOVToQueue: unexpected NULL iov"); cb = RX_CBUF_TO_PACKET(iov->iov_base, p); RX_FPQ_MARK_FREE(cb); queue_Append(q, cb); } p->length = 0; p->niovecs = 0; return count; } int rxi_FreeDataBufsNoLock(struct rx_packet *p, int first) { struct iovec *iov, *end; if (first != 1) /* MTUXXX */ osi_Panic("FreeDataBufs 1: first must be 1"); iov = &p->wirevec[1]; end = iov + (p->niovecs - 1); if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */ osi_Panic("FreeDataBufs 2: vec 1 must be localdata"); for (iov++; iov < end; iov++) { if (!iov->iov_base) osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL"); rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p)); } p->length = 0; p->niovecs = 0; return 0; } #ifdef RX_ENABLE_TSFPQ int rxi_FreeDataBufsTSFPQ(struct rx_packet *p, int first, int flush_global) { struct iovec *iov, *end; register struct rx_ts_info_t * rx_ts_info; RX_TS_INFO_GET(rx_ts_info); if (first != 1) /* MTUXXX */ osi_Panic("FreeDataBufs 1: first must be 1"); iov = &p->wirevec[1]; end = iov + (p->niovecs - 1); if (iov->iov_base != (caddr_t) p->localdata) /* MTUXXX */ osi_Panic("FreeDataBufs 2: vec 1 must be localdata"); for (iov++; iov < end; iov++) { if (!iov->iov_base) osi_Panic("FreeDataBufs 3: vecs 2-niovecs must not be NULL"); RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p)); } p->length = 0; p->niovecs = 0; if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) { NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); RX_TS_FPQ_LTOG(rx_ts_info); /* Wakeup anyone waiting for packets */ rxi_PacketsUnWait(); MUTEX_EXIT(&rx_freePktQ_lock); 
USERPRI; } return 0; } #endif /* RX_ENABLE_TSFPQ */ int rxi_nBadIovecs = 0; /* rxi_RestoreDataBufs * * Restore the correct sizes to the iovecs. Called when reusing a packet * for reading off the wire. */ void rxi_RestoreDataBufs(struct rx_packet *p) { int i; struct iovec *iov = &p->wirevec[2]; RX_PACKET_IOV_INIT(p); for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) { if (!iov->iov_base) { rxi_nBadIovecs++; p->niovecs = i; break; } iov->iov_len = RX_CBUFFERSIZE; } } #ifdef RX_ENABLE_TSFPQ int rxi_TrimDataBufs(struct rx_packet *p, int first) { int length; struct iovec *iov, *end; register struct rx_ts_info_t * rx_ts_info; SPLVAR; if (first != 1) osi_Panic("TrimDataBufs 1: first must be 1"); /* Skip over continuation buffers containing message data */ iov = &p->wirevec[2]; end = iov + (p->niovecs - 2); length = p->length - p->wirevec[1].iov_len; for (; iov < end && length > 0; iov++) { if (!iov->iov_base) osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL"); length -= iov->iov_len; } /* iov now points to the first empty data buffer. */ if (iov >= end) return 0; RX_TS_INFO_GET(rx_ts_info); for (; iov < end; iov++) { if (!iov->iov_base) osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL"); RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p)); p->niovecs--; } if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) { NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); RX_TS_FPQ_LTOG(rx_ts_info); rxi_PacketsUnWait(); MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; } return 0; } #else /* RX_ENABLE_TSFPQ */ int rxi_TrimDataBufs(struct rx_packet *p, int first) { int length; struct iovec *iov, *end; SPLVAR; if (first != 1) osi_Panic("TrimDataBufs 1: first must be 1"); /* Skip over continuation buffers containing message data */ iov = &p->wirevec[2]; end = iov + (p->niovecs - 2); length = p->length - p->wirevec[1].iov_len; for (; iov < end && length > 0; iov++) { if (!iov->iov_base) osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL"); length -= iov->iov_len; } /* iov now points to the first empty data buffer. */ if (iov >= end) return 0; NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); for (; iov < end; iov++) { if (!iov->iov_base) osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL"); rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p)); p->niovecs--; } rxi_PacketsUnWait(); MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; return 0; } #endif /* RX_ENABLE_TSFPQ */ /* Free the packet p. P is assumed not to be on any queue, i.e. * remove it yourself first if you call this routine. */ #ifdef RX_ENABLE_TSFPQ void rxi_FreePacket(struct rx_packet *p) { rxi_FreeDataBufsTSFPQ(p, 1, 0); rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL); } #else /* RX_ENABLE_TSFPQ */ void rxi_FreePacket(struct rx_packet *p) { SPLVAR; NETPRI; MUTEX_ENTER(&rx_freePktQ_lock); rxi_FreeDataBufsNoLock(p, 1); rxi_FreePacketNoLock(p); /* Wakeup anyone waiting for packets */ rxi_PacketsUnWait(); MUTEX_EXIT(&rx_freePktQ_lock); USERPRI; } #endif /* RX_ENABLE_TSFPQ */ /* rxi_AllocPacket sets up p->length so it reflects the number of * bytes in the packet at this point, **not including** the header. 
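 * (Concretely, a freshly allocated packet has two iovecs: wirevec[0]
 * points at the wire header and wirevec[1] at the first data buffer;
 * p->length covers only the data area.)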
* The header is absolutely necessary, besides, this is the way the * length field is usually used */ #ifdef RX_ENABLE_TSFPQ struct rx_packet * rxi_AllocPacketNoLock(int class) { register struct rx_packet *p; register struct rx_ts_info_t * rx_ts_info; RX_TS_INFO_GET(rx_ts_info); #ifdef KERNEL if (rxi_OverQuota(class)) { rxi_NeedMorePackets = TRUE; MUTEX_ENTER(&rx_stats_mutex); switch (class) { case RX_PACKET_CLASS_RECEIVE: rx_stats.receivePktAllocFailures++; break; case RX_PACKET_CLASS_SEND: rx_stats.sendPktAllocFailures++; break; case RX_PACKET_CLASS_SPECIAL: rx_stats.specialPktAllocFailures++; break; case RX_PACKET_CLASS_RECV_CBUF: rx_stats.receiveCbufPktAllocFailures++; break; case RX_PACKET_CLASS_SEND_CBUF: rx_stats.sendCbufPktAllocFailures++; break; } MUTEX_EXIT(&rx_stats_mutex); return (struct rx_packet *)0; } #endif /* KERNEL */ MUTEX_ENTER(&rx_stats_mutex); rx_stats.packetRequests++; MUTEX_EXIT(&rx_stats_mutex); if (queue_IsEmpty(&rx_ts_info->_FPQ)) { #ifdef KERNEL if (queue_IsEmpty(&rx_freePacketQueue)) osi_Panic("rxi_AllocPacket error"); #else /* KERNEL */ if (queue_IsEmpty(&rx_freePacketQueue)) rxi_MorePacketsNoLock(rx_initSendWindow); #endif /* KERNEL */ RX_TS_FPQ_GTOL(rx_ts_info); } RX_TS_FPQ_CHECKOUT(rx_ts_info,p); dpf(("Alloc %lx, class %d\n", (unsigned long)p, class)); /* have to do this here because rx_FlushWrite fiddles with the iovs in * order to truncate outbound packets. In the near future, may need * to allocate bufs from a static pool here, and/or in AllocSendPacket */ RX_PACKET_IOV_FULLINIT(p); return p; } #else /* RX_ENABLE_TSFPQ */ struct rx_packet * rxi_AllocPacketNoLock(int class) { register struct rx_packet *p; #ifdef KERNEL if (rxi_OverQuota(class)) { rxi_NeedMorePackets = TRUE; MUTEX_ENTER(&rx_stats_mutex); switch (class) { case RX_PACKET_CLASS_RECEIVE: rx_stats.receivePktAllocFailures++; break; case RX_PACKET_CLASS_SEND: rx_stats.sendPktAllocFailures++; break; case RX_PACKET_CLASS_SPECIAL: rx_stats.specialPktAllocFailures++; break; case RX_PACKET_CLASS_RECV_CBUF: rx_stats.receiveCbufPktAllocFailures++; break; case RX_PACKET_CLASS_SEND_CBUF: rx_stats.sendCbufPktAllocFailures++; break; } MUTEX_EXIT(&rx_stats_mutex); return (struct rx_packet *)0; } #endif /* KERNEL */ MUTEX_ENTER(&rx_stats_mutex); rx_stats.packetRequests++; MUTEX_EXIT(&rx_stats_mutex); #ifdef KERNEL if (queue_IsEmpty(&rx_freePacketQueue)) osi_Panic("rxi_AllocPacket error"); #else /* KERNEL */ if (queue_IsEmpty(&rx_freePacketQueue)) rxi_MorePacketsNoLock(rx_initSendWindow); #endif /* KERNEL */ rx_nFreePackets--; p = queue_First(&rx_freePacketQueue, rx_packet); queue_Remove(p); RX_FPQ_MARK_USED(p); dpf(("Alloc %lx, class %d\n", (unsigned long)p, class)); /* have to do this here because rx_FlushWrite fiddles with the iovs in * order to truncate outbound packets. 
In the near future, may need * to allocate bufs from a static pool here, and/or in AllocSendPacket */ RX_PACKET_IOV_FULLINIT(p); return p; } #endif /* RX_ENABLE_TSFPQ */ #ifdef RX_ENABLE_TSFPQ struct rx_packet * rxi_AllocPacketTSFPQ(int class, int pull_global) { register struct rx_packet *p; register struct rx_ts_info_t * rx_ts_info; RX_TS_INFO_GET(rx_ts_info); MUTEX_ENTER(&rx_stats_mutex); rx_stats.packetRequests++; MUTEX_EXIT(&rx_stats_mutex); if (pull_global && queue_IsEmpty(&rx_ts_info->_FPQ)) { MUTEX_ENTER(&rx_freePktQ_lock); if (queue_IsEmpty(&rx_freePacketQueue)) rxi_MorePacketsNoLock(rx_initSendWindow); RX_TS_FPQ_GTOL(rx_ts_info); MUTEX_EXIT(&rx_freePktQ_lock); } else if (queue_IsEmpty(&rx_ts_info->_FPQ)) { return NULL; } RX_TS_FPQ_CHECKOUT(rx_ts_info,p); dpf(("Alloc %lx, class %d\n", (unsigned long)p, class)); /* have to do this here because rx_FlushWrite fiddles with the iovs in * order to truncate outbound packets. In the near future, may need * to allocate bufs from a static pool here, and/or in AllocSendPacket */ RX_PACKET_IOV_FULLINIT(p); return p; } #endif /* RX_ENABLE_TSFPQ */ #ifdef RX_ENABLE_TSFPQ struct rx_packet * rxi_AllocPacket(int class) { register struct rx_packet *p; p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL); return p; } #else /* RX_ENABLE_TSFPQ */ struct rx_packet * rxi_AllocPacket(int class) { register struct rx_packet *p; MUTEX_ENTER(&rx_freePktQ_lock); p = rxi_AllocPacketNoLock(class); MUTEX_EXIT(&rx_freePktQ_lock); return p; } #endif /* RX_ENABLE_TSFPQ */ /* This guy comes up with as many buffers as it {takes,can get} given * the MTU for this call. It also sets the packet length before * returning. caution: this is often called at NETPRI * Called with call locked. */ struct rx_packet * rxi_AllocSendPacket(register struct rx_call *call, int want) { register struct rx_packet *p = (struct rx_packet *)0; register int mud; register unsigned delta; SPLVAR; mud = call->MTU - RX_HEADER_SIZE; delta = rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) + rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call)); #ifdef RX_ENABLE_TSFPQ if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) { want += delta; want = MIN(want, mud); if ((unsigned)want > p->length) (void)rxi_AllocDataBuf(p, (want - p->length), RX_PACKET_CLASS_SEND_CBUF); if ((unsigned)p->length > mud) p->length = mud; if (delta >= p->length) { rxi_FreePacket(p); p = NULL; } else { p->length -= delta; } return p; } #endif /* RX_ENABLE_TSFPQ */ while (!(call->error)) { MUTEX_ENTER(&rx_freePktQ_lock); /* if an error occurred, or we get the packet we want, we're done */ if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) { MUTEX_EXIT(&rx_freePktQ_lock); want += delta; want = MIN(want, mud); if ((unsigned)want > p->length) (void)rxi_AllocDataBuf(p, (want - p->length), RX_PACKET_CLASS_SEND_CBUF); if ((unsigned)p->length > mud) p->length = mud; if (delta >= p->length) { rxi_FreePacket(p); p = NULL; } else { p->length -= delta; } break; } /* no error occurred, and we didn't get a packet, so we sleep. * At this point, we assume that packets will be returned * sooner or later, as packets are acknowledged, and so we * just wait. 
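 * The wakeup comes from rxi_PacketsUnWait(), which is invoked whenever
 * packets go back onto the free queue; while sleeping we drop the call
 * lock but keep a reference on the call via CALL_HOLD.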
*/ NETPRI; call->flags |= RX_CALL_WAIT_PACKETS; CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET); MUTEX_EXIT(&call->lock); rx_waitingForPackets = 1; #ifdef RX_ENABLE_LOCKS CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock); #else osi_rxSleep(&rx_waitingForPackets); #endif MUTEX_EXIT(&rx_freePktQ_lock); MUTEX_ENTER(&call->lock); CALL_RELE(call, RX_CALL_REFCOUNT_PACKET); call->flags &= ~RX_CALL_WAIT_PACKETS; USERPRI; } return p; } #ifndef KERNEL #ifdef AFS_NT40_ENV /* Windows does not use file descriptors. */ #define CountFDs(amax) 0 #else /* count the number of used FDs */ static int CountFDs(register int amax) { struct stat tstat; register int i, code; register int count; count = 0; for (i = 0; i < amax; i++) { code = fstat(i, &tstat); if (code == 0) count++; } return count; } #endif /* AFS_NT40_ENV */ #else /* KERNEL */ #define CountFDs(amax) amax #endif /* KERNEL */ #if !defined(KERNEL) || defined(UKERNEL) /* This function reads a single packet from the interface into the * supplied packet buffer (*p). Return 0 if the packet is bogus. The * (host,port) of the sender are stored in the supplied variables, and * the data length of the packet is stored in the packet structure. * The header is decoded. */ int rxi_ReadPacket(osi_socket socket, register struct rx_packet *p, afs_uint32 * host, u_short * port) { struct sockaddr_in from; int nbytes; afs_int32 rlen; register afs_int32 tlen, savelen; struct msghdr msg; rx_computelen(p, tlen); rx_SetDataSize(p, tlen); /* this is the size of the user data area */ tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */ rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check * it once in order to avoid races. */ tlen = rlen - tlen; if (tlen > 0) { tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF); if (tlen > 0) { tlen = rlen - tlen; } else tlen = rlen; } else tlen = rlen; /* Extend the last iovec for padding, it's just to make sure that the * read doesn't return more data than we expect, and is done to get around * our problems caused by the lack of a length field in the rx header. * Use the extra buffer that follows the localdata in each packet * structure. 
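 * If the datagram spills into that extra space, rxi_Recvmsg returns more
 * bytes than we advertised (nbytes > tlen) and the packet is rejected as
 * bogus below.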
*/ savelen = p->wirevec[p->niovecs - 1].iov_len; p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE; memset((char *)&msg, 0, sizeof(msg)); msg.msg_name = (char *)&from; msg.msg_namelen = sizeof(struct sockaddr_in); msg.msg_iov = p->wirevec; msg.msg_iovlen = p->niovecs; nbytes = rxi_Recvmsg(socket, &msg, 0); /* restore the vec to its correct state */ p->wirevec[p->niovecs - 1].iov_len = savelen; p->length = (nbytes - RX_HEADER_SIZE); if ((nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */ if (nbytes > 0) rxi_MorePackets(rx_initSendWindow); else if (nbytes < 0 && errno == EWOULDBLOCK) { MUTEX_ENTER(&rx_stats_mutex); rx_stats.noPacketOnRead++; MUTEX_EXIT(&rx_stats_mutex); } else { MUTEX_ENTER(&rx_stats_mutex); rx_stats.bogusPacketOnRead++; rx_stats.bogusHost = from.sin_addr.s_addr; MUTEX_EXIT(&rx_stats_mutex); dpf(("B: bogus packet from [%x,%d] nb=%d", ntohl(from.sin_addr.s_addr), ntohs(from.sin_port), nbytes)); } return 0; } #ifdef RXDEBUG else if ((rx_intentionallyDroppedOnReadPer100 > 0) && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) { rxi_DecodePacketHeader(p); *host = from.sin_addr.s_addr; *port = from.sin_port; dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d", p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(host), ntohs(port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, p->length)); rxi_TrimDataBufs(p, 1); return 0; } #endif else { /* Extract packet header. */ rxi_DecodePacketHeader(p); *host = from.sin_addr.s_addr; *port = from.sin_port; if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) { struct rx_peer *peer; MUTEX_ENTER(&rx_stats_mutex); rx_stats.packetsRead[p->header.type - 1]++; MUTEX_EXIT(&rx_stats_mutex); /* * Try to look up this peer structure. If it doesn't exist, * don't create a new one - * we don't keep count of the bytes sent/received if a peer * structure doesn't already exist. * * The peer/connection cleanup code assumes that there is 1 peer * per connection. If we actually created a peer structure here * and this packet was an rxdebug packet, the peer structure would * never be cleaned up. */ peer = rxi_FindPeer(*host, *port, 0, 0); /* Since this may not be associated with a connection, * it may have no refCount, meaning we could race with * ReapConnections */ if (peer && (peer->refCount > 0)) { MUTEX_ENTER(&peer->peer_lock); hadd32(peer->bytesReceived, p->length); MUTEX_EXIT(&peer->peer_lock); } } /* Free any empty packet buffers at the end of this packet */ rxi_TrimDataBufs(p, 1); return 1; } } #endif /* !KERNEL || UKERNEL */ /* This function splits off the first packet in a jumbo packet. * As of AFS 3.5, jumbograms contain more than one fixed size * packet, and the RX_JUMBO_PACKET flag is set in all but the * last packet header. All packets (except the last) are padded to * fall on RX_CBUFFERSIZE boundaries. * HACK: We store the length of the first n-1 packets in the * last two pad bytes. */ struct rx_packet * rxi_SplitJumboPacket(register struct rx_packet *p, afs_int32 host, short port, int first) { struct rx_packet *np; struct rx_jumboHeader *jp; int niov, i; struct iovec *iov; int length; afs_uint32 temp; /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE * bytes in length. All but the first packet are preceded by * an abbreviated four byte header. The length of the last packet * is calculated from the size of the jumbogram. 
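 *
 * A sketch of an N-packet jumbogram as it sits in the datagram (sizes
 * refer to the constants used below; not to scale):
 *
 *   [rx header][data 1: RX_JUMBOBUFFERSIZE][jumbo hdr: RX_JUMBOHEADERSIZE]
 *              [data 2: RX_JUMBOBUFFERSIZE][jumbo hdr] ... [data N: remainder]
 *
 * Each abbreviated jumbo header carries the flags and checksum of the
 * packet that follows it.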
*/ length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE; if ((int)p->length < length) { dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length)); return NULL; } niov = p->niovecs - 2; if (niov < 1) { dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs)); return NULL; } iov = &p->wirevec[2]; np = RX_CBUF_TO_PACKET(iov->iov_base, p); /* Get a pointer to the abbreviated packet header */ jp = (struct rx_jumboHeader *) ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE); /* Set up the iovecs for the next packet */ np->wirevec[0].iov_base = (char *)(&np->wirehead[0]); np->wirevec[0].iov_len = sizeof(struct rx_header); np->wirevec[1].iov_base = (char *)(&np->localdata[0]); np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE; np->niovecs = niov + 1; for (i = 2, iov++; i <= niov; i++, iov++) { np->wirevec[i] = *iov; } np->length = p->length - length; p->length = RX_JUMBOBUFFERSIZE; p->niovecs = 2; /* Convert the jumbo packet header to host byte order */ temp = ntohl(*(afs_uint32 *) jp); jp->flags = (u_char) (temp >> 24); jp->cksum = (u_short) (temp); /* Fill in the packet header */ np->header = p->header; np->header.serial = p->header.serial + 1; np->header.seq = p->header.seq + 1; np->header.flags = jp->flags; np->header.spare = jp->cksum; return np; } #ifndef KERNEL /* Send a udp datagram */ int osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs, int length, int istack) { struct msghdr msg; int ret; memset(&msg, 0, sizeof(msg)); msg.msg_iov = dvec; msg.msg_iovlen = nvecs; msg.msg_name = addr; msg.msg_namelen = sizeof(struct sockaddr_in); ret = rxi_Sendmsg(socket, &msg, 0); return ret; } #elif !defined(UKERNEL) /* * message receipt is done in rxk_input or rx_put. */ #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV) /* * Copy an mblock to the contiguous area pointed to by cp. * MTUXXX Supposed to skip bytes and copy bytes, * but it doesn't really. * Returns the number of bytes not transferred. * The message is NOT changed. */ static int cpytoc(mblk_t * mp, register int off, register int len, register char *cp) { register int n; for (; mp && len > 0; mp = mp->b_cont) { if (mp->b_datap->db_type != M_DATA) { return -1; } n = MIN(len, (mp->b_wptr - mp->b_rptr)); memcpy(cp, (char *)mp->b_rptr, n); cp += n; len -= n; mp->b_rptr += n; } return (len); } /* MTUXXX Supposed to skip bytes and copy bytes, * but it doesn't really. * This sucks, anyway, do it like m_cpy.... 
below */ static int cpytoiovec(mblk_t * mp, int off, int len, register struct iovec *iovs, int niovs) { register int m, n, o, t, i; for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) { if (mp->b_datap->db_type != M_DATA) { return -1; } n = MIN(len, (mp->b_wptr - mp->b_rptr)); len -= n; while (n) { if (!t) { o = 0; i++; t = iovs[i].iov_len; } m = MIN(n, t); memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m); mp->b_rptr += m; o += m; t -= m; n -= m; } } return (len); } #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d) #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e) #else #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV) static int m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs) { caddr_t p1, p2; unsigned int l1, l2, i, t; if (m == NULL || off < 0 || len < 0 || iovs == NULL) osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */ while (off && m) if (m->m_len <= off) { off -= m->m_len; m = m->m_next; continue; } else break; if (m == NULL) return len; p1 = mtod(m, caddr_t) + off; l1 = m->m_len - off; i = 0; p2 = iovs[0].iov_base; l2 = iovs[0].iov_len; while (len) { t = MIN(l1, MIN(l2, (unsigned int)len)); memcpy(p2, p1, t); p1 += t; p2 += t; l1 -= t; l2 -= t; len -= t; if (!l1) { m = m->m_next; if (!m) break; p1 = mtod(m, caddr_t); l1 = m->m_len; } if (!l2) { if (++i >= niovs) break; p2 = iovs[i].iov_base; l2 = iovs[i].iov_len; } } return len; } #endif /* LINUX */ #endif /* AFS_SUN5_ENV */ #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV) int rx_mb_to_packet(amb, free, hdr_len, data_len, phandle) #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV) mblk_t *amb; #else struct mbuf *amb; #endif void (*free) (); struct rx_packet *phandle; int hdr_len, data_len; { register int code; code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs); (*free) (amb); return code; } #endif /* LINUX */ #endif /*KERNEL && !UKERNEL */ /* send a response to a debug packet */ struct rx_packet * rxi_ReceiveDebugPacket(register struct rx_packet *ap, osi_socket asocket, afs_int32 ahost, short aport, int istack) { struct rx_debugIn tin; afs_int32 tl; struct rx_serverQueueEntry *np, *nqe; /* * Only respond to client-initiated Rx debug packets, * and clear the client flag in the response. 
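 * (Answering only client-initiated packets keeps two rx peers from
 * bouncing debug replies back and forth at each other indefinitely.)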
*/ if (ap->header.flags & RX_CLIENT_INITIATED) { ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED; rxi_EncodePacketHeader(ap); } else { return ap; } rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin); /* all done with packet, now set length to the truth, so we can * reuse this packet */ rx_computelen(ap, ap->length); tin.type = ntohl(tin.type); tin.index = ntohl(tin.index); switch (tin.type) { case RX_DEBUGI_GETSTATS:{ struct rx_debugStats tstat; /* get basic stats */ memset((char *)&tstat, 0, sizeof(tstat)); /* make sure spares are zero */ tstat.version = RX_DEBUGI_VERSION; #ifndef RX_ENABLE_LOCKS tstat.waitingForPackets = rx_waitingForPackets; #endif MUTEX_ENTER(&rx_serverPool_lock); tstat.nFreePackets = htonl(rx_nFreePackets); tstat.callsExecuted = htonl(rxi_nCalls); tstat.packetReclaims = htonl(rx_packetReclaims); tstat.usedFDs = CountFDs(64); tstat.nWaiting = htonl(rx_nWaiting); tstat.nWaited = htonl(rx_nWaited); queue_Count(&rx_idleServerQueue, np, nqe, rx_serverQueueEntry, tstat.idleThreads); MUTEX_EXIT(&rx_serverPool_lock); tstat.idleThreads = htonl(tstat.idleThreads); tl = sizeof(struct rx_debugStats) - ap->length; if (tl > 0) tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF); if (tl <= 0) { rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *)&tstat); ap->length = sizeof(struct rx_debugStats); rxi_SendDebugPacket(ap, asocket, ahost, aport, istack); rx_computelen(ap, ap->length); } break; } case RX_DEBUGI_GETALLCONN: case RX_DEBUGI_GETCONN:{ int i, j; register struct rx_connection *tc; struct rx_call *tcall; struct rx_debugConn tconn; int all = (tin.type == RX_DEBUGI_GETALLCONN); tl = sizeof(struct rx_debugConn) - ap->length; if (tl > 0) tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF); if (tl > 0) return ap; memset((char *)&tconn, 0, sizeof(tconn)); /* make sure spares are zero */ /* get N'th (maybe) "interesting" connection info */ for (i = 0; i < rx_hashTableSize; i++) { #if !defined(KERNEL) /* the time complexity of the algorithm used here * exponentially increses with the number of connections. */ #ifdef AFS_PTHREAD_ENV pthread_yield(); #else (void)IOMGR_Poll(); #endif #endif MUTEX_ENTER(&rx_connHashTable_lock); /* We might be slightly out of step since we are not * locking each call, but this is only debugging output. 
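 * (A debug client enumerates connections by re-issuing the query with an
 * incremented index; a reply whose cid is 0xffffffff marks the end of
 * the list.)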
*/ for (tc = rx_connHashTable[i]; tc; tc = tc->next) { if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) { tconn.host = tc->peer->host; tconn.port = tc->peer->port; tconn.cid = htonl(tc->cid); tconn.epoch = htonl(tc->epoch); tconn.serial = htonl(tc->serial); for (j = 0; j < RX_MAXCALLS; j++) { tconn.callNumber[j] = htonl(tc->callNumber[j]); if ((tcall = tc->call[j])) { tconn.callState[j] = tcall->state; tconn.callMode[j] = tcall->mode; tconn.callFlags[j] = tcall->flags; if (queue_IsNotEmpty(&tcall->rq)) tconn.callOther[j] |= RX_OTHER_IN; if (queue_IsNotEmpty(&tcall->tq)) tconn.callOther[j] |= RX_OTHER_OUT; } else tconn.callState[j] = RX_STATE_NOTINIT; } tconn.natMTU = htonl(tc->peer->natMTU); tconn.error = htonl(tc->error); tconn.flags = tc->flags; tconn.type = tc->type; tconn.securityIndex = tc->securityIndex; if (tc->securityObject) { RXS_GetStats(tc->securityObject, tc, &tconn.secStats); #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a)) #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a)) DOHTONL(flags); DOHTONL(expires); DOHTONL(packetsReceived); DOHTONL(packetsSent); DOHTONL(bytesReceived); DOHTONL(bytesSent); for (i = 0; i < sizeof(tconn.secStats.spares) / sizeof(short); i++) DOHTONS(spares[i]); for (i = 0; i < sizeof(tconn.secStats.sparel) / sizeof(afs_int32); i++) DOHTONL(sparel[i]); } MUTEX_EXIT(&rx_connHashTable_lock); rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn); tl = ap->length; ap->length = sizeof(struct rx_debugConn); rxi_SendDebugPacket(ap, asocket, ahost, aport, istack); ap->length = tl; return ap; } } MUTEX_EXIT(&rx_connHashTable_lock); } /* if we make it here, there are no interesting packets */ tconn.cid = htonl(0xffffffff); /* means end */ rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *)&tconn); tl = ap->length; ap->length = sizeof(struct rx_debugConn); rxi_SendDebugPacket(ap, asocket, ahost, aport, istack); ap->length = tl; break; } /* * Pass back all the peer structures we have available */ case RX_DEBUGI_GETPEER:{ int i; register struct rx_peer *tp; struct rx_debugPeer tpeer; tl = sizeof(struct rx_debugPeer) - ap->length; if (tl > 0) tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF); if (tl > 0) return ap; memset((char *)&tpeer, 0, sizeof(tpeer)); for (i = 0; i < rx_hashTableSize; i++) { #if !defined(KERNEL) /* the time complexity of the algorithm used here * exponentially increses with the number of peers. * * Yielding after processing each hash table entry * and dropping rx_peerHashTable_lock. 
* also increases the risk that we will miss a new * entry - but we are willing to live with this * limitation since this is meant for debugging only */ #ifdef AFS_PTHREAD_ENV pthread_yield(); #else (void)IOMGR_Poll(); #endif #endif MUTEX_ENTER(&rx_peerHashTable_lock); for (tp = rx_peerHashTable[i]; tp; tp = tp->next) { if (tin.index-- <= 0) { tpeer.host = tp->host; tpeer.port = tp->port; tpeer.ifMTU = htons(tp->ifMTU); tpeer.idleWhen = htonl(tp->idleWhen); tpeer.refCount = htons(tp->refCount); tpeer.burstSize = tp->burstSize; tpeer.burst = tp->burst; tpeer.burstWait.sec = htonl(tp->burstWait.sec); tpeer.burstWait.usec = htonl(tp->burstWait.usec); tpeer.rtt = htonl(tp->rtt); tpeer.rtt_dev = htonl(tp->rtt_dev); tpeer.timeout.sec = htonl(tp->timeout.sec); tpeer.timeout.usec = htonl(tp->timeout.usec); tpeer.nSent = htonl(tp->nSent); tpeer.reSends = htonl(tp->reSends); tpeer.inPacketSkew = htonl(tp->inPacketSkew); tpeer.outPacketSkew = htonl(tp->outPacketSkew); tpeer.rateFlag = htonl(tp->rateFlag); tpeer.natMTU = htons(tp->natMTU); tpeer.maxMTU = htons(tp->maxMTU); tpeer.maxDgramPackets = htons(tp->maxDgramPackets); tpeer.ifDgramPackets = htons(tp->ifDgramPackets); tpeer.MTU = htons(tp->MTU); tpeer.cwind = htons(tp->cwind); tpeer.nDgramPackets = htons(tp->nDgramPackets); tpeer.congestSeq = htons(tp->congestSeq); tpeer.bytesSent.high = htonl(tp->bytesSent.high); tpeer.bytesSent.low = htonl(tp->bytesSent.low); tpeer.bytesReceived.high = htonl(tp->bytesReceived.high); tpeer.bytesReceived.low = htonl(tp->bytesReceived.low); MUTEX_EXIT(&rx_peerHashTable_lock); rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer); tl = ap->length; ap->length = sizeof(struct rx_debugPeer); rxi_SendDebugPacket(ap, asocket, ahost, aport, istack); ap->length = tl; return ap; } } MUTEX_EXIT(&rx_peerHashTable_lock); } /* if we make it here, there are no interesting packets */ tpeer.host = htonl(0xffffffff); /* means end */ rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer), (char *)&tpeer); tl = ap->length; ap->length = sizeof(struct rx_debugPeer); rxi_SendDebugPacket(ap, asocket, ahost, aport, istack); ap->length = tl; break; } case RX_DEBUGI_RXSTATS:{ int i; afs_int32 *s; tl = sizeof(rx_stats) - ap->length; if (tl > 0) tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF); if (tl > 0) return ap; /* Since its all int32s convert to network order with a loop. */ MUTEX_ENTER(&rx_stats_mutex); s = (afs_int32 *) & rx_stats; for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++) rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s)); tl = ap->length; ap->length = sizeof(rx_stats); MUTEX_EXIT(&rx_stats_mutex); rxi_SendDebugPacket(ap, asocket, ahost, aport, istack); ap->length = tl; break; } default: /* error response packet */ tin.type = htonl(RX_DEBUGI_BADTYPE); tin.index = tin.type; rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin); tl = ap->length; ap->length = sizeof(struct rx_debugIn); rxi_SendDebugPacket(ap, asocket, ahost, aport, istack); ap->length = tl; break; } return ap; } struct rx_packet * rxi_ReceiveVersionPacket(register struct rx_packet *ap, osi_socket asocket, afs_int32 ahost, short aport, int istack) { afs_int32 tl; /* * Only respond to client-initiated version requests, and * clear that flag in the response. 
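 * The reply body is the first 65 bytes of cml_version_number, skipping
 * its four-byte prefix and NUL padding the remainder.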
*/ if (ap->header.flags & RX_CLIENT_INITIATED) { char buf[66]; ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED; rxi_EncodePacketHeader(ap); memset(buf, 0, sizeof(buf)); strncpy(buf, cml_version_number + 4, sizeof(buf) - 1); rx_packetwrite(ap, 0, 65, buf); tl = ap->length; ap->length = 65; rxi_SendDebugPacket(ap, asocket, ahost, aport, istack); ap->length = tl; } return ap; } /* send a debug packet back to the sender */ static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket, afs_int32 ahost, short aport, afs_int32 istack) { struct sockaddr_in taddr; int i; int nbytes; int saven = 0; size_t savelen = 0; #ifdef KERNEL int waslocked = ISAFS_GLOCK(); #endif taddr.sin_family = AF_INET; taddr.sin_port = aport; taddr.sin_addr.s_addr = ahost; #ifdef STRUCT_SOCKADDR_HAS_SA_LEN taddr.sin_len = sizeof(struct sockaddr_in); #endif /* We need to trim the niovecs. */ nbytes = apacket->length; for (i = 1; i < apacket->niovecs; i++) { if (nbytes <= apacket->wirevec[i].iov_len) { savelen = apacket->wirevec[i].iov_len; saven = apacket->niovecs; apacket->wirevec[i].iov_len = nbytes; apacket->niovecs = i + 1; /* so condition fails because i == niovecs */ } else nbytes -= apacket->wirevec[i].iov_len; } #ifdef KERNEL #ifdef RX_KERNEL_TRACE if (ICL_SETACTIVE(afs_iclSetp)) { if (!waslocked) AFS_GLOCK(); afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING, "before osi_NetSend()"); AFS_GUNLOCK(); } else #else if (waslocked) AFS_GUNLOCK(); #endif #endif /* debug packets are not reliably delivered, hence the cast below. */ (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs, apacket->length + RX_HEADER_SIZE, istack); #ifdef KERNEL #ifdef RX_KERNEL_TRACE if (ICL_SETACTIVE(afs_iclSetp)) { AFS_GLOCK(); afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING, "after osi_NetSend()"); if (!waslocked) AFS_GUNLOCK(); } else #else if (waslocked) AFS_GLOCK(); #endif #endif if (saven) { /* means we truncated the packet above. */ apacket->wirevec[i - 1].iov_len = savelen; apacket->niovecs = saven; } } /* Send the packet to appropriate destination for the specified * call. The header is first encoded and placed in the packet. */ void rxi_SendPacket(struct rx_call *call, struct rx_connection *conn, struct rx_packet *p, int istack) { #if defined(KERNEL) int waslocked; #endif int code; struct sockaddr_in addr; register struct rx_peer *peer = conn->peer; osi_socket socket; #ifdef RXDEBUG char deliveryType = 'S'; #endif /* The address we're sending the packet to */ memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_port = peer->port; addr.sin_addr.s_addr = peer->host; /* This stuff should be revamped, I think, so that most, if not * all, of the header stuff is always added here. We could * probably do away with the encode/decode routines. XXXXX */ /* Stamp each packet with a unique serial number. The serial * number is maintained on a connection basis because some types * of security may be based on the serial number of the packet, * and security is handled on a per authenticated-connection * basis. */ /* Pre-increment, to guarantee no zero serial number; a zero * serial number means the packet was never sent. */ MUTEX_ENTER(&conn->conn_data_lock); p->header.serial = ++conn->serial; MUTEX_EXIT(&conn->conn_data_lock); /* This is so we can adjust retransmit time-outs better in the face of * rapidly changing round-trip times. RTO estimation is not a la Karn. 
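 * Recording the serial number of the first transmission gives later code
 * a way to tell the original send apart from retransmissions of the same
 * packet.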
*/ if (p->firstSerial == 0) { p->firstSerial = p->header.serial; } #ifdef RXDEBUG /* If an output tracer function is defined, call it with the packet and * network address. Note this function may modify its arguments. */ if (rx_almostSent) { int drop = (*rx_almostSent) (p, &addr); /* drop packet if return value is non-zero? */ if (drop) deliveryType = 'D'; /* Drop the packet */ } #endif /* Get network byte order header */ rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to * touch ALL the fields */ /* Send the packet out on the same socket that related packets are being * received on */ socket = (conn->type == RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket); #ifdef RXDEBUG /* Possibly drop this packet, for testing purposes */ if ((deliveryType == 'D') || ((rx_intentionallyDroppedPacketsPer100 > 0) && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) { deliveryType = 'D'; /* Drop the packet */ } else { deliveryType = 'S'; /* Send the packet */ #endif /* RXDEBUG */ /* Loop until the packet is sent. We'd prefer just to use a * blocking socket, but unfortunately the interface doesn't * allow us to have the socket block in send mode, and not * block in receive mode */ #ifdef KERNEL waslocked = ISAFS_GLOCK(); #ifdef RX_KERNEL_TRACE if (ICL_SETACTIVE(afs_iclSetp)) { if (!waslocked) AFS_GLOCK(); afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING, "before osi_NetSend()"); AFS_GUNLOCK(); } else #else if (waslocked) AFS_GUNLOCK(); #endif #endif if ((code = osi_NetSend(socket, &addr, p->wirevec, p->niovecs, p->length + RX_HEADER_SIZE, istack)) != 0) { /* send failed, so let's hurry up the resend, eh? */ MUTEX_ENTER(&rx_stats_mutex); rx_stats.netSendFailures++; MUTEX_EXIT(&rx_stats_mutex); p->retryTime = p->timeSent; /* resend it very soon */ clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8)); #ifdef AFS_NT40_ENV /* Windows is nice -- it can tell us right away that we cannot * reach this recipient by returning an WSAEHOSTUNREACH error * code. So, when this happens let's "down" the host NOW so * we don't sit around waiting for this host to timeout later. */ if (call && code == -1 && errno == WSAEHOSTUNREACH) call->lastReceiveTime = 0; #endif #if defined(KERNEL) && defined(AFS_LINUX20_ENV) /* Linux is nice -- it can tell us right away that we cannot * reach this recipient by returning an ENETUNREACH error * code. So, when this happens let's "down" the host NOW so * we don't sit around waiting for this host to timeout later. */ if (call && code == -ENETUNREACH) call->lastReceiveTime = 0; #endif } #ifdef KERNEL #ifdef RX_KERNEL_TRACE if (ICL_SETACTIVE(afs_iclSetp)) { AFS_GLOCK(); afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING, "after osi_NetSend()"); if (!waslocked) AFS_GUNLOCK(); } else #else if (waslocked) AFS_GLOCK(); #endif #endif #ifdef RXDEBUG } dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host), ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, (unsigned long)p, p->retryTime.sec, p->retryTime.usec / 1000, p->length)); #endif MUTEX_ENTER(&rx_stats_mutex); rx_stats.packetsSent[p->header.type - 1]++; MUTEX_EXIT(&rx_stats_mutex); MUTEX_ENTER(&peer->peer_lock); hadd32(peer->bytesSent, p->length); MUTEX_EXIT(&peer->peer_lock); } /* Send a list of packets to appropriate destination for the specified * connection. 
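 * The list goes out as a single jumbogram, so the caller may pass at most
 * RX_MAXIOVECS - 1 packets, and every packet except the last must hold
 * exactly RX_JUMBOBUFFERSIZE bytes of data in a single buffer.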
The headers are first encoded and placed in the packets. */ void rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn, struct rx_packet **list, int len, int istack) { #if defined(AFS_SUN5_ENV) && defined(KERNEL) int waslocked; #endif struct sockaddr_in addr; register struct rx_peer *peer = conn->peer; osi_socket socket; struct rx_packet *p = NULL; struct iovec wirevec[RX_MAXIOVECS]; int i, length, code; afs_uint32 serial; afs_uint32 temp; struct rx_jumboHeader *jp; #ifdef RXDEBUG char deliveryType = 'S'; #endif /* The address we're sending the packet to */ addr.sin_family = AF_INET; addr.sin_port = peer->port; addr.sin_addr.s_addr = peer->host; if (len + 1 > RX_MAXIOVECS) { osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n"); } /* * Stamp the packets in this jumbogram with consecutive serial numbers */ MUTEX_ENTER(&conn->conn_data_lock); serial = conn->serial; conn->serial += len; MUTEX_EXIT(&conn->conn_data_lock); /* This stuff should be revamped, I think, so that most, if not * all, of the header stuff is always added here. We could * probably do away with the encode/decode routines. XXXXX */ jp = NULL; length = RX_HEADER_SIZE; wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]); wirevec[0].iov_len = RX_HEADER_SIZE; for (i = 0; i < len; i++) { p = list[i]; /* The whole 3.5 jumbogram scheme relies on packets fitting * in a single packet buffer. */ if (p->niovecs > 2) { osi_Panic("rxi_SendPacketList, niovecs > 2\n"); } /* Set the RX_JUMBO_PACKET flags in all but the last packets * in this chunk. */ if (i < len - 1) { if (p->length != RX_JUMBOBUFFERSIZE) { osi_Panic("rxi_SendPacketList, length != jumbo size\n"); } p->header.flags |= RX_JUMBO_PACKET; length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE; wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE; } else { wirevec[i + 1].iov_len = p->length; length += p->length; } wirevec[i + 1].iov_base = (char *)(&p->localdata[0]); if (jp != NULL) { /* Convert jumbo packet header to network byte order */ temp = (afs_uint32) (p->header.flags) << 24; temp |= (afs_uint32) (p->header.spare); *(afs_uint32 *) jp = htonl(temp); } jp = (struct rx_jumboHeader *) ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE); /* Stamp each packet with a unique serial number. The serial * number is maintained on a connection basis because some types * of security may be based on the serial number of the packet, * and security is handled on a per authenticated-connection * basis. */ /* Pre-increment, to guarantee no zero serial number; a zero * serial number means the packet was never sent. */ p->header.serial = ++serial; /* This is so we can adjust retransmit time-outs better in the face of * rapidly changing round-trip times. RTO estimation is not a la Karn. */ if (p->firstSerial == 0) { p->firstSerial = p->header.serial; } #ifdef RXDEBUG /* If an output tracer function is defined, call it with the packet and * network address. Note this function may modify its arguments. */ if (rx_almostSent) { int drop = (*rx_almostSent) (p, &addr); /* drop packet if return value is non-zero? */ if (drop) deliveryType = 'D'; /* Drop the packet */ } #endif /* Get network byte order header */ rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to * touch ALL the fields */ } /* Send the packet out on the same socket that related packets are being * received on */ socket = (conn->type == RX_CLIENT_CONNECTION ? 
    /* Send the packet out on the same socket that related packets are being
     * received on */
    socket =
        (conn->type ==
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);

#ifdef RXDEBUG
    /* Possibly drop this packet, for testing purposes */
    if ((deliveryType == 'D')
        || ((rx_intentionallyDroppedPacketsPer100 > 0)
            && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
        deliveryType = 'D';         /* Drop the packet */
    } else {
        deliveryType = 'S';         /* Send the packet */
#endif /* RXDEBUG */

        /* Loop until the packet is sent.  We'd prefer just to use a
         * blocking socket, but unfortunately the interface doesn't
         * allow us to have the socket block in send mode, and not
         * block in receive mode */
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        waslocked = ISAFS_GLOCK();
        if (!istack && waslocked)
            AFS_GUNLOCK();
#endif
        if ((code =
             osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
                         istack)) != 0) {
            /* send failed, so let's hurry up the resend, eh? */
            MUTEX_ENTER(&rx_stats_mutex);
            rx_stats.netSendFailures++;
            MUTEX_EXIT(&rx_stats_mutex);
            for (i = 0; i < len; i++) {
                p = list[i];
                p->retryTime = p->timeSent;     /* resend it very soon */
                clock_Addmsec(&(p->retryTime),
                              10 + (((afs_uint32) p->backoff) << 8));
            }
#ifdef AFS_NT40_ENV
            /* Windows is nice -- it can tell us right away that we cannot
             * reach this recipient by returning a WSAEHOSTUNREACH error
             * code.  So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later. */
            if (call && code == -1 && errno == WSAEHOSTUNREACH)
                call->lastReceiveTime = 0;
#endif
#if defined(KERNEL) && defined(AFS_LINUX20_ENV)
            /* Linux is nice -- it can tell us right away that we cannot
             * reach this recipient by returning an ENETUNREACH error
             * code.  So, when this happens let's "down" the host NOW so
             * we don't sit around waiting for this host to timeout later. */
            if (call && code == -ENETUNREACH)
                call->lastReceiveTime = 0;
#endif
        }
#if defined(AFS_SUN5_ENV) && defined(KERNEL)
        if (!istack && waslocked)
            AFS_GLOCK();
#endif
#ifdef RXDEBUG
    }

    assert(p != NULL);

    dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d",
         deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1],
         ntohl(peer->host), ntohs(peer->port), p->header.serial,
         p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq,
         p->header.flags, (unsigned long)p, p->retryTime.sec,
         p->retryTime.usec / 1000, p->length));
#endif
    MUTEX_ENTER(&rx_stats_mutex);
    rx_stats.packetsSent[p->header.type - 1]++;
    MUTEX_EXIT(&rx_stats_mutex);
    MUTEX_ENTER(&peer->peer_lock);
    hadd32(peer->bytesSent, p->length);
    MUTEX_EXIT(&peer->peer_lock);
}
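
/* Illustrative sketch, not part of the original source: rxi_SendPacketList()
 * reserves a block of serial numbers up front by bumping conn->serial by len
 * under conn_data_lock, then pre-increments through that block so no packet
 * is ever stamped with serial 0 (0 means "never sent").  The helper below
 * (guarded by the hypothetical RX_EXAMPLES macro, with an illustrative name)
 * restates that numbering: with conn->serial previously 100 and len == 3,
 * the packets are stamped 101, 102, 103 and the connection counter is left
 * at 103. */
#ifdef RX_EXAMPLES
static void
rx_example_stamp_serials(afs_uint32 *connSerial, afs_uint32 *out, int len)
{
    afs_uint32 serial = *connSerial;    /* snapshot, as taken under conn_data_lock */
    int i;

    *connSerial += len;                 /* reserve the whole block at once */
    for (i = 0; i < len; i++)
        out[i] = ++serial;              /* pre-increment: a serial is never 0 */
}
#endif /* RX_EXAMPLES */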
/* Send a "special" packet to the peer connection.  If call is
 * specified, then the packet is directed to a specific call channel
 * associated with the connection, otherwise it is directed to the
 * connection only.  Uses optionalPacket if it is supplied, rather than
 * allocating a new packet buffer.  Nbytes is the length of the data
 * portion of the packet.  If data is non-null, nbytes of data are
 * copied into the packet.  Type is the type of the packet, as defined
 * in rx.h.  Bug: there's a lot of duplication between this and other
 * routines.  This needs to be cleaned up. */
struct rx_packet *
rxi_SendSpecial(register struct rx_call *call,
                register struct rx_connection *conn,
                struct rx_packet *optionalPacket, int type, char *data,
                int nbytes, int istack)
{
    /* Some of the following stuff should be common code for all
     * packet sends (it's repeated elsewhere) */
    register struct rx_packet *p;
    unsigned int i = 0;
    int savelen = 0, saven = 0;
    int channel, callNumber;
    if (call) {
        channel = call->channel;
        callNumber = *call->callNumber;
        /* BUSY packets refer to the next call on this connection */
        if (type == RX_PACKET_TYPE_BUSY) {
            callNumber++;
        }
    } else {
        channel = 0;
        callNumber = 0;
    }
    p = optionalPacket;
    if (!p) {
        p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (!p)
            osi_Panic("rxi_SendSpecial failure");
    }

    if (nbytes != -1)
        p->length = nbytes;
    else
        nbytes = p->length;
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;
    p->header.cid = (conn->cid | channel);
    p->header.callNumber = callNumber;
    p->header.seq = 0;
    p->header.epoch = conn->epoch;
    p->header.type = type;
    p->header.flags = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;
    if (data)
        rx_packetwrite(p, 0, nbytes, data);

    for (i = 1; i < p->niovecs; i++) {
        if (nbytes <= p->wirevec[i].iov_len) {
            savelen = p->wirevec[i].iov_len;
            saven = p->niovecs;
            p->wirevec[i].iov_len = nbytes;
            p->niovecs = i + 1;     /* so condition fails because i == niovecs */
        } else
            nbytes -= p->wirevec[i].iov_len;
    }

    if (call)
        rxi_Send(call, p, istack);
    else
        rxi_SendPacket((struct rx_call *)0, conn, p, istack);
    if (saven) {        /* means we truncated the packet above.  We probably don't  */
        /* really need to do this, but it seems safer this way, given that  */
        /* sneaky optionalPacket... */
        p->wirevec[i - 1].iov_len = savelen;
        p->niovecs = saven;
    }
    if (!optionalPacket)
        rxi_FreePacket(p);
    return optionalPacket;
}

/* Encode the packet's header (from the struct header in the packet to
 * the net byte order representation in the wire representation of the
 * packet, which is what is actually sent out on the wire) */
void
rxi_EncodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */

    memset((char *)buf, 0, RX_HEADER_SIZE);
    *buf++ = htonl(p->header.epoch);
    *buf++ = htonl(p->header.cid);
    *buf++ = htonl(p->header.callNumber);
    *buf++ = htonl(p->header.seq);
    *buf++ = htonl(p->header.serial);
    *buf++ =
        htonl((((afs_uint32) p->header.type) << 24) |
              (((afs_uint32) p->header.flags) << 16) |
              (p->header.userStatus << 8) | p->header.securityIndex);
    /* Note: top 16 bits of this next word were reserved */
    *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
}

/* Decode the packet's header (from net byte order to a struct header) */
void
rxi_DecodePacketHeader(register struct rx_packet *p)
{
    register afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
    afs_uint32 temp;

    p->header.epoch = ntohl(*buf);
    buf++;
    p->header.cid = ntohl(*buf);
    buf++;
    p->header.callNumber = ntohl(*buf);
    buf++;
    p->header.seq = ntohl(*buf);
    buf++;
    p->header.serial = ntohl(*buf);
    buf++;

    temp = ntohl(*buf);
    buf++;

    /* C will truncate byte fields to bytes for me */
    p->header.type = temp >> 24;
    p->header.flags = temp >> 16;
    p->header.userStatus = temp >> 8;
    p->header.securityIndex = temp >> 0;

    temp = ntohl(*buf);
    buf++;

    p->header.serviceId = (temp & 0xffff);
    p->header.spare = temp >> 16;
    /* Note: top 16 bits of this last word are the security checksum */
}
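
/* Illustrative sketch, not part of the original source: the encode/decode
 * pair above packs four one-byte header fields into a single 32-bit word as
 * (type << 24) | (flags << 16) | (userStatus << 8) | securityIndex, and a
 * second word as (spare << 16) | (serviceId & 0xffff).  The function below
 * (guarded by the hypothetical RX_EXAMPLES macro, with an illustrative name)
 * round-trips one such word so the shifts are easy to check; with the
 * customary values RX_PACKET_TYPE_DATA == 1 and RX_LAST_PACKET == 0x04, the
 * packed word here would be 0x01040002. */
#ifdef RX_EXAMPLES
static int
rx_example_pack_unpack(void)
{
    afs_uint32 word =
        (((afs_uint32) RX_PACKET_TYPE_DATA) << 24) |    /* type */
        (((afs_uint32) RX_LAST_PACKET) << 16) |         /* flags */
        (0 << 8) |                                      /* userStatus */
        2;                                              /* securityIndex */
    unsigned char type = word >> 24;
    unsigned char flags = word >> 16;   /* C truncates to a byte, as in the decoder */
    unsigned char userStatus = word >> 8;
    unsigned char securityIndex = word;

    /* returns 1 when the unpacked fields match what was packed */
    return (type == RX_PACKET_TYPE_DATA && flags == RX_LAST_PACKET
            && userStatus == 0 && securityIndex == 2);
}
#endif /* RX_EXAMPLES */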
void
rxi_PrepareSendPacket(register struct rx_call *call,
                      register struct rx_packet *p, register int last)
{
    register struct rx_connection *conn = call->conn;
    int i, j;
    ssize_t len;        /* len must be a signed type; it can go negative */

    p->flags &= ~RX_PKTFLAG_ACKED;
    p->header.cid = (conn->cid | call->channel);
    p->header.serviceId = conn->serviceId;
    p->header.securityIndex = conn->securityIndex;

    /* No data packets on call 0.  Where do these come from? */
    if (*call->callNumber == 0)
        *call->callNumber = 1;

    p->header.callNumber = *call->callNumber;
    p->header.seq = call->tnext++;
    p->header.epoch = conn->epoch;
    p->header.type = RX_PACKET_TYPE_DATA;
    p->header.flags = 0;
    p->header.spare = 0;
    if (conn->type == RX_CLIENT_CONNECTION)
        p->header.flags |= RX_CLIENT_INITIATED;

    if (last)
        p->header.flags |= RX_LAST_PACKET;

    clock_Zero(&p->retryTime);      /* Never yet transmitted */
    clock_Zero(&p->firstSent);      /* Never yet transmitted */
    p->header.serial = 0;           /* Another way of saying never transmitted... */
    p->backoff = 0;

    /* Now that we're sure this is the last data on the call, make sure
     * that the "length" and the sum of the iov_lens match. */
    len = p->length + call->conn->securityHeaderSize;

    for (i = 1; i < p->niovecs && len > 0; i++) {
        len -= p->wirevec[i].iov_len;
    }
    if (len > 0) {
        osi_Panic("PrepareSendPacket 1\n");     /* MTUXXX */
    } else {
        struct rx_queue q;
        int nb;

        queue_Init(&q);

        /* Free any extra elements in the wirevec */
        for (j = MAX(2, i), nb = p->niovecs - j; j < p->niovecs; j++) {
            queue_Append(&q, RX_CBUF_TO_PACKET(p->wirevec[j].iov_base, p));
        }
        if (nb)
            rxi_FreePackets(nb, &q);

        p->niovecs = i;
        p->wirevec[i - 1].iov_len += len;
    }
    RXS_PreparePacket(conn->securityObject, call, p);
}

/* Given an interface MTU size, calculate an adjusted MTU size that
 * will make efficient use of the RX buffers when the peer is sending
 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
int
rxi_AdjustIfMTU(int mtu)
{
    int adjMTU;
    int frags;

    adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
    if (mtu <= adjMTU) {
        return mtu;
    }
    mtu -= adjMTU;
    if (mtu <= 0) {
        return adjMTU;
    }
    frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
    return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}

/* Given an interface MTU size, and the peer's advertised max receive
 * size, calculate an adjusted maxMTU size that makes efficient use
 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
int
rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
{
    int maxMTU = mtu * rxi_nSendFrags;
    maxMTU = MIN(maxMTU, peerMaxMTU);
    return rxi_AdjustIfMTU(maxMTU);
}

/* Given a packet size, figure out how many datagram packets will fit.
 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
int
rxi_AdjustDgramPackets(int frags, int mtu)
{
    int maxMTU;
    if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
        return 1;
    }
    maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
    maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
    /* subtract the size of the first and last packets */
    maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
    if (maxMTU < 0) {
        return 1;
    }
    return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
}
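
/* Illustrative note, not from the original source: assuming the customary
 * constants (RX_HEADER_SIZE 28, RX_JUMBOBUFFERSIZE 1412, RX_JUMBOHEADERSIZE
 * 4), rxi_AdjustIfMTU() leaves small sizes alone and otherwise rounds a size
 * down so that it packs whole jumbo buffers:
 *
 *   rxi_AdjustIfMTU(1472): adjMTU = 28 + 1412 + 4 = 1444; 1472 - 1444 = 28;
 *                          28 / 1416 = 0 extra frags; result 1444.
 *   rxi_AdjustIfMTU(8972): 8972 - 1444 = 7528; 7528 / 1416 = 5 extra frags;
 *                          result 1444 + 5 * 1416 = 8524.
 *
 * In other words, a 1472-byte UDP payload limit (1500-byte Ethernet minus IP
 * and UDP headers) is trimmed to exactly one wire header plus one jumbo
 * buffer and its jumbo header. */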