/* rx_user.c contains routines specific to the user space UNIX implementation of rx */
+/* rxi_syscall is currently not prototyped */
+
#include <afsconfig.h>
#include <afs/param.h>
-RCSID
- ("$Header$");
# include <sys/types.h>
# include <errno.h>
# include <signal.h>
+# include <string.h>
#ifdef AFS_NT40_ENV
# include <WINNT/syscfg.h>
#else
# include <sys/time.h>
# include <net/if.h>
# include <sys/ioctl.h>
+# include <unistd.h>
#endif
# include <fcntl.h>
-#if !defined(AFS_AIX_ENV) && !defined(AFS_NT40_ENV) && !defined(AFS_DJGPP_ENV)
+#if !defined(AFS_AIX_ENV) && !defined(AFS_NT40_ENV)
# include <sys/syscall.h>
#endif
#include <afs/afs_args.h>
#include <afs/afsutil.h>
-#ifdef HAVE_STRING_H
-#include <string.h>
-#else
-#ifdef HAVE_STRINGS_H
-#include <strings.h>
-#endif
-#endif
#ifndef IPPORT_USERRESERVED
/* If in.h doesn't define this, define it anyway. Unfortunately, defining
#define IPPORT_USERRESERVED 5000
# endif
+#if defined(HAVE_LINUX_ERRQUEUE_H) && defined(ADAPT_PMTU)
+#include <linux/types.h>
+#include <linux/errqueue.h>
+#ifndef IP_MTU
+#define IP_MTU 14
+#endif
+#endif
+
#ifndef AFS_NT40_ENV
# include <sys/time.h>
#endif
* Inited
*/
-pthread_mutex_t rx_if_init_mutex;
-#define LOCK_IF_INIT assert(pthread_mutex_lock(&rx_if_init_mutex)==0)
-#define UNLOCK_IF_INIT assert(pthread_mutex_unlock(&rx_if_init_mutex)==0)
+afs_kmutex_t rx_if_init_mutex;
+#define LOCK_IF_INIT MUTEX_ENTER(&rx_if_init_mutex)
+#define UNLOCK_IF_INIT MUTEX_EXIT(&rx_if_init_mutex)
/*
* The rx_if_mutex mutex protects the following global variables:
* myNetMasks
*/
-pthread_mutex_t rx_if_mutex;
-#define LOCK_IF assert(pthread_mutex_lock(&rx_if_mutex)==0)
-#define UNLOCK_IF assert(pthread_mutex_unlock(&rx_if_mutex)==0)
+afs_kmutex_t rx_if_mutex;
+#define LOCK_IF MUTEX_ENTER(&rx_if_mutex)
+#define UNLOCK_IF MUTEX_EXIT(&rx_if_mutex)
#else
#define LOCK_IF_INIT
#define UNLOCK_IF_INIT
struct sockaddr_in taddr;
char *name = "rxi_GetUDPSocket: ";
#ifdef AFS_LINUX22_ENV
+#if defined(ADAPT_PMTU)
+ int pmtu=IP_PMTUDISC_WANT;
+ int recverr=1;
+#else
int pmtu=IP_PMTUDISC_DONT;
#endif
+#endif
-#if !defined(AFS_NT40_ENV) && !defined(AFS_DJGPP_ENV)
+#if !defined(AFS_NT40_ENV)
if (ntohs(port) >= IPPORT_RESERVED && ntohs(port) < IPPORT_USERRESERVED) {
/* (osi_Msg "%s*WARNING* port number %d is not a reserved port number. Use port numbers above %d\n", name, port, IPPORT_USERRESERVED);
*/ ;
goto error;
}
+#ifdef AFS_NT40_ENV
+ rxi_xmit_init(socketFd);
+#endif /* AFS_NT40_ENV */
+
taddr.sin_addr.s_addr = ahost;
taddr.sin_family = AF_INET;
taddr.sin_port = (u_short) port;
(osi_Msg "%sbind failed\n", name);
goto error;
}
-#if !defined(AFS_NT40_ENV) && !defined(AFS_DJGPP_ENV)
+#if !defined(AFS_NT40_ENV)
/*
* Set close-on-exec on rx socket
*/
fcntl(socketFd, F_SETFD, 1);
#endif
-#ifndef AFS_DJGPP_ENV
/* Use one of three different ways of getting a socket buffer expanded to
* a reasonable size.
*/
len1 = 32766;
len2 = rx_UdpBufSize;
- greedy =
- (setsockopt
- (socketFd, SOL_SOCKET, SO_RCVBUF, (char *)&len2,
- sizeof(len2)) >= 0);
- if (!greedy) {
- len2 = 32766; /* fall back to old size... uh-oh! */
- }
- greedy =
+ /* find the size closest to rx_UdpBufSize that will be accepted */
+ while (!greedy && len2 > len1) {
+ greedy =
+ (setsockopt
+ (socketFd, SOL_SOCKET, SO_RCVBUF, (char *)&len2,
+ sizeof(len2)) >= 0);
+ if (!greedy)
+ len2 /= 2;
+ }
+
+ /* but do not let it get smaller than 32K */
+ if (len2 < len1)
+ len2 = len1;
+
+ if (len1 < len2)
+ len1 = len2;
+
+
+ greedy =
(setsockopt
(socketFd, SOL_SOCKET, SO_SNDBUF, (char *)&len1,
sizeof(len1)) >= 0)
if (!greedy)
(osi_Msg "%s*WARNING* Unable to increase buffering on socket\n",
name);
- MUTEX_ENTER(&rx_stats_mutex);
- rx_stats.socketGreedy = greedy;
- MUTEX_EXIT(&rx_stats_mutex);
+ if (rx_stats_active) {
+ MUTEX_ENTER(&rx_stats_mutex);
+ rx_stats.socketGreedy = greedy;
+ MUTEX_EXIT(&rx_stats_mutex);
+ }
}
-#endif /* AFS_DJGPP_ENV */
#ifdef AFS_LINUX22_ENV
setsockopt(socketFd, SOL_IP, IP_MTU_DISCOVER, &pmtu, sizeof(pmtu));
+#if defined(ADAPT_PMTU)
+ setsockopt(socketFd, SOL_IP, IP_RECVERR, &recverr, sizeof(recverr));
+#endif
#endif
-
if (rxi_Listen(socketFd) < 0) {
goto error;
}
}
void
-osi_Panic(msg, a1, a2, a3)
- char *msg;
+osi_Panic(char *msg, ...)
{
+ va_list ap;
+ va_start(ap, msg);
(osi_Msg "Fatal Rx error: ");
- (osi_Msg msg, a1, a2, a3);
+ (osi_VMsg msg, ap);
+ va_end(ap);
fflush(stderr);
fflush(stdout);
afs_abort();
static u_int rxi_numNetAddrs;
static int Inited = 0;
-#if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
+#if defined(AFS_NT40_ENV)
int
rxi_getaddr(void)
{
** maxSize - max number of interfaces to return.
*/
int
-rx_getAllAddr(afs_int32 * buffer, int maxSize)
+rx_getAllAddr(afs_uint32 * buffer, int maxSize)
{
int count = 0, offset = 0; /* count indexes buffer; offset indexes rxi_NetAddrs */
/* The IP address list can change so we must query for it */
rx_GetIFInfo();
- /* we don't want to use the loopback adapter which is first */
- /* this is a bad bad hack */
- if ( rxi_numNetAddrs > 1 )
- offset = 1;
-
/* Copy addresses, each converted with htonl(), until either the
 * rxi_NetAddrs list or the caller's maxSize slots are exhausted. */
for (count = 0; offset < rxi_numNetAddrs && maxSize > 0;
count++, offset++, maxSize--)
buffer[count] = htonl(rxi_NetAddrs[offset]);
return count; /* number of entries stored in buffer */
}
+
+/*
+ * Refresh the interface list with rx_GetIFInfo() and copy at most
+ * maxSize entries into the three caller-supplied arrays: the interface
+ * address, its matching netmask and its MTU.  Every value stored is
+ * converted with htonl() (network byte order).
+ *
+ * Returns the number of entries written to each array.
+ */
+int
+rx_getAllAddrMaskMtu(afs_uint32 addrBuffer[], afs_uint32 maskBuffer[],
+                     afs_uint32 mtuBuffer[], int maxSize)
+{
+    int n = 0;
+
+    /* The IP address list can change so we must query for it */
+    rx_GetIFInfo();
+
+    /* Stop when either the interface list or the caller's space runs out. */
+    while (n < rxi_numNetAddrs && maxSize > 0) {
+        addrBuffer[n] = htonl(rxi_NetAddrs[n]);
+        maskBuffer[n] = htonl(myNetMasks[n]);
+        mtuBuffer[n] = htonl(myNetMTUs[n]);
+        n++;
+        maxSize--;
+    }
+    return n;
+}
#endif
#ifdef AFS_NT40_ENV
+extern int rxinit_status;
+/*
+ * If rx_maxJumboRecvSize is larger than RX_FIRSTBUFFERSIZE, ask
+ * rxi_MorePackets() for (rx_initSendWindow - 1) * (ncbufs + 1) packets,
+ * where ncbufs is the number of whole RX_CBUFFERSIZE units in the excess.
+ * Does nothing otherwise.
+ */
+void
+rxi_InitMorePackets(void)
+{
+    int ncbufs = (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE);
+    int npackets;
+
+    if (ncbufs <= 0)
+        return;
+
+    ncbufs /= RX_CBUFFERSIZE;
+    npackets = rx_initSendWindow - 1;
+    rxi_MorePackets(npackets * (ncbufs + 1));
+}
void
rx_GetIFInfo(void)
{
u_int maxsize;
u_int rxsize;
- int npackets, ncbufs;
afs_uint32 i;
LOCK_IF_INIT;
+ if (Inited) {
+ if (Inited < 2 && rxinit_status == 0) {
+ /* We couldn't initialize more packets earlier.
+ * Do it now. */
+ rxi_InitMorePackets();
+ Inited = 2;
+ }
+ UNLOCK_IF_INIT;
+ return;
+ }
Inited = 1;
UNLOCK_IF_INIT;
maxsize =
rxi_nRecvFrags * rxsize + (rxi_nRecvFrags - 1) * UDP_HDR_SIZE;
maxsize = rxi_AdjustMaxMTU(rxsize, maxsize);
- if (rx_maxReceiveSize < maxsize) {
+ if (rx_maxReceiveSize > maxsize) {
rx_maxReceiveSize = MIN(RX_MAX_PACKET_SIZE, maxsize);
rx_maxReceiveSize =
MIN(rx_maxReceiveSize, rx_maxReceiveSizeUser);
}
-
+ if (rx_MyMaxSendSize > maxsize) {
+ rx_MyMaxSendSize = MIN(RX_MAX_PACKET_SIZE, maxsize);
+ }
}
UNLOCK_IF;
- ncbufs = (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE);
- if (ncbufs > 0) {
- ncbufs = ncbufs / RX_CBUFFERSIZE;
- npackets = rx_initSendWindow - 1;
- rxi_MorePackets(npackets * (ncbufs + 1));
- }
+
+ /*
+ * If rxinit_status is still set, rx_InitHost() has yet to be called
+ * and we therefore do not have any mutex locks initialized. As a
+ * result we cannot call rxi_MorePackets() without crashing.
+ */
+ if (rxinit_status)
+ return;
+
+ rxi_InitMorePackets();
}
#endif
-#if !defined(AFS_AIX_ENV) && !defined(AFS_NT40_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_DJGPP_ENV)
+#if !defined(AFS_AIX_ENV) && !defined(AFS_NT40_ENV) && !defined(AFS_LINUX20_ENV)
int
-rxi_syscall(a3, a4, a5)
- afs_uint32 a3, a4;
- void *a5;
+rxi_syscall(afs_uint32 a3, afs_uint32 a4, void *a5)
{
afs_uint32 rcode;
- void (*old) ();
+ void (*old) (int);
- old = (void (*)())signal(SIGSYS, SIG_IGN);
+ old = signal(SIGSYS, SIG_IGN);
#if defined(AFS_SGI_ENV)
- rcode = afs_syscall(a3, a4, a5);
+ rcode = afs_syscall(AFS_SYSCALL, 28, a3, a4, a5);
#else
rcode = syscall(AFS_SYSCALL, 28 /* AFSCALL_CALL */ , a3, a4, a5);
#endif /* AFS_SGI_ENV */
{
int s;
int i, j, len, res;
-#ifndef AFS_DJGPP_ENV
struct ifconf ifc;
struct ifreq ifs[ADDRSPERSITE];
struct ifreq *ifr;
char buf[BUFSIZ], *cp, *cplim;
#endif
struct sockaddr_in *a;
-#endif /* AFS_DJGPP_ENV */
LOCK_IF_INIT;
if (Inited) {
if (s < 0)
return;
-#ifndef AFS_DJGPP_ENV
#ifdef AFS_AIX41_ENV
ifc.ifc_len = sizeof(buf);
ifc.ifc_buf = buf;
if (a->sin_family != AF_INET)
continue;
rxi_NetAddrs[rxi_numNetAddrs] = ntohl(a->sin_addr.s_addr);
- if (rxi_NetAddrs[rxi_numNetAddrs] == 0x7f000001) {
+ if (rx_IsLoopbackAddr(rxi_NetAddrs[rxi_numNetAddrs])) {
/* we don't really care about "localhost" */
continue;
}
#endif
}
- if (rxi_NetAddrs[rxi_numNetAddrs] != 0x7f000001) { /* ignore lo0 */
+ if (!rx_IsLoopbackAddr(rxi_NetAddrs[rxi_numNetAddrs])) { /* ignore lo0 */
int maxsize;
maxsize =
rxi_nRecvFrags * (myNetMTUs[rxi_numNetAddrs] - RX_IP_SIZE);
rxi_MorePackets(npackets * (ncbufs + 1));
}
}
-#else /* AFS_DJGPP_ENV */
- close(s);
- return;
-#endif /* AFS_DJGPP_ENV */
}
#endif /* AFS_NT40_ENV */
afs_uint32 ppaddr;
u_short rxmtu;
int ix;
+#if defined(ADAPT_PMTU) && defined(IP_MTU)
+ int sock;
+ struct sockaddr_in addr;
+#endif
pp->timeout.sec = 2;
pp->ifMTU = MIN(rx_MyMaxSendSize, OLD_MAX_PACKET_SIZE);
#endif /* ADAPT_MTU */
+#if defined(ADAPT_PMTU) && defined(IP_MTU)
+ sock=socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (sock >= 0) {
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = pp->host;
+ addr.sin_port = pp->port;
+ if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
+ int mtu=0;
+ socklen_t s = sizeof(mtu);
+ if (getsockopt(sock, SOL_IP, IP_MTU, &mtu, &s)== 0) {
+ pp->ifMTU = MIN(mtu - RX_IPUDP_SIZE, pp->ifMTU);
+ }
+ }
+ close(sock);
+ }
+#endif
pp->ifMTU = rxi_AdjustIfMTU(pp->ifMTU);
pp->maxMTU = OLD_MAX_PACKET_SIZE; /* for compatibility with old guys */
pp->natMTU = MIN((int)pp->ifMTU, OLD_MAX_PACKET_SIZE);
pp->maxDgramPackets =
MIN(rxi_nDgramPackets,
- rxi_AdjustDgramPackets(RX_MAX_FRAGS, pp->ifMTU));
+ rxi_AdjustDgramPackets(rxi_nSendFrags, pp->ifMTU));
pp->ifDgramPackets =
MIN(rxi_nDgramPackets,
- rxi_AdjustDgramPackets(RX_MAX_FRAGS, pp->ifMTU));
+ rxi_AdjustDgramPackets(rxi_nSendFrags, pp->ifMTU));
pp->maxDgramPackets = 1;
/* Initialize slow start parameters */
pp->MTU = MIN(pp->natMTU, pp->maxMTU);
{
rx_MyMaxSendSize = rx_maxReceiveSizeUser = rx_maxReceiveSize = mtu;
}
+
+#if defined(ADAPT_PMTU)
+/*
+ * Read one pending message from the error queue of `socket` (non-blocking,
+ * MSG_ERRQUEUE) and look for an IP_RECVERR control message in it.
+ *
+ * When the extended error is EMSGSIZE and carries a plausible MTU in
+ * ee_info (>= 68, presumably the minimum IPv4 MTU), the peer identified by
+ * the address returned by recvmsg() has its MTU lowered via
+ * rxi_SetPeerMtu() to ee_info - RX_IPUDP_SIZE.
+ *
+ * Returns 1 if an IP_RECVERR control message was found, 0 otherwise
+ * (including always when HAVE_LINUX_ERRQUEUE_H is not defined).
+ */
+int
+rxi_HandleSocketError(int socket)
+{
+    int ret=0;
+#if defined(HAVE_LINUX_ERRQUEUE_H)
+    struct msghdr msg;
+    struct cmsghdr *cmsg;
+    struct sock_extended_err *err;
+    struct sockaddr_in addr;
+    char controlmsgbuf[256];
+    int code;
+
+    /* No payload is wanted: only the address and the control data. */
+    msg.msg_name = &addr;
+    msg.msg_namelen = sizeof(addr);
+    msg.msg_iov = NULL;
+    msg.msg_iovlen = 0;
+    msg.msg_control = controlmsgbuf;
+    msg.msg_controllen = sizeof(controlmsgbuf);
+    msg.msg_flags = 0;
+    code = recvmsg(socket, &msg, MSG_ERRQUEUE|MSG_DONTWAIT|MSG_TRUNC);
+
+    if (code < 0 || !(msg.msg_flags & MSG_ERRQUEUE))
+        goto out;
+
+    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+        /* Give up on a header that does not fit in the received control
+         * data or that has a zero length. */
+        if ((char *)cmsg - controlmsgbuf > msg.msg_controllen - CMSG_SPACE(0) ||
+            (char *)cmsg - controlmsgbuf > msg.msg_controllen - CMSG_SPACE(cmsg->cmsg_len) ||
+            cmsg->cmsg_len == 0) {
+            cmsg = 0;
+            break;
+        }
+        if (cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_RECVERR)
+            break;
+    }
+    if (!cmsg)
+        goto out;
+    ret=1;
+    err =(struct sock_extended_err *) CMSG_DATA(cmsg);
+
+    /* Only trust addr if recvmsg() actually filled in an IPv4 address;
+     * otherwise it is uninitialized stack data. */
+    if (err->ee_errno == EMSGSIZE && err->ee_info >= 68 &&
+        msg.msg_namelen >= sizeof(addr) && addr.sin_family == AF_INET) {
+        rxi_SetPeerMtu(NULL, addr.sin_addr.s_addr, addr.sin_port,
+                       err->ee_info - RX_IPUDP_SIZE);
+    }
+    /* other DEST_UNREACH's and TIME_EXCEEDED should be dealt with too */
+
+out:
+#endif
+    return ret;
+}
+#endif