# include <sys/types.h>
# include <errno.h>
# include <signal.h>
+# include <string.h>
#ifdef AFS_NT40_ENV
# include <WINNT/syscfg.h>
#else
# include <sys/ioctl.h>
#endif
# include <fcntl.h>
-#if !defined(AFS_AIX_ENV) && !defined(AFS_NT40_ENV) && !defined(AFS_DJGPP_ENV)
+#if !defined(AFS_AIX_ENV) && !defined(AFS_NT40_ENV)
# include <sys/syscall.h>
#endif
#include <afs/afs_args.h>
#include <afs/afsutil.h>
-#ifdef HAVE_STRING_H
-#include <string.h>
-#else
-#ifdef HAVE_STRINGS_H
-#include <strings.h>
-#endif
-#endif
#ifndef IPPORT_USERRESERVED
/* If in.h doesn't define this, define it anyway. Unfortunately, defining
#ifndef AFS_NT40_ENV
# include <sys/time.h>
#endif
+# include "rx_internal.h"
# include "rx.h"
# include "rx_globals.h"
struct sockaddr_in taddr;
char *name = "rxi_GetUDPSocket: ";
#ifdef AFS_LINUX22_ENV
+#if defined(ADAPT_PMTU)
+ int pmtu=IP_PMTUDISC_WANT;
+ int recverr=1;
+#else
int pmtu=IP_PMTUDISC_DONT;
#endif
+#endif
+#if defined(HAVE_LINUX_ERRQUEUE_H) && defined(ADAPT_PMTU)
+#include <linux/types.h>
+#include <linux/errqueue.h>
+#ifndef IP_MTU
+#define IP_MTU 14
+#endif
+#endif
-#if !defined(AFS_NT40_ENV) && !defined(AFS_DJGPP_ENV)
+#if !defined(AFS_NT40_ENV)
if (ntohs(port) >= IPPORT_RESERVED && ntohs(port) < IPPORT_USERRESERVED) {
/* (osi_Msg "%s*WARNING* port number %d is not a reserved port number. Use port numbers above %d\n", name, port, IPPORT_USERRESERVED);
*/ ;
(osi_Msg "%sbind failed\n", name);
goto error;
}
-#if !defined(AFS_NT40_ENV) && !defined(AFS_DJGPP_ENV)
+#if !defined(AFS_NT40_ENV)
/*
* Set close-on-exec on rx socket
*/
fcntl(socketFd, F_SETFD, 1);
#endif
-#ifndef AFS_DJGPP_ENV
/* Use one of three different ways of getting a socket buffer expanded to
* a reasonable size.
*/
rx_stats.socketGreedy = greedy;
MUTEX_EXIT(&rx_stats_mutex);
}
-#endif /* AFS_DJGPP_ENV */
#ifdef AFS_LINUX22_ENV
setsockopt(socketFd, SOL_IP, IP_MTU_DISCOVER, &pmtu, sizeof(pmtu));
+#if defined(ADAPT_PMTU)
+ setsockopt(socketFd, SOL_IP, IP_RECVERR, &recverr, sizeof(recverr));
+#endif
#endif
-
if (rxi_Listen(socketFd) < 0) {
goto error;
}
}
void
-osi_Panic(char *msg, int a1, int a2, int a3)
+osi_Panic(char *msg, ...)
{
+ va_list ap;
+ va_start(ap, msg);
(osi_Msg "Fatal Rx error: ");
- (osi_Msg msg, a1, a2, a3);
+ (osi_VMsg msg, ap);
+ va_end(ap);
fflush(stderr);
fflush(stdout);
afs_abort();
+/*
+ * Report a failed assertion: hands the expression text, the source file
+ * name and the line number to osi_Panic() for formatting.
+ */
void
osi_AssertFailU(const char *expr, const char *file, int line)
{
- osi_Panic("assertion failed: %s, file: %s, line: %d\n", (int)expr,
- (int)file, line);
+ /* osi_Panic() takes a variable argument list, so the two strings are
+  * passed as pointers matching the %s conversions rather than being
+  * cast to int. */
+ osi_Panic("assertion failed: %s, file: %s, line: %d\n", expr,
+ file, line);
}
-#ifdef AFS_AIX32_ENV
+#if defined(AFS_AIX32_ENV) && !defined(KERNEL)
#ifndef osi_Alloc
static const char memZero;
+/*
+ * Allocate x bytes with malloc().  A request for 0 bytes is answered with
+ * the address of the static memZero object instead of calling malloc(),
+ * so the caller sees NULL only when an allocation actually failed.
+ */
-char *
+void *
osi_Alloc(afs_int32 x)
{
/*
- * 0-length allocs may return NULL ptr from osi_kalloc, so we special-case
+ * 0-length allocs may return NULL ptr from malloc, so we special-case
* things so that NULL returned iff an error occurred
*/
if (x == 0)
- return &memZero;
- return ((char *)malloc(x));
+ /* memZero is declared const; the cast discards the qualifier so the
+  * address can be returned as a plain void pointer */
+ return (void *)&memZero;
+ return(malloc(x));
}
+/*
+ * Release memory obtained from osi_Alloc().  The memZero address handed
+ * out for 0-length allocations did not come from malloc(), so it is not
+ * passed to free().  The size argument is not used by this implementation.
+ */
void
-osi_Free(char *x, afs_int32 size)
+osi_Free(void *x, afs_int32 size)
{
if (x == &memZero)
return;
- free((char *)x);
+ free(x);
}
#endif
-#endif /* AFS_AIX32_ENV */
+#endif /* defined(AFS_AIX32_ENV) && !defined(KERNEL) */
#define ADDRSPERSITE 16
-afs_uint32 rxi_NetAddrs[ADDRSPERSITE]; /* host order */
+static afs_uint32 rxi_NetAddrs[ADDRSPERSITE]; /* host order */
static int myNetMTUs[ADDRSPERSITE];
static int myNetMasks[ADDRSPERSITE];
static int myNetFlags[ADDRSPERSITE];
-u_int rxi_numNetAddrs;
+static u_int rxi_numNetAddrs;
static int Inited = 0;
-#if defined(AFS_NT40_ENV) || defined(AFS_DJGPP_ENV)
+#if defined(AFS_NT40_ENV)
int
rxi_getaddr(void)
{
- if (rxi_numNetAddrs > 0)
+ /* The IP address list can change so we must query for it */
+ rx_GetIFInfo();
+
+ /* we don't want to use the loopback adapter which is first */
+ /* this is a bad bad hack */
+ if (rxi_numNetAddrs > 1)
+ return htonl(rxi_NetAddrs[1]);
+ else if (rxi_numNetAddrs > 0)
return htonl(rxi_NetAddrs[0]);
else
return 0;
** maxSize - max number of interfaces to return.
*/
int
-rx_getAllAddr(afs_int32 * buffer, int maxSize)
+rx_getAllAddr(afs_uint32 * buffer, int maxSize)
{
- int count = 0;
- for (count = 0; count < rxi_numNetAddrs && maxSize > 0;
- count++, maxSize--)
- buffer[count] = htonl(rxi_NetAddrs[count]);
+ /* count: entries written to buffer; offset: index into rxi_NetAddrs */
+ int count = 0, offset = 0;
+
+ /* The IP address list can change so we must query for it */
+ rx_GetIFInfo();
+
+#ifdef AFS_DJGPP_ENV
+ /* we don't want to use the loopback adapter which is first */
+ /* this is a bad bad hack.
+ * and doesn't hold true on Windows.
+ */
+ if ( rxi_numNetAddrs > 1 )
+ offset = 1;
+#endif /* AFS_DJGPP_ENV */
+
+ /* Copy addresses starting at offset, converting each from host to
+  * network byte order, until the table is exhausted or maxSize entries
+  * have been written.
+  * NOTE(review): offset is int while rxi_numNetAddrs is u_int, so the
+  * comparison is done unsigned; harmless while offset stays >= 0. */
+ for (count = 0; offset < rxi_numNetAddrs && maxSize > 0;
+ count++, offset++, maxSize--)
+ buffer[count] = htonl(rxi_NetAddrs[offset]);
+ /* number of addresses stored in buffer */
return count;
}
+/* Copy the known interface addresses, together with the matching
+ * interface masks and MTUs, into the three arrays passed in by the
+ * caller.  At most maxSize entries are written to each array, and the
+ * number of entries actually written is returned.  All values are
+ * stored in network byte order (each one is passed through htonl()).
+ */
+int
+rx_getAllAddrMaskMtu(afs_uint32 addrBuffer[], afs_uint32 maskBuffer[],
+ afs_uint32 mtuBuffer[], int maxSize)
+{
+ /* count: entries written to the output arrays; offset: index into the
+  * file-scope interface tables */
+ int count = 0, offset = 0;
+
+ /* The IP address list can change so we must query for it */
+ rx_GetIFInfo();
+
+#ifdef AFS_DJGPP_ENV
+ /* we don't want to use the loopback adapter which is first */
+ /* this is a bad bad hack.
+ * and doesn't hold true on Windows.
+ */
+ if ( rxi_numNetAddrs > 1 )
+ offset = 1;
+#endif /* AFS_DJGPP_ENV */
+
+ /* The three tables are indexed in parallel, so entry `count` of every
+  * output array describes the same interface. */
+ for (count = 0;
+ offset < rxi_numNetAddrs && maxSize > 0;
+ count++, offset++, maxSize--) {
+ addrBuffer[count] = htonl(rxi_NetAddrs[offset]);
+ maskBuffer[count] = htonl(myNetMasks[offset]);
+ mtuBuffer[count] = htonl(myNetMTUs[offset]);
+ }
+ return count;
+}
#endif
#ifdef AFS_NT40_ENV
-
+extern int rxinit_status;
+/*
+ * Request additional packets from rxi_MorePackets().  ncbufs is the
+ * number of whole RX_CBUFFERSIZE-sized pieces in the part of
+ * rx_maxJumboRecvSize that exceeds RX_FIRSTBUFFERSIZE; the request is for
+ * (rx_initSendWindow - 1) * (ncbufs + 1) packets.  Nothing is requested
+ * when rx_maxJumboRecvSize does not exceed RX_FIRSTBUFFERSIZE.
+ */
+void
+rxi_InitMorePackets(void) {
+ int npackets, ncbufs;
+
+ ncbufs = (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE);
+ if (ncbufs > 0) {
+ ncbufs = ncbufs / RX_CBUFFERSIZE;
+ npackets = rx_initSendWindow - 1;
+ rxi_MorePackets(npackets * (ncbufs + 1));
+ }
+}
+/*
+ * Load the interface tables (addresses, masks, MTUs, flags) through
+ * syscfg_GetIFInfo() and raise rx_maxReceiveSize to fit the interfaces
+ * found.  The tables are read only on the first call: once Inited is
+ * non-zero, later calls return without querying the interfaces again.
+ * Inited == 2 records that a later call has performed the
+ * rxi_InitMorePackets() step that could not be done earlier.
+ */
void
rx_GetIFInfo(void)
{
+ u_int maxsize;
+ u_int rxsize;
+ afs_uint32 i;
+
LOCK_IF_INIT;
if (Inited) {
- UNLOCK_IF_INIT;
+ if (Inited < 2 && rxinit_status == 0) {
+ /* We couldn't initialize more packets earlier.
+ * Do it now. */
+ rxi_InitMorePackets();
+ Inited = 2;
+ }
+ UNLOCK_IF_INIT;
return;
- } else {
- u_int maxsize;
- u_int rxsize;
- int npackets, ncbufs;
- afs_uint32 i;
+ }
+ /* First call: mark the tables as initialised before dropping the init
+  * lock, so a concurrent caller takes the early-return path above. */
+ Inited = 1;
+ UNLOCK_IF_INIT;
- Inited = 1;
- UNLOCK_IF_INIT;
- rxi_numNetAddrs = ADDRSPERSITE;
+ LOCK_IF;
+ /* rxi_numNetAddrs is passed by address: it is set to the table capacity
+  * here and read back as the loop bound after the call. */
+ rxi_numNetAddrs = ADDRSPERSITE;
+ (void)syscfg_GetIFInfo(&rxi_numNetAddrs, rxi_NetAddrs,
+ myNetMasks, myNetMTUs, myNetFlags);
+
+ /* rx_maxReceiveSize only grows in this loop, and is capped by both
+  * RX_MAX_PACKET_SIZE and rx_maxReceiveSizeUser. */
+ for (i = 0; i < rxi_numNetAddrs; i++) {
+ rxsize = rxi_AdjustIfMTU(myNetMTUs[i] - RX_IPUDP_SIZE);
+ maxsize =
+ rxi_nRecvFrags * rxsize + (rxi_nRecvFrags - 1) * UDP_HDR_SIZE;
+ maxsize = rxi_AdjustMaxMTU(rxsize, maxsize);
+ if (rx_maxReceiveSize < maxsize) {
+ rx_maxReceiveSize = MIN(RX_MAX_PACKET_SIZE, maxsize);
+ rx_maxReceiveSize =
+ MIN(rx_maxReceiveSize, rx_maxReceiveSizeUser);
+ }
- LOCK_IF;
- (void)syscfg_GetIFInfo(&rxi_numNetAddrs, rxi_NetAddrs,
- myNetMasks, myNetMTUs, myNetFlags);
+ }
+ UNLOCK_IF;
- for (i = 0; i < rxi_numNetAddrs; i++) {
- rxsize = rxi_AdjustIfMTU(myNetMTUs[i] - RX_IPUDP_SIZE);
- maxsize =
- rxi_nRecvFrags * rxsize + (rxi_nRecvFrags - 1) * UDP_HDR_SIZE;
- maxsize = rxi_AdjustMaxMTU(rxsize, maxsize);
- if (rx_maxReceiveSize < maxsize) {
- rx_maxReceiveSize = MIN(RX_MAX_PACKET_SIZE, maxsize);
- rx_maxReceiveSize =
- MIN(rx_maxReceiveSize, rx_maxReceiveSizeUser);
- }
+ /*
+ * If rxinit_status is still set, rx_InitHost() has yet to be called
+ * and we therefore do not have any mutex locks initialized. As a
+ * result we cannot call rxi_MorePackets() without crashing.
+ */
+ if (rxinit_status)
+ return;
- }
- UNLOCK_IF;
- ncbufs = (rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE);
- if (ncbufs > 0) {
- ncbufs = ncbufs / RX_CBUFFERSIZE;
- npackets = rx_initSendWindow - 1;
- rxi_MorePackets(npackets * (ncbufs + 1));
- }
- }
+ /* NOTE(review): Inited is still 1 on this path, so the next call made
+  * while rxinit_status == 0 appears to run rxi_InitMorePackets() a
+  * second time before setting Inited = 2 -- confirm this is intended. */
+ rxi_InitMorePackets();
}
-
#endif
static afs_uint32
-#if !defined(AFS_AIX_ENV) && !defined(AFS_NT40_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_DJGPP_ENV)
+#if !defined(AFS_AIX_ENV) && !defined(AFS_NT40_ENV) && !defined(AFS_LINUX20_ENV)
int
rxi_syscall(a3, a4, a5)
afs_uint32 a3, a4;
{
int s;
int i, j, len, res;
-#ifndef AFS_DJGPP_ENV
struct ifconf ifc;
struct ifreq ifs[ADDRSPERSITE];
struct ifreq *ifr;
char buf[BUFSIZ], *cp, *cplim;
#endif
struct sockaddr_in *a;
-#endif /* AFS_DJGPP_ENV */
LOCK_IF_INIT;
if (Inited) {
if (s < 0)
return;
-#ifndef AFS_DJGPP_ENV
#ifdef AFS_AIX41_ENV
ifc.ifc_len = sizeof(buf);
ifc.ifc_buf = buf;
rxi_MorePackets(npackets * (ncbufs + 1));
}
}
-#else /* AFS_DJGPP_ENV */
- close(s);
- return;
-#endif /* AFS_DJGPP_ENV */
}
#endif /* AFS_NT40_ENV */
afs_uint32 ppaddr;
u_short rxmtu;
int ix;
-
-
+#if defined(ADAPT_PMTU) && defined(IP_MTU)
+ int sock;
+ struct sockaddr_in addr;
+#endif
LOCK_IF_INIT;
if (!Inited) {
/* try to second-guess IP, and identify which link is most likely to
* be used for traffic to/from this host. */
ppaddr = ntohl(pp->host);
-
+
pp->ifMTU = 0;
pp->timeout.sec = 2;
- pp->rateFlag = 2; /* start timing after two full packets */
+ pp->rateFlag = 2; /* start timing after two full packets */
/* I don't initialize these, because I presume they are bzero'd...
* pp->burstSize pp->burst pp->burstWait.sec pp->burstWait.usec
* pp->timeout.usec */
-
+
LOCK_IF;
for (ix = 0; ix < rxi_numNetAddrs; ++ix) {
if ((rxi_NetAddrs[ix] & myNetMasks[ix]) == (ppaddr & myNetMasks[ix])) {
}
}
UNLOCK_IF;
- if (!pp->ifMTU) { /* not local */
+ if (!pp->ifMTU) { /* not local */
pp->timeout.sec = 3;
pp->ifMTU = MIN(rx_MyMaxSendSize, RX_REMOTE_PACKET_SIZE);
}
pp->timeout.sec = 2;
pp->ifMTU = MIN(rx_MyMaxSendSize, OLD_MAX_PACKET_SIZE);
#endif /* ADAPT_MTU */
+#if defined(ADAPT_PMTU) && defined(IP_MTU)
+ sock=socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (sock >= 0) {
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = pp->host;
+ addr.sin_port = pp->port;
+ if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
+ int mtu=0;
+ socklen_t s = sizeof(mtu);
+ if (getsockopt(sock, SOL_IP, IP_MTU, &mtu, &s)== 0) {
+ pp->ifMTU = MIN(mtu - RX_IPUDP_SIZE, pp->ifMTU);
+ }
+ }
+ close(sock);
+ }
+#endif
pp->ifMTU = rxi_AdjustIfMTU(pp->ifMTU);
pp->maxMTU = OLD_MAX_PACKET_SIZE; /* for compatibility with old guys */
pp->natMTU = MIN((int)pp->ifMTU, OLD_MAX_PACKET_SIZE);
pp->maxDgramPackets =
MIN(rxi_nDgramPackets,
- rxi_AdjustDgramPackets(RX_MAX_FRAGS, pp->ifMTU));
+ rxi_AdjustDgramPackets(rxi_nSendFrags, pp->ifMTU));
pp->ifDgramPackets =
MIN(rxi_nDgramPackets,
- rxi_AdjustDgramPackets(RX_MAX_FRAGS, pp->ifMTU));
+ rxi_AdjustDgramPackets(rxi_nSendFrags, pp->ifMTU));
pp->maxDgramPackets = 1;
/* Initialize slow start parameters */
pp->MTU = MIN(pp->natMTU, pp->maxMTU);
{
rx_MyMaxSendSize = rx_maxReceiveSizeUser = rx_maxReceiveSize = mtu;
}
+
+#if defined(HAVE_LINUX_ERRQUEUE_H) && defined(ADAPT_PMTU)
+/*
+ * Read one entry from the error queue of `socket` (recvmsg() with
+ * MSG_ERRQUEUE, non-blocking) and look for an IP_RECVERR control message.
+ * When the extended error is EMSGSIZE and ee_info is at least 68, the
+ * peer identified by the address recvmsg() stored in msg_name is updated
+ * through rxi_SetPeerMtu() with ee_info less RX_IPUDP_SIZE.
+ *
+ * Returns 1 if an IP_RECVERR control message was found (whatever error it
+ * carried); returns 0 if recvmsg() failed, the message did not come from
+ * the error queue, or no such control message was present.
+ */
+int
+rxi_HandleSocketError(int socket)
+{
+ struct msghdr msg;
+ struct cmsghdr *cmsg;
+ struct sock_extended_err *err;
+ struct sockaddr_in addr;
+ struct sockaddr *offender; /* declared but never used in this function */
+ char controlmsgbuf[256];
+ int ret=0;
+ int code;
+
+ /* No data buffer is supplied (msg_iov is NULL): only the address and
+  * the control data are wanted from this read. */
+ msg.msg_name = &addr;
+ msg.msg_namelen = sizeof(addr);
+ msg.msg_iov = NULL;
+ msg.msg_iovlen = 0;
+ msg.msg_control = controlmsgbuf;
+ msg.msg_controllen = 256;
+ msg.msg_flags = 0;
+ code = recvmsg(socket, &msg, MSG_ERRQUEUE|MSG_DONTWAIT|MSG_TRUNC);
+
+ if (code < 0 || !(msg.msg_flags & MSG_ERRQUEUE))
+ goto out;
+
+ /* Walk the control messages; stop at the first IP_RECVERR entry, or
+  * give up (cmsg = 0) if a header looks out of bounds or has length 0.
+  * NOTE(review): the right-hand sides subtract from msg_controllen; if
+  * that type is unsigned and smaller than the CMSG_SPACE() value, the
+  * subtraction would wrap and the check would not fire -- confirm. */
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if ((char *)cmsg - controlmsgbuf > msg.msg_controllen - CMSG_SPACE(0) ||
+ (char *)cmsg - controlmsgbuf > msg.msg_controllen - CMSG_SPACE(cmsg->cmsg_len) ||
+ cmsg->cmsg_len == 0) {
+ cmsg = 0;
+ break;
+ }
+ if (cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_RECVERR)
+ break;
+ }
+ if (!cmsg)
+ goto out;
+ /* From here on an error-queue entry has been consumed, so report 1
+  * even if the error is not one that is acted on below. */
+ ret=1;
+ err =(struct sock_extended_err *) CMSG_DATA(cmsg);
+
+ /* 68 is presumably the minimum IPv4 MTU; smaller values are ignored
+  * -- TODO confirm the intent of this constant. */
+ if (err->ee_errno == EMSGSIZE && err->ee_info >= 68) {
+ rxi_SetPeerMtu(addr.sin_addr.s_addr, addr.sin_port,
+ err->ee_info - RX_IPUDP_SIZE);
+ }
+ /* other DEST_UNREACH's and TIME_EXCEEDED should be dealt with too */
+
+out:
+ return ret;
+}
+#endif