From 5c9234694543f206174d30e21886286d419fd8df Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Mon, 2 Nov 2020 13:52:25 -0600 Subject: [PATCH] rx: For AFS_RXERRQ_ENV, retry sendmsg on error When AFS_RXERRQ_ENV is defined, we currently end up doing something like this for our sendmsg abstractions: if (sendmsg(...) < 0) { while (rxi_HandleSocketError(sock)) ; return error; } return success; This means that when sendmsg() returns an error, we process the socket error queue before returning an error. The problem with this is that when we receive an ICMP error on our socket, it creates a pending socket error that is returned for any operation on the socket. So, if we receive an ICMP error after trying to contact any peer, sendmsg() could return an error when trying to send for any other peer. Even though there is no issue preventing us from sending the packet, we'll fail to actually send the packet because sendmsg() returned an error. This effectively causes an extra outgoing packet drop, possibly delaying the related RPC. To avoid this, change Rx to retry the sendmsg call when it returns an error, since the error may be due to an unrelated ICMP error. To avoid needing to implement this retry loop in multiple places, move around our sendmsg code for AFS_RXERRQ_ENV, so that the higher-level function rxi_NetSend performs the retry and checks for socket errors (instead of the lower-level rxi_Sendmsg or osi_NetSend). Also change our functions to process socket errors to be more consistent between kernel and userspace: now we always have rxi_HandleSocketErrors, which runs a loop around the platform-specific osi_HandleSocketError. With this commit, osi_HandleSocketError is now required to be implemented when AFS_RXERRQ_ENV is defined. We hadn't been implementing this for UKERNEL, so just turn off AFS_RXERRQ_ENV for UKERNEL. Thanks to mbarbosa@sinenomine.net for discovering and providing information about the relevant issue. Change-Id: Iccceddcd2d28992ed7a00dc308816a0cb1a0195f Reviewed-on: https://gerrit.openafs.org/14424 Reviewed-by: Cheyenne Wills Reviewed-by: Benjamin Kaduk Tested-by: BuildBot --- src/config/param.linux26.h | 2 +- src/rx/LINUX/rx_knet.c | 30 +++------------------ src/rx/rx.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++ src/rx/rx_internal.h | 8 ++++++ src/rx/rx_lwp.c | 9 +------ src/rx/rx_prototypes.h | 1 - src/rx/rx_pthread.c | 10 ++----- src/rx/rx_user.c | 7 +++-- 8 files changed, 84 insertions(+), 49 deletions(-) diff --git a/src/config/param.linux26.h b/src/config/param.linux26.h index 3ea6a39..76c4b65 100644 --- a/src/config/param.linux26.h +++ b/src/config/param.linux26.h @@ -106,7 +106,7 @@ #include #endif -#if defined(HAVE_LINUX_ERRQUEUE_H) && defined(HAVE_SETSOCKOPT_IP_RECVERR) +#if defined(HAVE_LINUX_ERRQUEUE_H) && defined(HAVE_SETSOCKOPT_IP_RECVERR) && !defined(UKERNEL) # define AFS_RXERRQ_ENV #endif #ifdef AFS_RXERRQ_ENV diff --git a/src/rx/LINUX/rx_knet.c b/src/rx/LINUX/rx_knet.c index 50607c8..42444e5 100644 --- a/src/rx/LINUX/rx_knet.c +++ b/src/rx/LINUX/rx_knet.c @@ -98,8 +98,8 @@ rxk_FreeSocket(struct socket *asocket) } #ifdef AFS_RXERRQ_ENV -static int -osi_HandleSocketError(osi_socket so, char *cmsgbuf, size_t cmsgbuf_len) +int +osi_HandleSocketError(osi_socket so, void *cmsgbuf, size_t cmsgbuf_len) { struct msghdr msg; struct cmsghdr *cmsg; @@ -140,26 +140,6 @@ osi_HandleSocketError(osi_socket so, char *cmsgbuf, size_t cmsgbuf_len) } #endif -static void -do_handlesocketerror(osi_socket so) -{ -#ifdef AFS_RXERRQ_ENV - char *cmsgbuf; - size_t cmsgbuf_len; - - cmsgbuf_len = 256; - cmsgbuf = rxi_Alloc(cmsgbuf_len); - if (!cmsgbuf) { - return; - } - - while (osi_HandleSocketError(so, cmsgbuf, cmsgbuf_len)) - ; - - rxi_Free(cmsgbuf, cmsgbuf_len); -#endif -} - /* osi_NetSend * * Return codes: @@ -182,10 +162,6 @@ osi_NetSend(osi_socket sop, struct sockaddr_in *to, struct iovec *iovec, code = kernel_sendmsg(sop, &msg, (struct kvec *) iovec, iovcnt, size); - if (code < 0) { - do_handlesocketerror(sop); - } - return (code < 0) ? code : 0; } @@ -250,7 +226,7 @@ osi_NetReceive(osi_socket so, struct sockaddr_in *from, struct iovec *iov, rxk_lastSocketError = code; rxk_nSocketErrors++; - do_handlesocketerror(so); + rxi_HandleSocketErrors(so); } else { *lengthp = code; code = 0; diff --git a/src/rx/rx.c b/src/rx/rx.c index 0b548ff..73fb9c8 100644 --- a/src/rx/rx.c +++ b/src/rx/rx.c @@ -9417,12 +9417,78 @@ int rx_DumpCalls(FILE *outputFile, char *cookie) } #endif +#ifdef AFS_RXERRQ_ENV +void +rxi_HandleSocketErrors(osi_socket sock) +{ + size_t cmsgbuf_len = 256; + void *cmsgbuf; +# ifndef KERNEL + int errno_save = errno; +# endif + + cmsgbuf = rxi_Alloc(cmsgbuf_len); + if (cmsgbuf == NULL) { + goto done; + } + + while (osi_HandleSocketError(sock, cmsgbuf, cmsgbuf_len)) + ; + + rxi_Free(cmsgbuf, cmsgbuf_len); + + done: +# ifndef KERNEL + errno = errno_save; +# endif + return; +} + +static int +NetSend_retry(osi_socket sock, void *addr, struct iovec *dvec, int nvecs, + int length, int istack) +{ + int code; + int safety; + /* + * If an ICMP error comes in for any peer, sendmsg() can return -1 with an + * errno of EHOSTUNREACH, ENETUNREACH, etc. There may be no problem with + * sending this packet (an error is returned just to indicate we need to + * read in pending errors), but the packet wasn't actually sent. + * + * It's difficult to determine in general whether sendmsg() is returning an + * error due to a received ICMP error, or we're getting an actual error for + * this specific sendmsg() call, since there may be other threads running + * sendmsg/recvmsg/rxi_HandleSocketErrors at the same time. So, just retry + * the sendmsg a few times; make sure not to retry forever, in case we are + * getting an actual error from this sendmsg() call. + * + * Also note that if we accidentally drop a packet here that we didn't need + * to, it's not the end of the world. Packets get dropped, and we should be + * able to recover. + */ + for (safety = 0; safety < RXI_SENDMSG_RETRY; safety++) { + code = osi_NetSend(sock, addr, dvec, nvecs, length, istack); + if (code == 0) { + return 0; + } + rxi_HandleSocketErrors(sock); + } + return code; + +} +#endif + int rxi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs, int length, int istack) { if (rxi_IsRunning()) { +#ifdef AFS_RXERRQ_ENV + return NetSend_retry(socket, addr, dvec, nvecs, length, istack); +#else return osi_NetSend(socket, addr, dvec, nvecs, length, istack); +#endif } #ifdef AFS_NT40_ENV return WSAESHUTDOWN; diff --git a/src/rx/rx_internal.h b/src/rx/rx_internal.h index 8d5fd79..b420a23 100644 --- a/src/rx/rx_internal.h +++ b/src/rx/rx_internal.h @@ -17,6 +17,9 @@ extern rx_atomic_t rx_nWaiting; extern rx_atomic_t rx_nWaited; +/* How many times to retry sendmsg()-equivalent calls for AFS_RXERRQ_ENV. */ +#define RXI_SENDMSG_RETRY 8 + /* Prototypes for internal functions */ /* rx.c */ @@ -28,6 +31,11 @@ extern void rxi_SetPeerMtu(struct rx_peer *peer, afs_uint32 host, #ifdef AFS_RXERRQ_ENV extern void rxi_ProcessNetError(struct sock_extended_err *err, afs_uint32 addr, afs_uint16 port); +extern int osi_HandleSocketError(osi_socket sock, void *cmsgbuf, + size_t cmsgbuf_len); +extern void rxi_HandleSocketErrors(osi_socket sock); +#else +# define rxi_HandleSocketErrors(sock) do { } while (0) #endif extern struct rx_peer *rxi_FindPeer(afs_uint32 host, u_short port, int create); diff --git a/src/rx/rx_lwp.c b/src/rx/rx_lwp.c index f3378de..dfeb674 100644 --- a/src/rx/rx_lwp.c +++ b/src/rx/rx_lwp.c @@ -413,12 +413,9 @@ rxi_Recvmsg(osi_socket socket, struct msghdr *msg_p, int flags) int code; code = recvmsg(socket, msg_p, flags); -#ifdef AFS_RXERRQ_ENV if (code < 0) { - while((rxi_HandleSocketError(socket)) > 0) - ; + rxi_HandleSocketErrors(socket); } -#endif return code; } @@ -452,10 +449,6 @@ rxi_Sendmsg(osi_socket socket, struct msghdr *msg_p, int flags) } FD_SET(socket, sfds); } -#ifdef AFS_RXERRQ_ENV - while((rxi_HandleSocketError(socket)) > 0) - ; -#endif #ifdef AFS_NT40_ENV if (err) #elif defined(AFS_LINUX22_ENV) diff --git a/src/rx/rx_prototypes.h b/src/rx/rx_prototypes.h index 314ba0a..4e5c0d0 100644 --- a/src/rx/rx_prototypes.h +++ b/src/rx/rx_prototypes.h @@ -499,7 +499,6 @@ extern afs_kmutex_t rx_if_mutex; #endif extern osi_socket rxi_GetUDPSocket(u_short port); extern void rxi_InitPeerParams(struct rx_peer *pp); -extern int rxi_HandleSocketError(int socket); #if defined(AFS_AIX32_ENV) && !defined(KERNEL) #ifndef osi_Alloc diff --git a/src/rx/rx_pthread.c b/src/rx/rx_pthread.c index 3e61245..2553988 100644 --- a/src/rx/rx_pthread.c +++ b/src/rx/rx_pthread.c @@ -399,12 +399,9 @@ rxi_Recvmsg(osi_socket socket, struct msghdr *msg_p, int flags) int ret; ret = recvmsg(socket, msg_p, flags); -#ifdef AFS_RXERRQ_ENV if (ret < 0) { - while (rxi_HandleSocketError(socket) > 0) - ; + rxi_HandleSocketErrors(socket); } -#endif return ret; } @@ -426,10 +423,7 @@ rxi_Sendmsg(osi_socket socket, struct msghdr *msg_p, int flags) err = errno; #endif -#ifdef AFS_RXERRQ_ENV - while (rxi_HandleSocketError(socket) > 0) - ; -#else +#ifndef AFS_RXERRQ_ENV # ifdef AFS_LINUX22_ENV /* linux unfortunately returns ECONNREFUSED if the target port * is no longer in use */ diff --git a/src/rx/rx_user.c b/src/rx/rx_user.c index 0ba068b..7e6c06b 100644 --- a/src/rx/rx_user.c +++ b/src/rx/rx_user.c @@ -781,21 +781,20 @@ rx_SetMaxMTU(int mtu) #ifdef AFS_RXERRQ_ENV int -rxi_HandleSocketError(int socket) +osi_HandleSocketError(int socket, void *cmsgbuf, size_t cmsgbuf_len) { struct msghdr msg; struct cmsghdr *cmsg; struct sock_extended_err *err; struct sockaddr_in addr; - char controlmsgbuf[256]; int code; msg.msg_name = &addr; msg.msg_namelen = sizeof(addr); msg.msg_iov = NULL; msg.msg_iovlen = 0; - msg.msg_control = controlmsgbuf; - msg.msg_controllen = 256; + msg.msg_control = cmsgbuf; + msg.msg_controllen = cmsgbuf_len; msg.msg_flags = 0; code = recvmsg(socket, &msg, MSG_ERRQUEUE|MSG_DONTWAIT|MSG_TRUNC); -- 1.9.4