From 9cd983799e622c9acf5dd6e0b9ae3a3a75eaa8ce Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Thu, 2 Aug 2012 11:58:12 -0400 Subject: [PATCH] rx: Process ICMP unreachable errors When a machine receives ICMP errors, we can detect them in AFS_RXERRQ_ENV environments. Many of these errors indicate that the remote machine is not reachable, so we are guaranteed not to get a response from it. When we get such an error for a particular peer, mark all relevant calls with an RX_CALL_DEAD error, since we know we won't get a response from that peer. This allows some calls to dead/unreachable hosts to fail much more quickly. Do not kill new calls, since the host may have come back up in the meantime (or the routing/firewall/etc. may have been fixed); kill only calls that were started before the current error was received. Note that a call doesn't actually notice the error until the next rxi_CheckCall, since directly killing each of the relevant calls would be rather slow. So, we don't notice a dead peer immediately, though we notice much more quickly than we used to. Reorganize the error queue processing a little bit to make this easier to do. 
Change-Id: I403540e0677fe2d432901e4ecc19f7f385610b7f Reviewed-on: http://gerrit.openafs.org/7929 Tested-by: BuildBot Reviewed-by: Derrick Brashear --- src/rx/LINUX/rx_knet.c | 10 +----- src/rx/rx.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/rx/rx_call.h | 3 ++ src/rx/rx_internal.h | 12 ++++++++ src/rx/rx_peer.h | 3 ++ src/rx/rx_user.c | 8 +---- 6 files changed, 103 insertions(+), 16 deletions(-) diff --git a/src/rx/LINUX/rx_knet.c b/src/rx/LINUX/rx_knet.c index fdf042e..5d2fc4a 100644 --- a/src/rx/LINUX/rx_knet.c +++ b/src/rx/LINUX/rx_knet.c @@ -147,15 +147,7 @@ osi_HandleSocketError(osi_socket so) memcpy(&addr, offender, sizeof(addr)); -# ifdef AFS_ADAPT_PMTU - if (err->ee_origin == SO_EE_ORIGIN_ICMP && - err->ee_type == ICMP_DEST_UNREACH && - err->ee_code == ICMP_FRAG_NEEDED) { - rxi_SetPeerMtu(NULL, ntohl(addr.sin_addr.s_addr), ntohs(addr.sin_port), - err->ee_info); - } -# endif - /* other DEST_UNREACH's and TIME_EXCEEDED should be dealt with too */ + rxi_ProcessNetError(err, addr.sin_addr.s_addr, addr.sin_port); out: if (controlmsgbuf) { diff --git a/src/rx/rx.c b/src/rx/rx.c index 17ab869..b2226a3 100644 --- a/src/rx/rx.c +++ b/src/rx/rx.c @@ -1626,6 +1626,13 @@ rx_NewCall(struct rx_connection *conn) else call->mode = RX_MODE_SENDING; +#ifdef AFS_RXERRQ_ENV + /* remember how many network errors the peer has when we started, so if + * more errors are encountered after the call starts, we know the other endpoint won't be + * responding to us */ + call->neterr_gen = rx_atomic_read(&conn->peer->neterrs); +#endif + /* remember start time for call in case we have hard dead time limit */ call->queueTime = queueTime; clock_GetTime(&call->startTime); @@ -2910,6 +2917,51 @@ rxi_SetPeerMtu(struct rx_peer *peer, afs_uint32 host, afs_uint32 port, int mtu) MUTEX_EXIT(&rx_peerHashTable_lock); } +#ifdef AFS_RXERRQ_ENV +static void +rxi_SetPeerDead(afs_uint32 host, afs_uint16 port) +{ + int hashIndex = PEER_HASH(host, port); + struct rx_peer *peer; + + 
MUTEX_ENTER(&rx_peerHashTable_lock); + + for (peer = rx_peerHashTable[hashIndex]; peer; peer = peer->next) { + if (peer->host == host && peer->port == port) { + break; + } + } + + if (peer) { + rx_atomic_inc(&peer->neterrs); + } + + MUTEX_EXIT(&rx_peerHashTable_lock); +} + +void +rxi_ProcessNetError(struct sock_extended_err *err, afs_uint32 addr, afs_uint16 port) +{ +# ifdef AFS_ADAPT_PMTU + if (err->ee_errno == EMSGSIZE && err->ee_info >= 68) { + rxi_SetPeerMtu(NULL, addr, port, err->ee_info - RX_IPUDP_SIZE); + return; + } +# endif + if (err->ee_origin == SO_EE_ORIGIN_ICMP && err->ee_type == ICMP_DEST_UNREACH) { + switch (err->ee_code) { + case ICMP_NET_UNREACH: + case ICMP_HOST_UNREACH: + case ICMP_PORT_UNREACH: + case ICMP_NET_ANO: + case ICMP_HOST_ANO: + rxi_SetPeerDead(addr, port); + break; + } + } +} +#endif /* AFS_RXERRQ_ENV */ + /* Find the peer process represented by the supplied (host,port) * combination. If there is no appropriate active peer structure, a * new one will be allocated and initialized @@ -2933,6 +2985,9 @@ rxi_FindPeer(afs_uint32 host, u_short port, pp = rxi_AllocPeer(); /* This bzero's *pp */ pp->host = host; /* set here or in InitPeerParams is zero */ pp->port = port; +#ifdef AFS_RXERRQ_ENV + rx_atomic_set(&pp->neterrs, 0); +#endif MUTEX_INIT(&pp->peer_lock, "peer_lock", MUTEX_DEFAULT, 0); queue_Init(&pp->rpcStats); pp->next = rx_peerHashTable[hashIndex]; @@ -3200,6 +3255,11 @@ rxi_ReceivePacket(struct rx_packet *np, osi_socket socket, */ if (peer && (peer->refCount > 0)) { +#ifdef AFS_RXERRQ_ENV + if (rx_atomic_read(&peer->neterrs)) { + rx_atomic_set(&peer->neterrs, 0); + } +#endif MUTEX_ENTER(&peer->peer_lock); peer->bytesReceived += np->length; MUTEX_EXIT(&peer->peer_lock); @@ -3254,6 +3314,12 @@ rxi_ReceivePacket(struct rx_packet *np, osi_socket socket, return np; } +#ifdef AFS_RXERRQ_ENV + if (rx_atomic_read(&conn->peer->neterrs)) { + rx_atomic_set(&conn->peer->neterrs, 0); + } +#endif + /* If we're doing statistics, then account for 
the incoming packet */ if (rx_stats_active) { MUTEX_ENTER(&conn->peer->peer_lock); @@ -6179,6 +6245,23 @@ rxi_CheckCall(struct rx_call *call) int idle_timeout = 0; afs_int32 clock_diff = 0; +#ifdef AFS_RXERRQ_ENV + int peererrs = rx_atomic_read(&call->conn->peer->neterrs); + if (call->neterr_gen < peererrs) { + /* we have received network errors since this call started; kill + * the call */ + if (call->state == RX_STATE_ACTIVE) { + rxi_CallError(call, RX_CALL_DEAD); + } + return -1; + } + if (call->neterr_gen > peererrs) { + /* someone has reset the number of peer errors; set the call error gen + * so we can detect if more errors are encountered */ + call->neterr_gen = peererrs; + } +#endif + now = clock_Sec(); /* Large swings in the clock can have a significant impact on diff --git a/src/rx/rx_call.h b/src/rx/rx_call.h index e873eb4..a1450f1 100644 --- a/src/rx/rx_call.h +++ b/src/rx/rx_call.h @@ -157,6 +157,9 @@ struct rx_call { #endif afs_uint32 call_id; #endif +#ifdef AFS_RXERRQ_ENV + int neterr_gen; +#endif }; /* Only include this once, even when re-loading for kdump. */ diff --git a/src/rx/rx_internal.h b/src/rx/rx_internal.h index e54d10c..83761d3 100644 --- a/src/rx/rx_internal.h +++ b/src/rx/rx_internal.h @@ -5,6 +5,14 @@ * customers of RX belong in rx_private.h, which is installed. 
*/ +#ifdef AFS_RXERRQ_ENV +# if defined(AFS_LINUX26_ENV) || defined(AFS_USR_LINUX26_ENV) +# include +# include +# include +# include +# endif +#endif /* Globals that we don't want the world to know about */ extern rx_atomic_t rx_nWaiting; @@ -16,6 +24,10 @@ extern rx_atomic_t rx_nWaited; extern void rxi_PacketsUnWait(void); extern void rxi_SetPeerMtu(struct rx_peer *peer, afs_uint32 host, afs_uint32 port, int mtu); +#ifdef AFS_RXERRQ_ENV +extern void rxi_ProcessNetError(struct sock_extended_err *err, + afs_uint32 addr, afs_uint16 port); +#endif extern struct rx_peer *rxi_FindPeer(afs_uint32 host, u_short port, struct rx_peer *origPeer, int create); extern struct rx_packet *rxi_ReceivePacket(struct rx_packet *np, diff --git a/src/rx/rx_peer.h b/src/rx/rx_peer.h index 318cbe8..baf37b0 100644 --- a/src/rx/rx_peer.h +++ b/src/rx/rx_peer.h @@ -63,6 +63,9 @@ struct rx_peer { struct rx_queue rpcStats; /* rpc statistic list */ int lastReachTime; /* Last time we verified reachability */ afs_int32 maxPacketSize; /* peer packetsize hint */ +#ifdef AFS_RXERRQ_ENV + rx_atomic_t neterrs; +#endif }; #endif diff --git a/src/rx/rx_user.c b/src/rx/rx_user.c index a0427e4..be74a64 100644 --- a/src/rx/rx_user.c +++ b/src/rx/rx_user.c @@ -815,13 +815,7 @@ rxi_HandleSocketError(int socket) ret = 1; err = (struct sock_extended_err *) CMSG_DATA(cmsg); -# ifdef AFS_ADAPT_PMTU - if (err->ee_errno == EMSGSIZE && err->ee_info >= 68) { - rxi_SetPeerMtu(NULL, addr.sin_addr.s_addr, addr.sin_port, - err->ee_info - RX_IPUDP_SIZE); - } -# endif - /* other DEST_UNREACH's and TIME_EXCEEDED should be dealt with too */ + rxi_ProcessNetError(err, addr.sin_addr.s_addr, addr.sin_port); out: return ret; -- 1.9.4