public inbox for netdev@vger.kernel.org 
 help / color / mirror / Atom feed
From: "Andy Chittenden" <andyc.bluearc-Re5JQEeQqe8AvxtiuMwx3w@public•gmane.org>
To: "'Andy Chittenden'"
	<andyc.bluearc-Re5JQEeQqe8AvxtiuMwx3w@public•gmane.org>,
	"'Andrew Morton'"
	<akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public•gmane.org>
Cc: "'David Miller'" <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public•gmane.org>,
	<kuznet-v/Mj1YrvjDBInbfyfbPRSQ@public•gmane.org>,
	<pekkas-UjJjq++bwZ7HOG6cAo2yLw@public•gmane.org>,
	<jmorris-gx6/JNMH7DfYtjvyW6yDsg@public•gmane.org>,
	<yoshfuji-VfPWfsRibaP+Ru+s062T9g@public•gmane.org>,
	<kaber-dcUjhNyLwpNeoWH0uzbU5w@public•gmane.org>,
	<eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public•gmane.org>,
	<William.Allen.Simpson-Re5JQEeQqe8AvxtiuMwx3w@public•gmane.org>,
	<gilad-f4XOiQkOAtcdH0auuBZGHA@public•gmane.org>,
	<ilpo.jarvinen-pxSi+dnQzZMxHbG02/KK1g@public•gmane.org>,
	<netdev-u79uwXL29TY76Z2rM5mHXA@public•gmane.org>,
	<linux-kernel-u79uwXL29TY76Z2rM5mHXA@public•gmane.org>,
	<linux-nfs-u79uwXL29TY76Z2rM5mHXA@public•gmane.org>,
	"'Trond Myklebust'"
	<Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public•gmane.org>,
	"'J. Bruce Fields'"
	<bfields-uC3wQj2KruNg9hUCZPvPmw@public•gmane.org>,
	"'Neil Brown'" <neilb-l3A5Bk7waGM@public•gmane.org>,
	"'Chuck Lever'"
	<chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public•gmane.org>,
	"'Benny Halevy'"
	<bhalevy-C4P08NqkoRlBDgjK7y7TUQ@public•gmane.org>,
	"'Alexandros Batsakis'"
	<batsakis-HgOvQuBEEgTQT0dZR+AlfA@public•gmane.org>,
	"'Joe Perches'" <joe-6d6DIl74uiNBDgjK7y7TUQ@public•gmane.org>
Subject: RE: [PATCH] [Bug 16494] NFS client over TCP hangs due to packet loss
Date: Thu, 5 Aug 2010 15:55:17 +0100	[thread overview]
Message-ID: <4c5ad0d6.42ecd80a.47d7.0dfc@mx.google.com> (raw)
In-Reply-To: <4C57EE9A.7040308-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

> On 2010-08-03 10:11, Andrew Morton wrote:
> > (cc linux-nfs)
> >
> > On Tue, 03 Aug 2010 01:21:44 -0700 (PDT) David
> Miller<davem-fT/PcQaiUtIeIZ0/mPfg9Q@public•gmane.org>  wrote:
> >
> >> From: "Andy Chittenden"<andyc.bluearc-Re5JQEeQqe8AvxtiuMwx3w@public•gmane.org>
> >> Date: Tue, 3 Aug 2010 09:14:31 +0100
> >>
> >>> I don't know whether this patch is the correct fix or not but it
> enables the
> >>> NFS client to recover.
> >>>
> >>> Kernel version: 2.6.34.1 and 2.6.32.
> >>>
> >>> Fixes<https://bugzilla.kernel.org/show_bug.cgi?id=16494>. It clears
> down
> >>> any previous shutdown attempts so that reconnects on a socket
> that's been
> >>> shutdown leave the socket in a usable state (otherwise
> tcp_sendmsg() returns
> >>> -EPIPE).
> >>
> >> If the SunRPC code wants to close a TCP socket then use it again,
> >> it should disconnect by doing a connect() with sa_family ==
> AF_UNSPEC
> 
> There is code to do that in the SunRPC code in xs_abort_connection()
> but
> that's conditionally called from xs_tcp_reuse_connection():
> 
> static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct
> sock_xprt *transport)
> {
> 	unsigned int state = transport->inet->sk_state;
> 
> 	if (state == TCP_CLOSE && transport->sock->state ==
> SS_UNCONNECTED)
> 		return;
> 	if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
> 		return;
> 	xs_abort_connection(xprt, transport);
> }
> 
> That's changed since 2.6.26 where it unconditionally did the connect()
> with sa_family == AF_UNSPEC. FWIW we cannot reproduce this problem with
> 2.6.26.

The problem is fixed by the patch below, which also logs a message showing
that sk_shutdown can be non-zero on entry to xs_tcp_reuse_connection():

# diff -up /home/company/software/src/linux-2.6.34.2/net/sunrpc/xprtsock.c net/sunrpc/xprtsock.c
--- /home/company/software/src/linux-2.6.34.2/net/sunrpc/xprtsock.c       2010-08-02 18:30:51.000000000 +0100
+++ net/sunrpc/xprtsock.c       2010-08-05 12:21:11.000000000 +0100
@@ -1322,10 +1322,11 @@ static void xs_tcp_state_change(struct s
        if (!(xprt = xprt_from_sock(sk)))
                goto out;
        dprintk("RPC:       xs_tcp_state_change client %p...\n", xprt);
-       dprintk("RPC:       state %x conn %d dead %d zapped %d\n",
+       dprintk("RPC:       state %x conn %d dead %d zapped %d sk_shutdown %d\n",
                        sk->sk_state, xprt_connected(xprt),
                        sock_flag(sk, SOCK_DEAD),
-                       sock_flag(sk, SOCK_ZAPPED));
+                       sock_flag(sk, SOCK_ZAPPED),
+                       sk->sk_shutdown);
 
        switch (sk->sk_state) {
        case TCP_ESTABLISHED:
@@ -1796,10 +1797,18 @@ static void xs_tcp_reuse_connection(stru
 {
        unsigned int state = transport->inet->sk_state;
 
-       if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
-               return;
-       if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
-               return;
+       if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) {
+               if (transport->inet->sk_shutdown == 0)
+                       return;
+               printk("%s: TCP_CLOSEd and sk_shutdown set to %d\n",
+                       __func__, transport->inet->sk_shutdown);
+       }
+       if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) {
+               if (transport->inet->sk_shutdown == 0)
+                       return;
+               printk("%s: sk_shutdown set to %d\n",
+                       __func__, transport->inet->sk_shutdown);
+       }
        xs_abort_connection(xprt, transport);
 }

Signed-off-by: Andy Chittenden <andyc.bluearc-Re5JQEeQqe8AvxtiuMwx3w@public•gmane.org>

dmesg displays:

[ 2840.896043] xs_tcp_reuse_connection: TCP_CLOSEd and sk_shutdown set to 2

So previously the code was attempting to reuse the connection without
aborting it, and therefore never cleared down sk_shutdown.


--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public•gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2010-08-05 14:55 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <4c57cfe8.887b0e0a.2f79.4772@mx.google.com>
2010-08-03  8:21 ` [PATCH] [Bug 16494] NFS client over TCP hangs due to packet loss David Miller
     [not found]   ` <20100803.012144.267950450.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
2010-08-03  9:11     ` Andrew Morton
     [not found]       ` <20100803021110.f0b3877b.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
2010-08-03 10:25         ` Andy Chittenden
     [not found]           ` <4C57EE9A.7040308-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2010-08-05 14:55             ` Andy Chittenden [this message]
     [not found]               ` <4c5ad0d6.42ecd80a.47d7.0dfc-ATjtLOhZ0NVl57MIdRCFDg@public.gmane.org>
2010-08-05 19:50                 ` Trond Myklebust
     [not found]                   ` <1281037822.2948.49.camel-rJ7iovZKK19ZJLDQqaL3InhyD016LWXt@public.gmane.org>
2010-08-06  9:30                     ` Andy Chittenden
2010-08-09  9:27                       ` Andy Chittenden
2010-08-09 16:55                         ` Trond Myklebust
     [not found]                           ` <1281372927.8950.3.camel-rJ7iovZKK19ZJLDQqaL3InhyD016LWXt@public.gmane.org>
2010-08-10  8:40                             ` Andy Chittenden
2018-06-19 21:56                         ` Joe Perches
2018-06-20 16:40                           ` Andy C
2010-08-03  8:14 Andy Chittenden

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4c5ad0d6.42ecd80a.47d7.0dfc@mx.google.com \
    --to=andyc.bluearc-re5jqeeqqe8avxtiumwx3w@public$(echo .)gmane.org \
    --cc=Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public$(echo .)gmane.org \
    --cc=William.Allen.Simpson-Re5JQEeQqe8AvxtiuMwx3w@public$(echo .)gmane.org \
    --cc=akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public$(echo .)gmane.org \
    --cc=batsakis-HgOvQuBEEgTQT0dZR+AlfA@public$(echo .)gmane.org \
    --cc=bfields-uC3wQj2KruNg9hUCZPvPmw@public$(echo .)gmane.org \
    --cc=bhalevy-C4P08NqkoRlBDgjK7y7TUQ@public$(echo .)gmane.org \
    --cc=chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public$(echo .)gmane.org \
    --cc=davem-fT/PcQaiUtIeIZ0/mPfg9Q@public$(echo .)gmane.org \
    --cc=eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public$(echo .)gmane.org \
    --cc=gilad-f4XOiQkOAtcdH0auuBZGHA@public$(echo .)gmane.org \
    --cc=ilpo.jarvinen-pxSi+dnQzZMxHbG02/KK1g@public$(echo .)gmane.org \
    --cc=jmorris-gx6/JNMH7DfYtjvyW6yDsg@public$(echo .)gmane.org \
    --cc=joe-6d6DIl74uiNBDgjK7y7TUQ@public$(echo .)gmane.org \
    --cc=kaber-dcUjhNyLwpNeoWH0uzbU5w@public$(echo .)gmane.org \
    --cc=kuznet-v/Mj1YrvjDBInbfyfbPRSQ@public$(echo .)gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public$(echo .)gmane.org \
    --cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public$(echo .)gmane.org \
    --cc=neilb-l3A5Bk7waGM@public$(echo .)gmane.org \
    --cc=netdev-u79uwXL29TY76Z2rM5mHXA@public$(echo .)gmane.org \
    --cc=pekkas-UjJjq++bwZ7HOG6cAo2yLw@public$(echo .)gmane.org \
    --cc=yoshfuji-VfPWfsRibaP+Ru+s062T9g@public$(echo .)gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox