From: Lawrence Brakmo <brakmo@fb•com>
To: netdev <netdev@vger•kernel.org>
Cc: Kernel Team <kernel-team@fb•com>, Blake Matheny <bmatheny@fb•com>,
Alexei Starovoitov <ast@fb•com>,
Daniel Borkmann <daniel@iogearbox•net>,
David Ahern <dsa@cumulusnetworks•com>
Subject: [RFC PATCH net-next v2 05/15] bpf: Support for setting initial receive window
Date: Thu, 15 Jun 2017 13:08:34 -0700 [thread overview]
Message-ID: <20170615200844.2752485-6-brakmo@fb.com> (raw)
In-Reply-To: <20170615200844.2752485-1-brakmo@fb.com>
This patch adds support for setting the initial advertized window from
within a BPF_SOCKET_OPS program. This can be used to support larger
initial cwnd values in environments where it is known to be safe.
Signed-off-by: Lawrence Brakmo <brakmo@fb•com>
---
include/net/tcp.h | 10 ++++++++++
include/uapi/linux/bpf.h | 4 ++++
net/ipv4/tcp_minisocks.c | 9 ++++++++-
net/ipv4/tcp_output.c | 7 ++++++-
4 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a726486..29c27dc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2029,4 +2029,14 @@ static inline u32 tcp_timeout_init(struct sock *sk, bool is_req_sock)
return timeout;
}
+static inline u32 tcp_rwnd_init_bpf(struct sock *sk, bool is_req_sock)
+{
+ int rwnd;
+
+ rwnd = tcp_call_bpf(sk, is_req_sock, BPF_SOCKET_OPS_RWND_INIT);
+
+ if (rwnd < 0)
+ rwnd = 0;
+ return rwnd;
+}
#endif /* _TCP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 039f327..d945336 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -749,6 +749,10 @@ enum {
BPF_SOCKET_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or
* -1 if default value should be used
*/
+ BPF_SOCKET_OPS_RWND_INIT, /* Should return initial advertized
+ * window (in packets) or -1 if default
+ * value should be used
+ */
};
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d30ee31..bbaf3c6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -351,6 +351,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
int full_space = tcp_full_space(sk_listener);
u32 window_clamp;
__u8 rcv_wscale;
+ u32 rcv_wnd;
int mss;
mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
@@ -363,6 +364,12 @@ void tcp_openreq_init_rwin(struct request_sock *req,
(req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
req->rsk_window_clamp = full_space;
+ rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req, true);
+ if (rcv_wnd == 0)
+ rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+ else if (full_space < rcv_wnd * mss)
+ full_space = rcv_wnd * mss;
+
/* tcp_full_space because it is guaranteed to be the first packet */
tcp_select_initial_window(full_space,
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -370,7 +377,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
&req->rsk_window_clamp,
ireq->wscale_ok,
&rcv_wscale,
- dst_metric(dst, RTAX_INITRWND));
+ rcv_wnd);
ireq->rcv_wscale = rcv_wscale;
}
EXPORT_SYMBOL(tcp_openreq_init_rwin);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5e478a1..e5f623f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3267,6 +3267,7 @@ static void tcp_connect_init(struct sock *sk)
const struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u8 rcv_wscale;
+ u32 rcv_wnd;
/* We'll fix this up when we get a response from the other end.
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
@@ -3300,13 +3301,17 @@ static void tcp_connect_init(struct sock *sk)
(tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
tp->window_clamp = tcp_full_space(sk);
+ rcv_wnd = tcp_rwnd_init_bpf(sk, false);
+ if (rcv_wnd == 0)
+ rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+
tcp_select_initial_window(tcp_full_space(sk),
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
&rcv_wscale,
- dst_metric(dst, RTAX_INITRWND));
+ rcv_wnd);
tp->rx_opt.rcv_wscale = rcv_wscale;
tp->rcv_ssthresh = tp->rcv_wnd;
--
2.9.3
next prev parent reply other threads:[~2017-06-15 20:08 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-06-15 20:08 [RFC PATCH net-next v2 00/15] bpf: BPF support for socket ops Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 01/15] " Lawrence Brakmo
2017-06-16 12:07 ` Daniel Borkmann
2017-06-16 23:41 ` Lawrence Brakmo
2017-06-19 18:44 ` Daniel Borkmann
2017-06-19 20:49 ` Lawrence Brakmo
2017-06-17 21:48 ` Lawrence Brakmo
2017-06-19 18:52 ` Daniel Borkmann
2017-06-19 20:49 ` Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 02/15] bpf: program to load socketops BPF programs Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 03/15] bpf: Support for per connection SYN/SYN-ACK RTOs Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 04/15] bpf: Sample bpf program to set " Lawrence Brakmo
2017-06-15 20:08 ` Lawrence Brakmo [this message]
2017-06-15 20:08 ` [RFC PATCH net-next v2 06/15] bpf: Sample bpf program to set initial window Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 07/15] bpf: Add setsockopt helper function to bpf Lawrence Brakmo
2017-06-16 13:27 ` Daniel Borkmann
2017-06-17 23:17 ` Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 08/15] bpf: Add TCP connection BPF callbacks Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 09/15] bpf: Sample BPF program to set buffer sizes Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 10/15] bpf: Add support for changing congestion control Lawrence Brakmo
2017-06-16 13:58 ` Daniel Borkmann
2017-06-18 2:39 ` Lawrence Brakmo
2017-06-19 22:34 ` Daniel Borkmann
2017-06-20 0:35 ` Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 11/15] bpf: Sample BPF program to set " Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 12/15] bpf: Adds support for setting initial cwnd Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 13/15] bpf: Sample BPF program to set " Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 14/15] bpf: Adds support for setting sndcwnd clamp Lawrence Brakmo
2017-06-15 20:08 ` [RFC PATCH net-next v2 15/15] bpf: Sample bpf program to set " Lawrence Brakmo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170615200844.2752485-6-brakmo@fb.com \
--to=brakmo@fb$(echo .)com \
--cc=ast@fb$(echo .)com \
--cc=bmatheny@fb$(echo .)com \
--cc=daniel@iogearbox$(echo .)net \
--cc=dsa@cumulusnetworks$(echo .)com \
--cc=kernel-team@fb$(echo .)com \
--cc=netdev@vger$(echo .)kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox