public inbox for netdev@vger.kernel.org 
 help / color / mirror / Atom feed
* [PATCH v2 bpf-next 0/3] bpf: Add MTU check to fib lookup helper
@ 2018-05-21 16:08 dsahern
  2018-05-21 16:08 ` [PATCH v2 bpf-next 1/3] net/ipv4: Add helper to return path MTU based on fib result dsahern
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: dsahern @ 2018-05-21 16:08 UTC (permalink / raw)
  To: netdev, borkmann, ast; +Cc: davem, David Ahern

From: David Ahern <dsahern@gmail•com>

Packets that exceed the egress MTU can not be forwarded in the fast path.
Add IPv4 and IPv6 MTU helpers that take a FIB lookup result (versus the
typical dst path) and add the calls to bpf_ipv{4,6}_fib_lookup.

v2
- add ip6_mtu_from_fib6 to ipv6_stub
- only call the new MTU helpers for fib lookups in XDP path; skb
  path uses is_skb_forwardable to determine if the packet can be
  sent via the egress device from the FIB lookup

David Ahern (3):
  net/ipv4: Add helper to return path MTU based on fib result
  net/ipv6: Add helper to return path MTU based on fib result
  bpf: Add mtu checking to FIB forwarding helper

 include/net/addrconf.h   |  2 ++
 include/net/ip6_fib.h    |  6 ++++++
 include/net/ip6_route.h  |  3 +++
 include/net/ip_fib.h     |  2 ++
 net/core/filter.c        | 42 +++++++++++++++++++++++++++++++++++-------
 net/ipv4/route.c         | 31 +++++++++++++++++++++++++++++++
 net/ipv6/addrconf_core.c |  8 ++++++++
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/route.c         | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 136 insertions(+), 7 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 bpf-next 1/3] net/ipv4: Add helper to return path MTU based on fib result
  2018-05-21 16:08 [PATCH v2 bpf-next 0/3] bpf: Add MTU check to fib lookup helper dsahern
@ 2018-05-21 16:08 ` dsahern
  2018-05-21 16:08 ` [PATCH v2 bpf-next 2/3] net/ipv6: " dsahern
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: dsahern @ 2018-05-21 16:08 UTC (permalink / raw)
  To: netdev, borkmann, ast; +Cc: davem, David Ahern

From: David Ahern <dsahern@gmail•com>

Determine path MTU from a FIB lookup result. Logic is a distillation of
ip_dst_mtu_maybe_forward.

Signed-off-by: David Ahern <dsahern@gmail•com>
---
 include/net/ip_fib.h |  2 ++
 net/ipv4/route.c     | 31 +++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 81d0f2107ff1..69c91d1934c1 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -449,4 +449,6 @@ static inline void fib_proc_exit(struct net *net)
 }
 #endif
 
+u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
+
 #endif  /* _NET_FIB_H */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 29268efad247..ac3b22bc51b2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1352,6 +1352,37 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
 	return NULL;
 }
 
+/* MTU selection:
+ * 1. mtu on route is locked - use it
+ * 2. mtu from nexthop exception
+ * 3. mtu from egress device
+ */
+
+u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
+{
+	struct fib_info *fi = res->fi;
+	struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
+	struct net_device *dev = nh->nh_dev;
+	u32 mtu = 0;
+
+	if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
+	    fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
+		mtu = fi->fib_mtu;
+
+	if (likely(!mtu)) {
+		struct fib_nh_exception *fnhe;
+
+		fnhe = find_exception(nh, daddr);
+		if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
+			mtu = fnhe->fnhe_pmtu;
+	}
+
+	if (likely(!mtu))
+		mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
+
+	return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu);
+}
+
 static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
 			      __be32 daddr, const bool do_cache)
 {
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 bpf-next 2/3] net/ipv6: Add helper to return path MTU based on fib result
  2018-05-21 16:08 [PATCH v2 bpf-next 0/3] bpf: Add MTU check to fib lookup helper dsahern
  2018-05-21 16:08 ` [PATCH v2 bpf-next 1/3] net/ipv4: Add helper to return path MTU based on fib result dsahern
@ 2018-05-21 16:08 ` dsahern
  2018-05-21 16:08 ` [PATCH v2 bpf-next 3/3] bpf: Add mtu checking to FIB forwarding helper dsahern
  2018-05-22  8:59 ` [PATCH v2 bpf-next 0/3] bpf: Add MTU check to fib lookup helper Daniel Borkmann
  3 siblings, 0 replies; 5+ messages in thread
From: dsahern @ 2018-05-21 16:08 UTC (permalink / raw)
  To: netdev, borkmann, ast; +Cc: davem, David Ahern

From: David Ahern <dsahern@gmail•com>

Determine path MTU from a FIB lookup result. Logic is based on
ip6_dst_mtu_forward plus lookup of nexthop exception.

Add ip6_dst_mtu_forward to ipv6_stubs to handle access by core
bpf code.

Signed-off-by: David Ahern <dsahern@gmail•com>
---
 include/net/addrconf.h   |  2 ++
 include/net/ip6_fib.h    |  6 ++++++
 include/net/ip6_route.h  |  3 +++
 net/ipv6/addrconf_core.c |  8 ++++++++
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/route.c         | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 68 insertions(+)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index ff766ab207e0..c07d4dd09361 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -236,6 +236,8 @@ struct ipv6_stub {
 						   struct flowi6 *fl6, int oif,
 						   const struct sk_buff *skb,
 						   int strict);
+	u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
+				 struct in6_addr *saddr);
 
 	void (*udpv6_encap_enable)(void);
 	void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index cc70f6da8462..7897efe80727 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -412,6 +412,12 @@ static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
 	return f6i->fib6_nh.nh_dev;
 }
 
+static inline
+struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i)
+{
+	return f6i->fib6_nh.nh_lwtstate;
+}
+
 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
 		     unsigned int flags);
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 4cf1ef935ed9..7b9c82de11cc 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -300,6 +300,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 	return mtu;
 }
 
+u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
+		      struct in6_addr *saddr);
+
 struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
 				   struct net_device *dev, struct sk_buff *skb,
 				   const void *daddr);
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 2fe754fd4f5e..5cd0029d930e 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -161,12 +161,20 @@ eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
 	return f6i;
 }
 
+static u32
+eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
+			       struct in6_addr *saddr)
+{
+	return 0;
+}
+
 const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
 	.ipv6_dst_lookup   = eafnosupport_ipv6_dst_lookup,
 	.fib6_get_table    = eafnosupport_fib6_get_table,
 	.fib6_table_lookup = eafnosupport_fib6_table_lookup,
 	.fib6_lookup       = eafnosupport_fib6_lookup,
 	.fib6_multipath_select = eafnosupport_fib6_multipath_select,
+	.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
 };
 EXPORT_SYMBOL_GPL(ipv6_stub);
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 50de8b0d4f70..9ed0eae91758 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -894,6 +894,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
 	.fib6_table_lookup = fib6_table_lookup,
 	.fib6_lookup       = fib6_lookup,
 	.fib6_multipath_select = fib6_multipath_select,
+	.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
 	.udpv6_encap_enable = udpv6_encap_enable,
 	.ndisc_send_na = ndisc_send_na,
 	.nd_tbl	= &nd_tbl,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cc24ed3bc334..dc5d5c84dbef 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2603,6 +2603,54 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
+/* MTU selection:
+ * 1. mtu on route is locked - use it
+ * 2. mtu from nexthop exception
+ * 3. mtu from egress device
+ *
+ * based on ip6_dst_mtu_forward and exception logic of
+ * rt6_find_cached_rt; called with rcu_read_lock
+ */
+u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
+		      struct in6_addr *saddr)
+{
+	struct rt6_exception_bucket *bucket;
+	struct rt6_exception *rt6_ex;
+	struct in6_addr *src_key;
+	struct inet6_dev *idev;
+	u32 mtu = 0;
+
+	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
+		mtu = f6i->fib6_pmtu;
+		if (mtu)
+			goto out;
+	}
+
+	src_key = NULL;
+#ifdef CONFIG_IPV6_SUBTREES
+	if (f6i->fib6_src.plen)
+		src_key = saddr;
+#endif
+
+	bucket = rcu_dereference(f6i->rt6i_exception_bucket);
+	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
+	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
+		mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
+
+	if (likely(!mtu)) {
+		struct net_device *dev = fib6_info_nh_dev(f6i);
+
+		mtu = IPV6_MIN_MTU;
+		idev = __in6_dev_get(dev);
+		if (idev && idev->cnf.mtu6 > mtu)
+			mtu = idev->cnf.mtu6;
+	}
+
+	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+out:
+	return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
+}
+
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 				  struct flowi6 *fl6)
 {
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 bpf-next 3/3] bpf: Add mtu checking to FIB forwarding helper
  2018-05-21 16:08 [PATCH v2 bpf-next 0/3] bpf: Add MTU check to fib lookup helper dsahern
  2018-05-21 16:08 ` [PATCH v2 bpf-next 1/3] net/ipv4: Add helper to return path MTU based on fib result dsahern
  2018-05-21 16:08 ` [PATCH v2 bpf-next 2/3] net/ipv6: " dsahern
@ 2018-05-21 16:08 ` dsahern
  2018-05-22  8:59 ` [PATCH v2 bpf-next 0/3] bpf: Add MTU check to fib lookup helper Daniel Borkmann
  3 siblings, 0 replies; 5+ messages in thread
From: dsahern @ 2018-05-21 16:08 UTC (permalink / raw)
  To: netdev, borkmann, ast; +Cc: davem, David Ahern

From: David Ahern <dsahern@gmail•com>

Add check that egress MTU can handle packet to be forwarded. If
the MTU is less than the packet length, return 0 meaning the
packet is expected to continue up the stack for help - eg.,
fragmenting the packet or sending an ICMP.

The XDP path needs to leverage the FIB entry for an MTU on the
route spec or an exception entry for a given destination. The
skb path lets is_skb_forwardable decide if the packet can be
sent.

Signed-off-by: David Ahern <dsahern@gmail•com>
---
 net/core/filter.c | 42 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 7 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index aec5ebafb262..ba3ff5aa575a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4089,7 +4089,7 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
 
 #if IS_ENABLED(CONFIG_INET)
 static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
-			       u32 flags)
+			       u32 flags, bool check_mtu)
 {
 	struct in_device *in_dev;
 	struct neighbour *neigh;
@@ -4098,6 +4098,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	struct fib_nh *nh;
 	struct flowi4 fl4;
 	int err;
+	u32 mtu;
 
 	dev = dev_get_by_index_rcu(net, params->ifindex);
 	if (unlikely(!dev))
@@ -4149,6 +4150,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	if (res.fi->fib_nhs > 1)
 		fib_select_path(net, &res, &fl4, NULL);
 
+	if (check_mtu) {
+		mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
+		if (params->tot_len > mtu)
+			return 0;
+	}
+
 	nh = &res.fi->fib_nh[res.nh_sel];
 
 	/* do not handle lwt encaps right now */
@@ -4177,7 +4184,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 #if IS_ENABLED(CONFIG_IPV6)
 static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
-			       u32 flags)
+			       u32 flags, bool check_mtu)
 {
 	struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
 	struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
@@ -4188,6 +4195,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	struct flowi6 fl6;
 	int strict = 0;
 	int oif;
+	u32 mtu;
 
 	/* link local addresses are never forwarded */
 	if (rt6_need_strict(dst) || rt6_need_strict(src))
@@ -4250,6 +4258,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 						       fl6.flowi6_oif, NULL,
 						       strict);
 
+	if (check_mtu) {
+		mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
+		if (params->tot_len > mtu)
+			return 0;
+	}
+
 	if (f6i->fib6_nh.nh_lwtstate)
 		return 0;
 
@@ -4282,12 +4296,12 @@ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
 #if IS_ENABLED(CONFIG_INET)
 	case AF_INET:
 		return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
-					   flags);
+					   flags, true);
 #endif
 #if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
-					   flags);
+					   flags, true);
 #endif
 	}
 	return 0;
@@ -4306,20 +4320,34 @@ static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
 BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
 	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
 {
+	struct net *net = dev_net(skb->dev);
+	int index = 0;
+
 	if (plen < sizeof(*params))
 		return -EINVAL;
 
 	switch (params->family) {
 #if IS_ENABLED(CONFIG_INET)
 	case AF_INET:
-		return bpf_ipv4_fib_lookup(dev_net(skb->dev), params, flags);
+		index = bpf_ipv4_fib_lookup(net, params, flags, false);
+		break;
 #endif
 #if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
-		return bpf_ipv6_fib_lookup(dev_net(skb->dev), params, flags);
+		index = bpf_ipv6_fib_lookup(net, params, flags, false);
+		break;
 #endif
 	}
-	return -ENOTSUPP;
+
+	if (index > 0) {
+		struct net_device *dev;
+
+		dev = dev_get_by_index_rcu(net, index);
+		if (!is_skb_forwardable(dev, skb))
+			index = 0;
+	}
+
+	return index;
 }
 
 static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 bpf-next 0/3] bpf: Add MTU check to fib lookup helper
  2018-05-21 16:08 [PATCH v2 bpf-next 0/3] bpf: Add MTU check to fib lookup helper dsahern
                   ` (2 preceding siblings ...)
  2018-05-21 16:08 ` [PATCH v2 bpf-next 3/3] bpf: Add mtu checking to FIB forwarding helper dsahern
@ 2018-05-22  8:59 ` Daniel Borkmann
  3 siblings, 0 replies; 5+ messages in thread
From: Daniel Borkmann @ 2018-05-22  8:59 UTC (permalink / raw)
  To: dsahern, netdev, borkmann, ast; +Cc: davem, David Ahern

On 05/21/2018 06:08 PM, dsahern@kernel•org wrote:
> From: David Ahern <dsahern@gmail•com>
> 
> Packets that exceed the egress MTU can not be forwarded in the fast path.
> Add IPv4 and IPv6 MTU helpers that take a FIB lookup result (versus the
> typical dst path) and add the calls to bpf_ipv{4,6}_fib_lookup.
> 
> v2
> - add ip6_mtu_from_fib6 to ipv6_stub
> - only call the new MTU helpers for fib lookups in XDP path; skb
>   path uses is_skb_forwardable to determine if the packet can be
>   sent via the egress device from the FIB lookup
> 
> David Ahern (3):
>   net/ipv4: Add helper to return path MTU based on fib result
>   net/ipv6: Add helper to return path MTU based on fib result
>   bpf: Add mtu checking to FIB forwarding helper
> 
>  include/net/addrconf.h   |  2 ++
>  include/net/ip6_fib.h    |  6 ++++++
>  include/net/ip6_route.h  |  3 +++
>  include/net/ip_fib.h     |  2 ++
>  net/core/filter.c        | 42 +++++++++++++++++++++++++++++++++++-------
>  net/ipv4/route.c         | 31 +++++++++++++++++++++++++++++++
>  net/ipv6/addrconf_core.c |  8 ++++++++
>  net/ipv6/af_inet6.c      |  1 +
>  net/ipv6/route.c         | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
>  9 files changed, 136 insertions(+), 7 deletions(-)

Applied to bpf-next, thanks David!

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-05-22  8:59 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-05-21 16:08 [PATCH v2 bpf-next 0/3] bpf: Add MTU check to fib lookup helper dsahern
2018-05-21 16:08 ` [PATCH v2 bpf-next 1/3] net/ipv4: Add helper to return path MTU based on fib result dsahern
2018-05-21 16:08 ` [PATCH v2 bpf-next 2/3] net/ipv6: " dsahern
2018-05-21 16:08 ` [PATCH v2 bpf-next 3/3] bpf: Add mtu checking to FIB forwarding helper dsahern
2018-05-22  8:59 ` [PATCH v2 bpf-next 0/3] bpf: Add MTU check to fib lookup helper Daniel Borkmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox