From: "Toke Høiland-Jørgensen" <toke@redhat•com>
To: David Ahern <dsahern@gmail•com>, David Ahern <dsahern@kernel•org>,
netdev@vger•kernel.org
Cc: davem@davemloft•net, kuba@kernel•org,
prashantbhole.linux@gmail•com, jasowang@redhat•com,
brouer@redhat•com, toshiaki.makita1@gmail•com,
daniel@iogearbox•net, john.fastabend@gmail•com, ast@kernel•org,
kafai@fb•com, songliubraving@fb•com, yhs@fb•com, andriin@fb•com,
David Ahern <dahern@digitalocean•com>
Subject: Re: [PATCH bpf-next 06/16] net: Add IFLA_XDP_EGRESS for XDP programs in the egress path
Date: Tue, 21 Apr 2020 15:27:56 +0200 [thread overview]
Message-ID: <87mu757yub.fsf@toke.dk> (raw)
In-Reply-To: <aa39f863-a833-0f57-d09f-8bd1d0259123@gmail.com>
David Ahern <dsahern@gmail•com> writes:
> On 4/21/20 4:17 AM, Toke Høiland-Jørgensen wrote:
>> David Ahern <dsahern@kernel•org> writes:
>>
>>> From: David Ahern <dahern@digitalocean•com>
>>>
>>> Running programs in the egress path, on skbs or xdp_frames, does not
>>> require driver specific resources like Rx path. Accordingly, the
>>> programs can be run in core code, so add xdp_egress_prog to net_device
>>> to hold a reference to an attached program.
>>>
>>> For UAPI, add IFLA_XDP_EGRESS to if_link.h to specify egress programs,
>>> add a new attach flag, XDP_ATTACHED_EGRESS_CORE, to denote the
>>> attach point is at the core level (as opposed to driver or hardware)
>>> and add IFLA_XDP_EGRESS_CORE_PROG_ID for reporting the program id.
>>>
>>> Add egress argument to do_setlink_xdp to denote processing of
>>> IFLA_XDP_EGRESS versus IFLA_XDP, and add a check that none of the
>>> existing modes (SKB, DRV or HW) are set since those modes are not
>>> valid. The expectation is that XDP_FLAGS_HW_MODE will be used later
>>> (e.g., offloading guest programs).
>>>
>>> Add rtnl_xdp_egress_fill and helpers as the egress counterpart to the
>>> existing rtnl_xdp_fill.
>>>
>>> Signed-off-by: David Ahern <dahern@digitalocean•com>
>>> ---
>>> include/linux/netdevice.h | 1 +
>>> include/uapi/linux/if_link.h | 3 +
>>> net/core/rtnetlink.c | 96 ++++++++++++++++++++++++++++--
>>> tools/include/uapi/linux/if_link.h | 3 +
>>> 4 files changed, 99 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>>> index d0bb9e09660a..3133247681fd 100644
>>> --- a/include/linux/netdevice.h
>>> +++ b/include/linux/netdevice.h
>>> @@ -1995,6 +1995,7 @@ struct net_device {
>>> unsigned int real_num_rx_queues;
>>>
>>> struct bpf_prog __rcu *xdp_prog;
>>> + struct bpf_prog __rcu *xdp_egress_prog;
>>> unsigned long gro_flush_timeout;
>>> rx_handler_func_t __rcu *rx_handler;
>>> void __rcu *rx_handler_data;
>>> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
>>> index 127c704eeba9..b3c6cb2f0f0a 100644
>>> --- a/include/uapi/linux/if_link.h
>>> +++ b/include/uapi/linux/if_link.h
>>> @@ -170,6 +170,7 @@ enum {
>>> IFLA_PROP_LIST,
>>> IFLA_ALT_IFNAME, /* Alternative ifname */
>>> IFLA_PERM_ADDRESS,
>>> + IFLA_XDP_EGRESS, /* nested attribute with 1 or more IFLA_XDP_ attrs */
>>> __IFLA_MAX
>>> };
>>>
>>> @@ -988,6 +989,7 @@ enum {
>>> XDP_ATTACHED_SKB,
>>> XDP_ATTACHED_HW,
>>> XDP_ATTACHED_MULTI,
>>> + XDP_ATTACHED_EGRESS_CORE,
>>> };
>>>
>>> enum {
>>> @@ -1000,6 +1002,7 @@ enum {
>>> IFLA_XDP_SKB_PROG_ID,
>>> IFLA_XDP_HW_PROG_ID,
>>> IFLA_XDP_EXPECTED_FD,
>>> + IFLA_XDP_EGRESS_CORE_PROG_ID,
>>> __IFLA_XDP_MAX,
>>> };
>>>
>>> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
>>> index dc44af16226a..e9bc5cee06c8 100644
>>> --- a/net/core/rtnetlink.c
>>> +++ b/net/core/rtnetlink.c
>>> @@ -1030,7 +1030,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
>>> + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
>>> + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
>>> + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
>>> - + rtnl_xdp_size() /* IFLA_XDP */
>>> + + rtnl_xdp_size() * 2 /* IFLA_XDP and IFLA_XDP_EGRESS */
>>> + nla_total_size(4) /* IFLA_EVENT */
>>> + nla_total_size(4) /* IFLA_NEW_NETNSID */
>>> + nla_total_size(4) /* IFLA_NEW_IFINDEX */
>>> @@ -1395,6 +1395,42 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
>>> return 0;
>>> }
>>>
>>> +static u32 rtnl_xdp_egress_prog(struct net_device *dev)
>>> +{
>>> + const struct bpf_prog *prog;
>>> +
>>> + ASSERT_RTNL();
>>> +
>>> + prog = rtnl_dereference(dev->xdp_egress_prog);
>>> + if (!prog)
>>> + return 0;
>>> + return prog->aux->id;
>>> +}
>>> +
>>> +static int rtnl_xdp_egress_report(struct sk_buff *skb, struct net_device *dev,
>>> + u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
>>> + u32 (*get_prog_id)(struct net_device *dev))
>>> +{
>>> + u32 curr_id;
>>> + int err;
>>> +
>>> + curr_id = get_prog_id(dev);
>>> + if (!curr_id)
>>> + return 0;
>>> +
>>> + *prog_id = curr_id;
>>> + err = nla_put_u32(skb, attr, curr_id);
>>> + if (err)
>>> + return err;
>>> +
>>> + if (*mode != XDP_ATTACHED_NONE)
>>> + *mode = XDP_ATTACHED_MULTI;
>>> + else
>>> + *mode = tgt_mode;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static u32 rtnl_xdp_prog_skb(struct net_device *dev)
>>> {
>>> const struct bpf_prog *generic_xdp_prog;
>>> @@ -1486,6 +1522,42 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
>>> return err;
>>> }
>>>
>>> +static int rtnl_xdp_egress_fill(struct sk_buff *skb, struct net_device *dev)
>>> +{
>>> + u8 mode = XDP_ATTACHED_NONE;
>>> + struct nlattr *xdp;
>>> + u32 prog_id = 0;
>>> + int err;
>>> +
>>> + xdp = nla_nest_start_noflag(skb, IFLA_XDP_EGRESS);
>>> + if (!xdp)
>>> + return -EMSGSIZE;
>>> +
>>> + err = rtnl_xdp_egress_report(skb, dev, &prog_id, &mode,
>>> + XDP_ATTACHED_EGRESS_CORE,
>>> + IFLA_XDP_EGRESS_CORE_PROG_ID,
>>> + rtnl_xdp_egress_prog);
>>> + if (err)
>>> + goto err_cancel;
>>> +
>>> + err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode);
>>> + if (err)
>>> + goto err_cancel;
>>> +
>>> + if (prog_id && mode != XDP_ATTACHED_MULTI) {
>>> + err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id);
>>> + if (err)
>>> + goto err_cancel;
>>> + }
>>> +
>>> + nla_nest_end(skb, xdp);
>>> + return 0;
>>> +
>>> +err_cancel:
>>> + nla_nest_cancel(skb, xdp);
>>> + return err;
>>> +}
>>> +
>>> static u32 rtnl_get_event(unsigned long event)
>>> {
>>> u32 rtnl_event_type = IFLA_EVENT_NONE;
>>> @@ -1743,6 +1815,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
>>> if (rtnl_xdp_fill(skb, dev))
>>> goto nla_put_failure;
>>>
>>> + if (rtnl_xdp_egress_fill(skb, dev))
>>> + goto nla_put_failure;
>>> +
>>> if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) {
>>> if (rtnl_link_fill(skb, dev) < 0)
>>> goto nla_put_failure;
>>> @@ -1827,6 +1902,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
>>> [IFLA_ALT_IFNAME] = { .type = NLA_STRING,
>>> .len = ALTIFNAMSIZ - 1 },
>>> [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
>>> + [IFLA_XDP_EGRESS] = { .type = NLA_NESTED },
>>> };
>>>
>>> static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
>>> @@ -2482,7 +2558,8 @@ static int do_set_master(struct net_device *dev, int ifindex,
>>> #define DO_SETLINK_NOTIFY 0x03
>>>
>>> static int do_setlink_xdp(struct net_device *dev, struct nlattr *tb,
>>> - int *status, struct netlink_ext_ack *extack)
>>> + int *status, bool egress,
>>> + struct netlink_ext_ack *extack)
>>> {
>>> struct nlattr *xdp[IFLA_XDP_MAX + 1];
>>> u32 xdp_flags = 0;
>>> @@ -2498,6 +2575,10 @@ static int do_setlink_xdp(struct net_device *dev, struct nlattr *tb,
>>>
>>> if (xdp[IFLA_XDP_FLAGS]) {
>>> xdp_flags = nla_get_u32(xdp[IFLA_XDP_FLAGS]);
>>> + if (egress && xdp_flags & XDP_FLAGS_MODES) {
>>> + NL_SET_ERR_MSG(extack, "XDP_FLAGS_MODES not valid for egress");
>>> + goto out_einval;
>>> + }
>>> if (xdp_flags & ~XDP_FLAGS_MASK)
>>> goto out_einval;
>>> if (hweight32(xdp_flags & XDP_FLAGS_MODES) > 1)
>>> @@ -2515,7 +2596,7 @@ static int do_setlink_xdp(struct net_device *dev, struct nlattr *tb,
>>>
>>> err = dev_change_xdp_fd(dev, extack,
>>> nla_get_s32(xdp[IFLA_XDP_FD]),
>>> - expected_fd, xdp_flags, false);
>>> + expected_fd, xdp_flags, egress);
>>> if (err)
>>> return err;
>>>
>>> @@ -2821,7 +2902,14 @@ static int do_setlink(const struct sk_buff *skb,
>>> }
>>>
>>> if (tb[IFLA_XDP]) {
>>> - err = do_setlink_xdp(dev, tb[IFLA_XDP], &status, extack);
>>> + err = do_setlink_xdp(dev, tb[IFLA_XDP], &status, false, extack);
>>> + if (err)
>>> + goto errout;
>>> + }
>>> +
>>> + if (tb[IFLA_XDP_EGRESS]) {
>>> + err = do_setlink_xdp(dev, tb[IFLA_XDP_EGRESS], &status, true,
>>> + extack);
>>> if (err)
>>> goto errout;
>>> }
>>
>> This means that IFLA_XDP and IFLA_XDP_EGRESS can be present in the same
>> netlink message, right? But then installation of the RX program could
>> succeed, but userspace would still get an error if the egress program
>> installation fails? That is probably not good?
>
> That's a good catch.
>
>>
>> Since I don't think we can atomically make sure both operations fail or
>> succeed, maybe it's better to disallow both entries being present in the
>> same netlink message?
>>
>
> I think so since there is no way to undo the setlink for IFLA_XDP if
> IFLA_XDP_EGRESS fails - existing program references are gone.
Yeah, and also even if we could undo it, there would be a window where
the new program could process packets before the revert. I'm fine with
just rejecting the combination; I guess the most common thing would be
to set these separately anyway.
> Although, existing do_setlink leaves the device in an incomplete state
> on any failure...
Hmm, ideally we should fix that as well (if possible)? Probably out of
scope for this patch series, though :)
-Toke
next prev parent reply other threads:[~2020-04-21 13:28 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-20 20:00 [PATCH bpf-next 00/16] net: Add support for XDP in egress path David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 01/16] net: Refactor convert_to_xdp_frame David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 02/16] net: Move handling of IFLA_XDP attribute out of do_setlink David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 03/16] net: Add XDP setup and query commands for Tx programs David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 04/16] net: Add BPF_XDP_EGRESS as a bpf_attach_type David Ahern
2020-04-21 10:14 ` Toke Høiland-Jørgensen
2020-04-21 12:50 ` David Ahern
2020-04-21 13:25 ` Toke Høiland-Jørgensen
2020-04-21 13:49 ` David Ahern
2020-04-22 11:21 ` Toke Høiland-Jørgensen
2020-04-22 14:51 ` David Ahern
2020-04-22 15:27 ` Toke Høiland-Jørgensen
2020-04-22 15:33 ` David Ahern
2020-04-22 15:51 ` Toke Høiland-Jørgensen
2020-04-22 15:56 ` David Ahern
2020-04-23 15:23 ` Toke Høiland-Jørgensen
2020-04-23 0:39 ` Alexei Starovoitov
2020-04-23 16:40 ` Toke Høiland-Jørgensen
2020-04-23 16:52 ` Alexei Starovoitov
2020-04-23 17:05 ` Toke Høiland-Jørgensen
2020-04-23 22:44 ` Alexei Starovoitov
2020-04-23 23:49 ` Toke Høiland-Jørgensen
2020-04-24 0:53 ` Alexei Starovoitov
2020-04-24 0:58 ` David Ahern
2020-04-24 8:55 ` Toke Høiland-Jørgensen
2020-04-20 20:00 ` [PATCH bpf-next 05/16] xdp: Add xdp_txq_info to xdp_buff David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 06/16] net: Add IFLA_XDP_EGRESS for XDP programs in the egress path David Ahern
2020-04-21 10:17 ` Toke Høiland-Jørgensen
2020-04-21 12:59 ` David Ahern
2020-04-21 13:27 ` Toke Høiland-Jørgensen [this message]
2020-04-20 20:00 ` [PATCH bpf-next 07/16] net: Rename do_xdp_generic to do_xdp_generic_rx David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 08/16] net: rename netif_receive_generic_xdp to do_generic_xdp_core David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 09/16] net: set XDP egress program on netdevice David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 10/16] net: Support xdp in the Tx path for packets as an skb David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 11/16] net: Support xdp in the Tx path for xdp_frames David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 12/16] libbpf: Add egress XDP support David Ahern
2020-04-21 10:20 ` Toke Høiland-Jørgensen
2020-04-21 13:03 ` David Ahern
2020-04-21 13:28 ` Toke Høiland-Jørgensen
2020-04-23 1:19 ` Andrii Nakryiko
2020-04-23 1:33 ` David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 13/16] bpftool: Add support for XDP egress David Ahern
2020-04-23 10:43 ` Quentin Monnet
2020-04-23 18:50 ` David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 14/16] selftest: Add test for xdp_egress David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 15/16] selftest: Add xdp_egress attach tests David Ahern
2020-04-20 20:00 ` [PATCH bpf-next 16/16] samples/bpf: add XDP egress support to xdp1 David Ahern
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87mu757yub.fsf@toke.dk \
--to=toke@redhat$(echo .)com \
--cc=andriin@fb$(echo .)com \
--cc=ast@kernel$(echo .)org \
--cc=brouer@redhat$(echo .)com \
--cc=dahern@digitalocean$(echo .)com \
--cc=daniel@iogearbox$(echo .)net \
--cc=davem@davemloft$(echo .)net \
--cc=dsahern@gmail$(echo .)com \
--cc=dsahern@kernel$(echo .)org \
--cc=jasowang@redhat$(echo .)com \
--cc=john.fastabend@gmail$(echo .)com \
--cc=kafai@fb$(echo .)com \
--cc=kuba@kernel$(echo .)org \
--cc=netdev@vger$(echo .)kernel.org \
--cc=prashantbhole.linux@gmail$(echo .)com \
--cc=songliubraving@fb$(echo .)com \
--cc=toshiaki.makita1@gmail$(echo .)com \
--cc=yhs@fb$(echo .)com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox