From: Vlad Buslov <vladbu@nvidia•com>
To: Victor Nogueira <victor@mojatatu•com>
Cc: <jhs@mojatatu•com>, <xiyou.wangcong@gmail•com>,
<jiri@resnulli•us>, <davem@davemloft•net>, <edumazet@google•com>,
<kuba@kernel•org>, <pabeni@redhat•com>, <netdev@vger•kernel.org>,
<mleitner@redhat•com>, <horms@kernel•org>,
<pctammela@mojatatu•com>, <kernel@mojatatu•com>
Subject: Re: [PATCH net-next v2 1/3] net/sched: Introduce tc block netdev tracking infra
Date: Mon, 21 Aug 2023 22:12:51 +0300 [thread overview]
Message-ID: <871qfw6w8d.fsf@nvidia.com> (raw)
In-Reply-To: <20230819163515.2266246-2-victor@mojatatu.com>
On Sat 19 Aug 2023 at 13:35, Victor Nogueira <victor@mojatatu•com> wrote:
> The tc block is a collection of netdevs/ports which allow qdiscs to share
> filter block instances (as opposed to the traditional tc filter per port).
> Example:
> $ tc qdisc add dev ens7 ingress block 22
> $ tc qdisc add dev ens8 ingress block 22
>
> Now we can add a filter using the block index:
> $ tc filter add block 22 protocol ip pref 25 \
> flower dst_ip 192.168.0.0/16 action drop
>
> Up to this point, the block is unaware of its ports. This patch fixes that
> and makes the tc block ports available to the datapath as well as control
> path on offloading.
>
> Suggested-by: Jiri Pirko <jiri@nvidia•com>
> Co-developed-by: Jamal Hadi Salim <jhs@mojatatu•com>
> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu•com>
> Co-developed-by: Pedro Tammela <pctammela@mojatatu•com>
> Signed-off-by: Pedro Tammela <pctammela@mojatatu•com>
> Signed-off-by: Victor Nogueira <victor@mojatatu•com>
> ---
> include/net/sch_generic.h | 4 ++
> net/sched/cls_api.c | 1 +
> net/sched/sch_api.c | 79 +++++++++++++++++++++++++++++++++++++--
> net/sched/sch_generic.c | 34 ++++++++++++++++-
> 4 files changed, 112 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index e92f73bb3198..824a0ecb5afc 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -19,6 +19,7 @@
> #include <net/gen_stats.h>
> #include <net/rtnetlink.h>
> #include <net/flow_offload.h>
> +#include <linux/xarray.h>
>
> struct Qdisc_ops;
> struct qdisc_walker;
> @@ -126,6 +127,8 @@ struct Qdisc {
>
> struct rcu_head rcu;
> netdevice_tracker dev_tracker;
> + netdevice_tracker in_block_tracker;
> + netdevice_tracker eg_block_tracker;
> /* private data */
> long privdata[] ____cacheline_aligned;
> };
> @@ -458,6 +461,7 @@ struct tcf_chain {
> };
>
> struct tcf_block {
> + struct xarray ports; /* datapath accessible */
> /* Lock protects tcf_block and lifetime-management data of chains
> * attached to the block (refcnt, action_refcnt, explicitly_created).
> */
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index a193cc7b3241..a976792ef02f 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -1003,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
> refcount_set(&block->refcnt, 1);
> block->net = net;
> block->index = block_index;
> + xa_init(&block->ports);
The matching xa_destroy() call for this xa_init() is missing.
>
> /* Don't store q pointer for blocks which are shared */
> if (!tcf_block_shared(block))
> diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> index aa6b1fe65151..6c0c220cdb21 100644
> --- a/net/sched/sch_api.c
> +++ b/net/sched/sch_api.c
> @@ -1180,6 +1180,71 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
> return 0;
> }
>
> +static void qdisc_block_undo_set(struct Qdisc *sch, struct nlattr **tca)
> +{
> + if (tca[TCA_INGRESS_BLOCK])
> + sch->ops->ingress_block_set(sch, 0);
> +
> + if (tca[TCA_EGRESS_BLOCK])
> + sch->ops->egress_block_set(sch, 0);
> +}
> +
> +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
> + struct nlattr **tca,
> + struct netlink_ext_ack *extack)
> +{
> + const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
> + struct tcf_block *in_block = NULL;
> + struct tcf_block *eg_block = NULL;
> + unsigned long cl = 0;
> + int err;
> +
> + if (tca[TCA_INGRESS_BLOCK]) {
> + /* works for both ingress and clsact */
> + cl = TC_H_MIN_INGRESS;
> + in_block = cl_ops->tcf_block(sch, cl, NULL);
> + if (!in_block) {
> + NL_SET_ERR_MSG(extack, "Shared ingress block missing");
> + return -EINVAL;
> + }
> +
> + err = xa_insert(&in_block->ports, dev->ifindex, dev, GFP_KERNEL);
> + if (err) {
> + NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> + return err;
> + }
> +
> + netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
> + }
> +
> + if (tca[TCA_EGRESS_BLOCK]) {
> + cl = TC_H_MIN_EGRESS;
> + eg_block = cl_ops->tcf_block(sch, cl, NULL);
> + if (!eg_block) {
> + NL_SET_ERR_MSG(extack, "Shared egress block missing");
> + err = -EINVAL;
> + goto err_out;
> + }
> +
> + err = xa_insert(&eg_block->ports, dev->ifindex, dev, GFP_KERNEL);
> + if (err) {
> + netdev_put(dev, &sch->eg_block_tracker);
> + NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
> + goto err_out;
> + }
> + netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> + }
> +
> + return 0;
> +err_out:
> + if (in_block) {
> + xa_erase(&in_block->ports, dev->ifindex);
> + netdev_put(dev, &sch->in_block_tracker);
> + NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> + }
> + return err;
> +}
> +
> static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
> struct netlink_ext_ack *extack)
> {
> @@ -1270,7 +1335,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> sch = qdisc_alloc(dev_queue, ops, extack);
> if (IS_ERR(sch)) {
> err = PTR_ERR(sch);
> - goto err_out2;
> + goto err_out1;
> }
>
> sch->parent = parent;
> @@ -1289,7 +1354,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> if (handle == 0) {
> NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
> err = -ENOSPC;
> - goto err_out3;
> + goto err_out2;
> }
> }
> if (!netif_is_multiqueue(dev))
> @@ -1311,7 +1376,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
>
> err = qdisc_block_indexes_set(sch, tca, extack);
> if (err)
> - goto err_out3;
> + goto err_out2;
>
> if (tca[TCA_STAB]) {
> stab = qdisc_get_stab(tca[TCA_STAB], extack);
> @@ -1350,6 +1415,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> qdisc_hash_add(sch, false);
> trace_qdisc_create(ops, dev, parent);
>
> + err = qdisc_block_add_dev(sch, dev, tca, extack);
> + if (err)
> + goto err_out4;
> +
> return sch;
>
> err_out4:
> @@ -1360,9 +1429,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> ops->destroy(sch);
> qdisc_put_stab(rtnl_dereference(sch->stab));
> err_out3:
> + qdisc_block_undo_set(sch, tca);
Is this a bugfix? This new call now runs for every site that jumps to
err_out{3|4}, even though you only added new code at the end of the
function.
> +err_out2:
> netdev_put(dev, &sch->dev_tracker);
> qdisc_free(sch);
> -err_out2:
> +err_out1:
> module_put(ops->owner);
> err_out:
> *errp = err;
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 5d7e23f4cc0e..0fb51fd6f01e 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -1048,7 +1048,12 @@ static void qdisc_free_cb(struct rcu_head *head)
>
> static void __qdisc_destroy(struct Qdisc *qdisc)
> {
> - const struct Qdisc_ops *ops = qdisc->ops;
> + struct net_device *dev = qdisc_dev(qdisc);
> + const struct Qdisc_ops *ops = qdisc->ops;
> + const struct Qdisc_class_ops *cops;
> + struct tcf_block *block;
> + unsigned long cl;
> + u32 block_index;
>
> #ifdef CONFIG_NET_SCHED
> qdisc_hash_del(qdisc);
> @@ -1059,11 +1064,36 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
>
> qdisc_reset(qdisc);
>
> + cops = ops->cl_ops;
> + if (ops->ingress_block_get) {
> + block_index = ops->ingress_block_get(qdisc);
> + if (block_index) {
> + cl = TC_H_MIN_INGRESS;
> + block = cops->tcf_block(qdisc, cl, NULL);
> + if (block) {
> + if (xa_erase(&block->ports, dev->ifindex))
> + netdev_put(dev, &qdisc->in_block_tracker);
> + }
> + }
> + }
> +
> + if (ops->egress_block_get) {
> + block_index = ops->egress_block_get(qdisc);
> + if (block_index) {
> + cl = TC_H_MIN_EGRESS;
> + block = cops->tcf_block(qdisc, cl, NULL);
> + if (block) {
> + if (xa_erase(&block->ports, dev->ifindex))
> + netdev_put(dev, &qdisc->eg_block_tracker);
> + }
> + }
> + }
> +
> if (ops->destroy)
> ops->destroy(qdisc);
>
> module_put(ops->owner);
> - netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
> + netdev_put(dev, &qdisc->dev_tracker);
>
> trace_qdisc_destroy(qdisc);
next prev parent reply other threads:[~2023-08-21 19:18 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-19 16:35 [PATCH net-next v2 0/3] net/sched: Introduce tc block ports tracking and use Victor Nogueira
2023-08-19 16:35 ` [PATCH net-next v2 1/3] net/sched: Introduce tc block netdev tracking infra Victor Nogueira
2023-08-21 19:12 ` Vlad Buslov [this message]
2023-08-24 14:05 ` Jamal Hadi Salim
2023-08-19 16:35 ` [PATCH net-next v2 2/3] net/sched: cls_api: Expose tc block ports to the datapath Victor Nogueira
2023-08-23 17:33 ` Marcelo Ricardo Leitner
2023-08-24 14:09 ` Jamal Hadi Salim
2023-08-19 16:35 ` [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action Victor Nogueira
2023-08-23 17:58 ` Marcelo Ricardo Leitner
2023-08-24 14:19 ` Jamal Hadi Salim
2023-08-24 14:30 ` Weird sparse error WAS( " Jamal Hadi Salim
2023-08-24 14:41 ` Paolo Abeni
2023-08-24 14:57 ` Jamal Hadi Salim
2023-09-05 9:18 ` Dan Carpenter
2023-08-21 19:07 ` [PATCH net-next v2 0/3] net/sched: Introduce tc block ports tracking and use Vlad Buslov
2023-08-24 13:47 ` Jamal Hadi Salim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=871qfw6w8d.fsf@nvidia.com \
--to=vladbu@nvidia$(echo .)com \
--cc=davem@davemloft$(echo .)net \
--cc=edumazet@google$(echo .)com \
--cc=horms@kernel$(echo .)org \
--cc=jhs@mojatatu$(echo .)com \
--cc=jiri@resnulli$(echo .)us \
--cc=kernel@mojatatu$(echo .)com \
--cc=kuba@kernel$(echo .)org \
--cc=mleitner@redhat$(echo .)com \
--cc=netdev@vger$(echo .)kernel.org \
--cc=pabeni@redhat$(echo .)com \
--cc=pctammela@mojatatu$(echo .)com \
--cc=victor@mojatatu$(echo .)com \
--cc=xiyou.wangcong@gmail$(echo .)com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox