public inbox for netdev@vger.kernel.org 
 help / color / mirror / Atom feed
From: John Fastabend <john.r.fastabend@intel•com>
To: Neil Horman <nhorman@tuxdriver•com>
Cc: "netdev@vger•kernel.org" <netdev@vger•kernel.org>,
	"Love, Robert W" <robert.w.love@intel•com>,
	"David S. Miller" <davem@davemloft•net>
Subject: Re: [PATCH 1/2] net: add network priority cgroup infrastructure (v2)
Date: Thu, 17 Nov 2011 19:17:28 -0800	[thread overview]
Message-ID: <4EC5CE48.8020603@intel.com> (raw)
In-Reply-To: <1321566472-28969-2-git-send-email-nhorman@tuxdriver.com>

On 11/17/2011 1:47 PM, Neil Horman wrote:
> This patch adds in the infrastructure code to create the network priority
> cgroup.  The cgroup, in addition to the standard processes file creates two
> control files:
> 
> 1) prioidx - This is a read-only file that exports the index of this cgroup.
> This is a value that is both arbitrary and unique to a cgroup in this subsystem,
> and is used to index the per-device priority map
> 
> 2) priomap - This is a writeable file.  On read it reports a table of 2-tuples
> <name:priority> where name is the name of a network interface and priority is
> indicates the priority assigned to frames egresessing on the named interface and
> originating from a pid in this cgroup
> 
> This cgroup allows for skb priority to be set prior to a root qdisc getting
> selected. This is benenficial for DCB enabled systems, in that it allows for any
> application to use dcb configured priorities so without application modification
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver•com>
> Signed-off-by: John Fastabend <john.r.fastabend@intel•com>
> CC: Robert Love <robert.w.love@intel•com>
> CC: "David S. Miller" <davem@davemloft•net>
> ---

one more nit... can we convert the rcu_dereference() into rtnl_dereference()
where it is relevant?

/**
 * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL
 * @p: The pointer to read, prior to dereferencing
 *
 * Return the value of the specified RCU-protected pointer, but omit
 * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
 * caller holds RTNL.
 */
#define rtnl_dereference(p)                                     \
        rcu_dereference_protected(p, lockdep_rtnl_is_held())

[...]

> +
> +static void extend_netdev_table(struct net_device *dev, u32 new_len)
> +{
> +       size_t new_size = sizeof(struct netprio_map) +
> +                          ((sizeof(u32) * new_len));
> +       struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
> +       struct netprio_map *old_priomap;
> +       int i;
> +
> +       old_priomap  = rcu_dereference(dev->priomap);
> +

This could be rtnl_dereference(dev->priomap) to annotate that we always
have the rtnl lock here.

> +       if (!new_priomap) {
> +               printk(KERN_WARNING "Unable to alloc new priomap!\n");
> +               return;
> +       }
> +
> +       for (i = 0;
> +            old_priomap && (i < old_priomap->priomap_len);
> +            i++)
> +               new_priomap->priomap[i] = old_priomap->priomap[i];
> +
> +       new_priomap->priomap_len = new_len;
> +
> +       rcu_assign_pointer(dev->priomap, new_priomap);
> +       if (old_priomap)
> +               kfree_rcu(old_priomap, rcu);
> +}
> +
> +static void update_netdev_tables(void)
> +{
> +       struct net_device *dev;
> +       u32 max_len = atomic_read(&max_prioidx);
> +       struct netprio_map *map;
> +
> +       rtnl_lock();
          ^^^^^^^^^^^
> +
> +
> +       for_each_netdev(&init_net, dev) {
> +               map = rcu_dereference(dev->priomap);

same here rtnl_dereference(dev->priomap);

> +               if ((!map) ||
> +                   (map->priomap_len < max_len))
> +                       extend_netdev_table(dev, max_len);
> +       }
> +
> +       rtnl_unlock();
> +}
> +
> +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
> +                                                struct cgroup *cgrp)
> +{
> +       struct cgroup_netprio_state *cs;
> +       int ret;
> +
> +       cs = kzalloc(sizeof(*cs), GFP_KERNEL);
> +       if (!cs)
> +               return ERR_PTR(-ENOMEM);
> +
> +       if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
> +               kfree(cs);
> +               return ERR_PTR(-EINVAL);
> +       }
> +
> +       ret = get_prioidx(&cs->prioidx);
> +       if (ret != 0) {
> +               printk(KERN_WARNING "No space in priority index array\n");
> +               kfree(cs);
> +               return ERR_PTR(ret);
> +       }
> +
> +       return &cs->css;
> +}
> +
> +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
> +{
> +       struct cgroup_netprio_state *cs;
> +       struct net_device *dev;
> +       struct netprio_map *map;
> +
> +       cs = cgrp_netprio_state(cgrp);
> +       rtnl_lock();
> +       for_each_netdev(&init_net, dev) {
> +               map = rcu_dereference(dev->priomap);

map = rtnl_dereference(dev->priomap)

> +               if (map)
> +                       map->priomap[cs->prioidx] = 0;
> +       }
> +       rtnl_unlock();
> +       put_prioidx(cs->prioidx);
> +       kfree(cs);
> +}
> +
> +static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
> +{
> +       return (u64)cgrp_netprio_state(cgrp)->prioidx;
> +}
> +
> +static int read_priomap(struct cgroup *cont, struct cftype *cft,
> +                       struct cgroup_map_cb *cb)
> +{
> +       struct net_device *dev;
> +       u32 prioidx = cgrp_netprio_state(cont)->prioidx;
> +       u32 priority;
> +       struct netprio_map *map;
> +
> +       rcu_read_lock();
> +
> +       for_each_netdev_rcu(&init_net, dev) {
> +               map = rcu_dereference(dev->priomap);
> +               priority = map ? map->priomap[prioidx] : 0;
> +               cb->fill(cb, dev->name, priority);
> +       }
> +       rcu_read_unlock();
> +       return 0;
> +}
> +

[...]

> +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
> +{
> +       return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
> +}
> +
> +static int netprio_device_event(struct notifier_block *unused,
> +                               unsigned long event, void *ptr)
> +{
> +       struct net_device *dev = ptr;
> +       struct netprio_map *old;
> +       u32 max_len = atomic_read(&max_prioidx);
> +
> +       old = rcu_dereference_protected(dev->priomap, 1);

This is protected because of the rtnl lock so use,

old = rtnl_dereference(dev->priomap);

> +       /*
> +        * Note this is called with rtnl_lock held so we have update side
> +        * protection on our rcu assignments
> +        */
> +
> +       switch (event) {
> +
> +       case NETDEV_REGISTER:
> +               if (max_len)
> +                       extend_netdev_table(dev, max_len);
> +               break;
> +       case NETDEV_UNREGISTER:
> +               rcu_assign_pointer(dev->priomap, NULL);
> +               if (old)
> +                       kfree_rcu(old, rcu);
> +               break;
> +       }
> +       return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block netprio_device_notifier = {
> +       .notifier_call = netprio_device_event
> +};
> +

I can roll an update if you want, just let me know.

Thanks,
John

  reply	other threads:[~2011-11-18  3:17 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-16 20:51 [PATCH 0/2] net: Add network priority cgroup Neil Horman
2011-11-16 20:51 ` [PATCH 1/2] net: add network priority cgroup infrastructure Neil Horman
2011-11-17  9:29   ` WANG Cong
2011-11-17 11:25   ` John Fastabend
2011-11-17 11:56     ` Neil Horman
2011-11-16 20:51 ` [PATCH 2/2] net: add documentation for net_prio cgroups Neil Horman
2011-11-17 21:47 ` [PATCH 0/2] net: Add network priority cgroup (v2) Neil Horman
2011-11-17 21:47   ` [PATCH 1/2] net: add network priority cgroup infrastructure (v2) Neil Horman
2011-11-18  3:17     ` John Fastabend [this message]
2011-11-18 11:50       ` Neil Horman
2011-11-17 21:47   ` [PATCH 2/2] net: add documentation for net_prio cgroups (v2) Neil Horman
2011-11-18 16:13 ` [PATCH 0/2] net: Add network priority cgroup (v3) Neil Horman
2011-11-18 16:13   ` [PATCH 1/2] net: add network priority cgroup infrastructure (v3) Neil Horman
2011-11-21 20:39     ` David Miller
2011-11-21 20:43       ` Neil Horman
2011-11-18 16:13   ` [PATCH 2/2] net: add documentation for net_prio cgroups (v3) Neil Horman
2011-11-22 15:10 ` [PATCH 0/2] net: Add network priority cgroup (v4) Neil Horman
2011-11-22 15:10   ` [PATCH 1/2] net: add network priority cgroup infrastructure (v4) Neil Horman
2011-11-22 15:10   ` [PATCH 2/2] net: add documentation for net_prio cgroups (v4) Neil Horman
2011-11-22 20:23   ` [PATCH 0/2] net: Add network priority cgroup (v4) David Miller
2011-11-22 20:39     ` Neil Horman
2011-11-22 20:45       ` David Miller
2011-11-22 21:00         ` Neil Horman
2011-11-23 10:19           ` Kirill Smelkov
2011-11-23 11:49             ` Neil Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4EC5CE48.8020603@intel.com \
    --to=john.r.fastabend@intel$(echo .)com \
    --cc=davem@davemloft$(echo .)net \
    --cc=netdev@vger$(echo .)kernel.org \
    --cc=nhorman@tuxdriver$(echo .)com \
    --cc=robert.w.love@intel$(echo .)com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox