From: John Fastabend <john.r.fastabend@intel•com>
To: Neil Horman <nhorman@tuxdriver•com>
Cc: "netdev@vger•kernel.org" <netdev@vger•kernel.org>,
"Love, Robert W" <robert.w.love@intel•com>,
"David S. Miller" <davem@davemloft•net>
Subject: Re: [PATCH 1/2] net: add network priority cgroup infrastructure (v2)
Date: Thu, 17 Nov 2011 19:17:28 -0800 [thread overview]
Message-ID: <4EC5CE48.8020603@intel.com> (raw)
In-Reply-To: <1321566472-28969-2-git-send-email-nhorman@tuxdriver.com>
On 11/17/2011 1:47 PM, Neil Horman wrote:
> This patch adds in the infrastructure code to create the network priority
> cgroup. The cgroup, in addition to the standard processes file creates two
> control files:
>
> 1) prioidx - This is a read-only file that exports the index of this cgroup.
> This is a value that is both arbitrary and unique to a cgroup in this subsystem,
> and is used to index the per-device priority map
>
> 2) priomap - This is a writeable file. On read it reports a table of 2-tuples
> <name:priority> where name is the name of a network interface and priority
> indicates the priority assigned to frames egressing on the named interface and
> originating from a pid in this cgroup
>
> This cgroup allows for skb priority to be set prior to a root qdisc getting
> selected. This is beneficial for DCB enabled systems, in that it allows for any
> application to use dcb configured priorities without application modification
>
> Signed-off-by: Neil Horman <nhorman@tuxdriver•com>
> Signed-off-by: John Fastabend <john.r.fastabend@intel•com>
> CC: Robert Love <robert.w.love@intel•com>
> CC: "David S. Miller" <davem@davemloft•net>
> ---
one more nit... can we convert the rcu_dereference() into rtnl_dereference()
where it is relevant?
/**
* rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL
* @p: The pointer to read, prior to dereferencing
*
* Return the value of the specified RCU-protected pointer, but omit
* both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
* caller holds RTNL.
*/
#define rtnl_dereference(p) \
rcu_dereference_protected(p, lockdep_rtnl_is_held())
[...]
> +
> +static void extend_netdev_table(struct net_device *dev, u32 new_len)
> +{
> + size_t new_size = sizeof(struct netprio_map) +
> + ((sizeof(u32) * new_len));
> + struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
> + struct netprio_map *old_priomap;
> + int i;
> +
> + old_priomap = rcu_dereference(dev->priomap);
> +
This could be rtnl_dereference(dev->priomap) to annotate that we always
have the rtnl lock here.
> + if (!new_priomap) {
> + printk(KERN_WARNING "Unable to alloc new priomap!\n");
> + return;
> + }
> +
> + for (i = 0;
> + old_priomap && (i < old_priomap->priomap_len);
> + i++)
> + new_priomap->priomap[i] = old_priomap->priomap[i];
> +
> + new_priomap->priomap_len = new_len;
> +
> + rcu_assign_pointer(dev->priomap, new_priomap);
> + if (old_priomap)
> + kfree_rcu(old_priomap, rcu);
> +}
> +
> +static void update_netdev_tables(void)
> +{
> + struct net_device *dev;
> + u32 max_len = atomic_read(&max_prioidx);
> + struct netprio_map *map;
> +
> + rtnl_lock();
^^^^^^^^^^^
> +
> +
> + for_each_netdev(&init_net, dev) {
> + map = rcu_dereference(dev->priomap);
Same here, use: map = rtnl_dereference(dev->priomap);
> + if ((!map) ||
> + (map->priomap_len < max_len))
> + extend_netdev_table(dev, max_len);
> + }
> +
> + rtnl_unlock();
> +}
> +
> +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
> + struct cgroup *cgrp)
> +{
> + struct cgroup_netprio_state *cs;
> + int ret;
> +
> + cs = kzalloc(sizeof(*cs), GFP_KERNEL);
> + if (!cs)
> + return ERR_PTR(-ENOMEM);
> +
> + if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
> + kfree(cs);
> + return ERR_PTR(-EINVAL);
> + }
> +
> + ret = get_prioidx(&cs->prioidx);
> + if (ret != 0) {
> + printk(KERN_WARNING "No space in priority index array\n");
> + kfree(cs);
> + return ERR_PTR(ret);
> + }
> +
> + return &cs->css;
> +}
> +
> +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
> +{
> + struct cgroup_netprio_state *cs;
> + struct net_device *dev;
> + struct netprio_map *map;
> +
> + cs = cgrp_netprio_state(cgrp);
> + rtnl_lock();
> + for_each_netdev(&init_net, dev) {
> + map = rcu_dereference(dev->priomap);
And here, use: map = rtnl_dereference(dev->priomap);
> + if (map)
> + map->priomap[cs->prioidx] = 0;
> + }
> + rtnl_unlock();
> + put_prioidx(cs->prioidx);
> + kfree(cs);
> +}
> +
> +static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
> +{
> + return (u64)cgrp_netprio_state(cgrp)->prioidx;
> +}
> +
> +static int read_priomap(struct cgroup *cont, struct cftype *cft,
> + struct cgroup_map_cb *cb)
> +{
> + struct net_device *dev;
> + u32 prioidx = cgrp_netprio_state(cont)->prioidx;
> + u32 priority;
> + struct netprio_map *map;
> +
> + rcu_read_lock();
> +
> + for_each_netdev_rcu(&init_net, dev) {
> + map = rcu_dereference(dev->priomap);
> + priority = map ? map->priomap[prioidx] : 0;
> + cb->fill(cb, dev->name, priority);
> + }
> + rcu_read_unlock();
> + return 0;
> +}
> +
[...]
> +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
> +{
> + return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
> +}
> +
> +static int netprio_device_event(struct notifier_block *unused,
> + unsigned long event, void *ptr)
> +{
> + struct net_device *dev = ptr;
> + struct netprio_map *old;
> + u32 max_len = atomic_read(&max_prioidx);
> +
> + old = rcu_dereference_protected(dev->priomap, 1);
This is protected by the rtnl lock, so use:
old = rtnl_dereference(dev->priomap);
> + /*
> + * Note this is called with rtnl_lock held so we have update side
> + * protection on our rcu assignments
> + */
> +
> + switch (event) {
> +
> + case NETDEV_REGISTER:
> + if (max_len)
> + extend_netdev_table(dev, max_len);
> + break;
> + case NETDEV_UNREGISTER:
> + rcu_assign_pointer(dev->priomap, NULL);
> + if (old)
> + kfree_rcu(old, rcu);
> + break;
> + }
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block netprio_device_notifier = {
> + .notifier_call = netprio_device_event
> +};
> +
I can roll an update if you want, just let me know.
Thanks,
John
next prev parent reply other threads:[~2011-11-18 3:17 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-16 20:51 [PATCH 0/2] net: Add network priority cgroup Neil Horman
2011-11-16 20:51 ` [PATCH 1/2] net: add network priority cgroup infrastructure Neil Horman
2011-11-17 9:29 ` WANG Cong
2011-11-17 11:25 ` John Fastabend
2011-11-17 11:56 ` Neil Horman
2011-11-16 20:51 ` [PATCH 2/2] net: add documentation for net_prio cgroups Neil Horman
2011-11-17 21:47 ` [PATCH 0/2] net: Add network priority cgroup (v2) Neil Horman
2011-11-17 21:47 ` [PATCH 1/2] net: add network priority cgroup infrastructure (v2) Neil Horman
2011-11-18 3:17 ` John Fastabend [this message]
2011-11-18 11:50 ` Neil Horman
2011-11-17 21:47 ` [PATCH 2/2] net: add documentation for net_prio cgroups (v2) Neil Horman
2011-11-18 16:13 ` [PATCH 0/2] net: Add network priority cgroup (v3) Neil Horman
2011-11-18 16:13 ` [PATCH 1/2] net: add network priority cgroup infrastructure (v3) Neil Horman
2011-11-21 20:39 ` David Miller
2011-11-21 20:43 ` Neil Horman
2011-11-18 16:13 ` [PATCH 2/2] net: add documentation for net_prio cgroups (v3) Neil Horman
2011-11-22 15:10 ` [PATCH 0/2] net: Add network priority cgroup (v4) Neil Horman
2011-11-22 15:10 ` [PATCH 1/2] net: add network priority cgroup infrastructure (v4) Neil Horman
2011-11-22 15:10 ` [PATCH 2/2] net: add documentation for net_prio cgroups (v4) Neil Horman
2011-11-22 20:23 ` [PATCH 0/2] net: Add network priority cgroup (v4) David Miller
2011-11-22 20:39 ` Neil Horman
2011-11-22 20:45 ` David Miller
2011-11-22 21:00 ` Neil Horman
2011-11-23 10:19 ` Kirill Smelkov
2011-11-23 11:49 ` Neil Horman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4EC5CE48.8020603@intel.com \
--to=john.r.fastabend@intel$(echo .)com \
--cc=davem@davemloft$(echo .)net \
--cc=netdev@vger$(echo .)kernel.org \
--cc=nhorman@tuxdriver$(echo .)com \
--cc=robert.w.love@intel$(echo .)com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox