Re: [net-next V5 PATCH 1/5] bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP

public inbox for netdev@vger.kernel.org 
 help / color / mirror / Atom feed

From: Daniel Borkmann <daniel@iogearbox•net>
To: Jesper Dangaard Brouer <brouer@redhat•com>, netdev@vger•kernel.org
Cc: jakub.kicinski@netronome•com,
	"Michael S. Tsirkin" <mst@redhat•com>,
	pavel.odintsov@gmail•com, Jason Wang <jasowang@redhat•com>,
	mchan@broadcom•com, John Fastabend <john.fastabend@gmail•com>,
	peter.waskiewicz.jr@intel•com,
	Daniel Borkmann <borkmann@iogearbox•net>,
	Alexei Starovoitov <alexei.starovoitov@gmail•com>,
	Andy Gospodarek <andy@greyhouse•net>
Subject: Re: [net-next V5 PATCH 1/5] bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP
Date: Mon, 09 Oct 2017 15:31:21 +0200	[thread overview]
Message-ID: <59DB7A29.5050906@iogearbox.net> (raw)
In-Reply-To: <150730636196.22839.17119032803741721925.stgit@firesoul>

On 10/06/2017 06:12 PM, Jesper Dangaard Brouer wrote:
[...]
> +static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
> +{
> +	struct bpf_cpu_map *cmap;
> +	int err = -ENOMEM;

The err init here is basically not needed since it is overridden later
anyway without being read, but ...

> +	u64 cost;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return ERR_PTR(-EPERM);
> +
> +	/* check sanity of attributes */
> +	if (attr->max_entries == 0 || attr->key_size != 4 ||
> +	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
> +		return ERR_PTR(-EINVAL);
> +
> +	cmap = kzalloc(sizeof(*cmap), GFP_USER);
> +	if (!cmap)
> +		return ERR_PTR(-ENOMEM);
> +
> +	/* mandatory map attributes */
> +	cmap->map.map_type = attr->map_type;
> +	cmap->map.key_size = attr->key_size;
> +	cmap->map.value_size = attr->value_size;
> +	cmap->map.max_entries = attr->max_entries;
> +	cmap->map.map_flags = attr->map_flags;
> +	cmap->map.numa_node = bpf_map_attr_numa_node(attr);
> +
> +	/* Pre-limit array size based on NR_CPUS, not final CPU check */
> +	if (cmap->map.max_entries > NR_CPUS)

Nit: needs to be >= NR_CPUS.

> +		return ERR_PTR(-E2BIG);
> +
> +	/* make sure page count doesn't overflow */
> +	cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
> +	cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
> +	if (cost >= U32_MAX - PAGE_SIZE)
> +		goto free_cmap;
> +	cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
> +
> +	/* Notice returns -EPERM on if map size is larger than memlock limit */
> +	err = bpf_map_precharge_memlock(cmap->map.pages);
> +	if (err)
> +		goto free_cmap;

... here, you need to set err = -ENOMEM.

> +	/* A per cpu bitfield with a bit per possible CPU in map  */
> +	cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
> +					    __alignof__(unsigned long));
> +	if (!cmap->flush_needed)
> +		goto free_cmap;

Otherwise, when we fail here or in the error case for bpf_map_area_alloc()
below, we still return 0 although it's really -ENOMEM. And returning 0
would mean that find_and_alloc_map() will miss this, since it only tests
for IS_ERR(), and we'll crash later on thinking we have a valid map
pointer.

> +	/* Alloc array for possible remote "destination" CPUs */
> +	cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
> +					   sizeof(struct bpf_cpu_map_entry *),
> +					   cmap->map.numa_node);
> +	if (!cmap->cpu_map)
> +		goto free_cmap;
> +
> +	return &cmap->map;
> +free_cmap:
> +	free_percpu(cmap->flush_needed);
> +	kfree(cmap);
> +	return ERR_PTR(err);
> +}
> +
[...]
> +int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
> +				u64 map_flags)
> +{
> +	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> +	struct bpf_cpu_map_entry *rcpu;
> +
> +	/* Array index key correspond to CPU number */
> +	u32 key_cpu = *(u32 *)key;
> +	/* Value is the queue size */
> +	u32 qsize = *(u32 *)value;
> +
> +	/* Make sure CPU is a valid possible cpu */
> +	if (!cpu_possible(key_cpu))
> +		return -ENODEV;

Nit: cpu_possible() expects that key_cpu < NR_CPUS, otherwise you'd
access the bitmap out of bounds.

Better move the below test for 'key_cpu >= cmap->map.max_entries'
first as on map alloc you enforce upper limit of NR_CPUS on the
max_entries, then above cpu_possible() test will be valid, too.

> +	if (unlikely(map_flags > BPF_EXIST))
> +		return -EINVAL;
> +	if (unlikely(key_cpu >= cmap->map.max_entries))
> +		return -E2BIG;
> +	if (unlikely(map_flags == BPF_NOEXIST))
> +		return -EEXIST;
> +	if (unlikely(qsize > 16384)) /* sanity limit on qsize */
> +		return -EOVERFLOW;
> +
> +	if (qsize == 0) {
> +		rcpu = NULL; /* Same as deleting */
> +	} else {
> +		/* Updating qsize cause re-allocation of bpf_cpu_map_entry */
> +		rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id);
> +		if (!rcpu)
> +			return -ENOMEM;
> +	}
> +	rcu_read_lock();
> +	__cpu_map_entry_replace(cmap, key_cpu, rcpu);
> +	rcu_read_unlock();
> +	return 0;
> +}
[...]
> +struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
> +{
> +	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> +	struct bpf_cpu_map_entry *rcpu;
> +
> +	if (key >= map->max_entries)
> +		return NULL;
> +
> +	rcpu = READ_ONCE(cmap->cpu_map[key]);
> +	return rcpu;
> +}
> +
> +static void *cpu_map_lookup_elem(struct bpf_map *map, void *key)
> +{
> +	struct bpf_cpu_map_entry *rcpu =
> +		__cpu_map_lookup_elem(map, *(u32 *)key);
> +
> +	return rcpu ? &rcpu->qsize : NULL;

I still think, as in my prior email/comment, that we should use a per-cpu
scratch buffer here. It would be nice to keep the guarantee that no one
can modify it; it's just a tiny change.

> +}
> +
> +static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
> +{
> +	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> +	u32 index = key ? *(u32 *)key : U32_MAX;
> +	u32 *next = next_key;
> +
> +	if (index >= cmap->map.max_entries) {
> +		*next = 0;
> +		return 0;
> +	}
> +
> +	if (index == cmap->map.max_entries - 1)
> +		return -ENOENT;
> +	*next = index + 1;
> +	return 0;
> +}
> +
> +const struct bpf_map_ops cpu_map_ops = {
> +	.map_alloc		= cpu_map_alloc,
> +	.map_free		= cpu_map_free,
> +	.map_delete_elem	= cpu_map_delete_elem,
> +	.map_update_elem	= cpu_map_update_elem,
> +	.map_lookup_elem	= cpu_map_lookup_elem,
> +	.map_get_next_key	= cpu_map_get_next_key,
> +};

next prev parent reply	other threads:[~2017-10-09 13:31 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-06 16:12 [net-next V5 PATCH 0/5] New bpf cpumap type for XDP_REDIRECT Jesper Dangaard Brouer
2017-10-06 16:12 ` [net-next V5 PATCH 1/5] bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP Jesper Dangaard Brouer
2017-10-09 13:31   ` Daniel Borkmann [this message]
2017-10-09 13:40     ` Daniel Borkmann
2017-10-09 17:59     ` Jesper Dangaard Brouer
2017-10-09 20:56       ` Daniel Borkmann
2017-10-06 16:12 ` [net-next V5 PATCH 2/5] bpf: XDP_REDIRECT enable use of cpumap Jesper Dangaard Brouer
2017-10-06 16:12 ` [net-next V5 PATCH 3/5] bpf: cpumap xdp_buff to skb conversion and allocation Jesper Dangaard Brouer
2017-10-06 16:12 ` [net-next V5 PATCH 4/5] bpf: cpumap add tracepoints Jesper Dangaard Brouer
2017-10-06 16:13 ` [net-next V5 PATCH 5/5] samples/bpf: add cpumap sample program xdp_redirect_cpu Jesper Dangaard Brouer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=59DB7A29.5050906@iogearbox.net \
    --to=daniel@iogearbox$(echo .)net \
    --cc=alexei.starovoitov@gmail$(echo .)com \
    --cc=andy@greyhouse$(echo .)net \
    --cc=borkmann@iogearbox$(echo .)net \
    --cc=brouer@redhat$(echo .)com \
    --cc=jakub.kicinski@netronome$(echo .)com \
    --cc=jasowang@redhat$(echo .)com \
    --cc=john.fastabend@gmail$(echo .)com \
    --cc=mchan@broadcom$(echo .)com \
    --cc=mst@redhat$(echo .)com \
    --cc=netdev@vger$(echo .)kernel.org \
    --cc=pavel.odintsov@gmail$(echo .)com \
    --cc=peter.waskiewicz.jr@intel$(echo .)com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox