public inbox for linux-arm-kernel@lists.infradead.org 
 help / color / mirror / Atom feed
From: "Emil Tsalapatis" <emil@etsalapatis•com>
To: "Tejun Heo" <tj@kernel•org>, "David Vernet" <void@manifault•com>,
	"Andrea Righi" <arighi@nvidia•com>,
	"Changwoo Min" <changwoo@igalia•com>,
	"Alexei Starovoitov" <ast@kernel•org>,
	"Andrii Nakryiko" <andrii@kernel•org>,
	"Daniel Borkmann" <daniel@iogearbox•net>,
	"Martin KaFai Lau" <martin.lau@linux•dev>,
	"Kumar Kartikeya Dwivedi" <memxor@gmail•com>
Cc: "Peter Zijlstra" <peterz@infradead•org>,
	"Catalin Marinas" <catalin.marinas@arm•com>,
	"Will Deacon" <will@kernel•org>,
	"Thomas Gleixner" <tglx@kernel•org>,
	"Ingo Molnar" <mingo@redhat•com>,
	"Borislav Petkov" <bp@alien8•de>,
	"Dave Hansen" <dave.hansen@linux•intel.com>,
	"Andrew Morton" <akpm@linux-foundation•org>,
	"David Hildenbrand" <david@kernel•org>,
	"Mike Rapoport" <rppt@kernel•org>,
	"Emil Tsalapatis" <emil@etsalapatis•com>,
	<sched-ext@lists•linux.dev>, <bpf@vger•kernel.org>,
	<x86@kernel•org>, <linux-arm-kernel@lists•infradead.org>,
	<linux-mm@kvack•org>, <linux-kernel@vger•kernel.org>
Subject: Re: [PATCH 6/8] sched_ext: Require an arena for cid-form schedulers
Date: Thu, 21 May 2026 00:15:26 -0400	[thread overview]
Message-ID: <DIO2ARBBUOJX.1OJ5RVION2UH1@etsalapatis.com> (raw)
In-Reply-To: <20260520235052.4180316-7-tj@kernel.org>

On Wed May 20, 2026 at 7:50 PM EDT, Tejun Heo wrote:
> Upcoming patches will let the kernel place arena-resident scratch shared
> with the BPF program (e.g. per-CPU set_cmask cmask) so the BPF side can
> dereference it directly via __arena pointers, replacing the current
> cmask_copy_from_kernel() probe-read loop. That requires each cid-form
> scheduler to expose its arena to the kernel. Kernel-side accesses are
> recovered by the per-arena scratch-page mechanism.
>
> bpf_scx_reg_cid() walks the struct_ops member progs via
> bpf_struct_ops_for_each_prog() and reads each prog's arena via
> bpf_prog_arena(). The verifier enforces one arena per program, so each
> member prog contributes at most one arena. All non-NULL contributions must
> match and at least one member prog must use an arena. The map ref is held on
> scx_sched and dropped on sched destroy. cpu-form schedulers (bpf_scx_reg)
> are unchanged - no arena requirement.
>
> Signed-off-by: Tejun Heo <tj@kernel•org>
> ---
>  kernel/sched/ext.c          | 56 ++++++++++++++++++++++++++++++++++++-
>  kernel/sched/ext_internal.h |  8 ++++++
>  2 files changed, 63 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 9c458552d14f..56f94ac32ba0 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -5003,6 +5003,8 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
>  
>  	rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
>  	free_exit_info(sch->exit_info);
> +	if (sch->arena_map)
> +		bpf_map_put(sch->arena_map);
>  	kfree(sch);
>  }
>  
> @@ -6746,6 +6748,7 @@ struct scx_enable_cmd {
>  		struct sched_ext_ops_cid	*ops_cid;
>  	};
>  	bool			is_cid_type;
> +	struct bpf_map		*arena_map;	/* arena ref to transfer to sch */
>  	int			ret;
>  };
>  
> @@ -6913,6 +6916,15 @@ static struct scx_sched *scx_alloc_and_add_sched(struct scx_enable_cmd *cmd,
>  		return ERR_PTR(ret);
>  	}
>  #endif	/* CONFIG_EXT_SUB_SCHED */
> +
> +	/*
> +	 * Consume the arena_map ref bpf_scx_reg_cid() took. Defer to here so
> +	 * earlier failure paths leave cmd->arena_map set and bpf_scx_reg_cid
> +	 * drops the ref. After this point, sch owns the ref and any cleanup
> +	 * runs through scx_sched_free_rcu_work() which puts it.
> +	 */
> +	sch->arena_map = cmd->arena_map;
> +	cmd->arena_map = NULL;
>  	return sch;
>  
>  #ifdef CONFIG_EXT_SUB_SCHED
> @@ -7898,11 +7910,53 @@ static int bpf_scx_reg(void *kdata, struct bpf_link *link)
>  	return scx_enable(&cmd, link);
>  }
>  
> +struct scx_arena_scan {
> +	struct bpf_map	*arena;
> +	int		err;

Can we skip the int err here...

> +};
> +
> +/*
> + * The verifier enforces one arena per BPF program, so each struct_ops
> + * member prog contributes at most one arena via bpf_prog_arena().
> + * Require all non-NULL contributions to match.
> + */
> +static int scx_arena_scan_prog(struct bpf_prog *prog, void *data)
> +{
> +	struct scx_arena_scan *s = data;
> +	struct bpf_map *arena = bpf_prog_arena(prog);
> +
> +	if (!arena)
> +		return 0;
> +	if (s->arena && s->arena != arena) {
> +		s->err = -EINVAL;

...and just directly return -EINVAL here? bpf_struct_ops_for_each_prog()
breaks out of its loop when we return non-zero, so do we need the extra
scx_arena_scan struct?

> +		return 1;
> +	}
> +	s->arena = arena;
> +	return 0;
> +}
> +
>  static int bpf_scx_reg_cid(void *kdata, struct bpf_link *link)
>  {
>  	struct scx_enable_cmd cmd = { .ops_cid = kdata, .is_cid_type = true };
> +	struct scx_arena_scan scan = {};
> +	int ret;
>  
> -	return scx_enable(&cmd, link);
> +	bpf_struct_ops_for_each_prog(kdata, scx_arena_scan_prog, &scan);
> +	if (scan.err) {
> +		pr_err("sched_ext: cid-form scheduler uses multiple arena maps\n");
> +		return scan.err;
> +	}
> +	if (!scan.arena) {
> +		pr_err("sched_ext: cid-form scheduler must use a BPF arena map\n");
> +		return -EINVAL;
> +	}
> +
> +	bpf_map_inc(scan.arena);
> +	cmd.arena_map = scan.arena;
> +	ret = scx_enable(&cmd, link);
> +	if (cmd.arena_map)		/* not consumed by scx_alloc_and_add_sched() */
> +		bpf_map_put(cmd.arena_map);
> +	return ret;
>  }
>  
>  static void bpf_scx_unreg(void *kdata, struct bpf_link *link)
> diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
> index 7258aea94b9f..d40cfd29ddaa 100644
> --- a/kernel/sched/ext_internal.h
> +++ b/kernel/sched/ext_internal.h
> @@ -1111,6 +1111,14 @@ struct scx_sched {
>  		struct sched_ext_ops_cid	ops_cid;
>  	};
>  	bool			is_cid_type;	/* true if registered via bpf_sched_ext_ops_cid */
> +
> +	/*
> +	 * Arena map auto-discovered from member progs at struct_ops attach.
> +	 * cid-form schedulers must use exactly one arena across all member
> +	 * progs. NULL on cpu-form.
> +	 */
> +	struct bpf_map		*arena_map;
> +
>  	DECLARE_BITMAP(has_op, SCX_OPI_END);
>  
>  	/*



  reply	other threads:[~2026-05-21  4:15 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-20 23:50 [PATCHSET v3 sched_ext/for-7.2] bpf/arena: Direct kernel-side access Tejun Heo
2026-05-20 23:50 ` [PATCH 1/8] mm: Add ptep_try_set() for lockless empty-slot installs Tejun Heo
2026-05-21  7:00   ` Andrea Righi
2026-05-21 17:37   ` [PATCH v3 " Tejun Heo
2026-05-20 23:50 ` [PATCH 2/8] bpf: Recover arena kernel faults with scratch page Tejun Heo
2026-05-21  3:16   ` Emil Tsalapatis
2026-05-21  9:42   ` Alexei Starovoitov
2026-05-21 17:39     ` Tejun Heo
2026-05-20 23:50 ` [PATCH 3/8] bpf: Add sleepable variant of bpf_arena_alloc_pages for kernel callers Tejun Heo
2026-05-21  3:17   ` Emil Tsalapatis
2026-05-20 23:50 ` [PATCH 4/8] bpf: Add bpf_struct_ops_for_each_prog() Tejun Heo
2026-05-21  4:07   ` Emil Tsalapatis
2026-05-20 23:50 ` [PATCH 5/8] bpf/arena: Add bpf_arena_map_kern_vm_start() and bpf_prog_arena() Tejun Heo
2026-05-21  4:08   ` Emil Tsalapatis
2026-05-20 23:50 ` [PATCH 6/8] sched_ext: Require an arena for cid-form schedulers Tejun Heo
2026-05-21  4:15   ` Emil Tsalapatis [this message]
2026-05-20 23:50 ` [PATCH 7/8] sched_ext: Sub-allocator over kernel-claimed BPF arena pages Tejun Heo
2026-05-21  7:56   ` Andrea Righi
2026-05-21 17:22     ` Tejun Heo
2026-05-21 17:37   ` [PATCH v2 " Tejun Heo
2026-05-21 17:54     ` Andrea Righi
2026-05-20 23:50 ` [PATCH 8/8] sched_ext: Convert ops.set_cmask() to arena-resident cmask Tejun Heo
2026-05-21  4:19   ` Emil Tsalapatis
2026-05-22  1:59 ` [PATCH v3 2/8] bpf: Recover arena kernel faults with scratch page Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2026-05-22 17:22 [PATCHSET v4 sched_ext/for-7.2] bpf/arena: Direct kernel-side access Tejun Heo
2026-05-22 17:22 ` [PATCH 6/8] sched_ext: Require an arena for cid-form schedulers Tejun Heo
2026-05-17 21:12 [PATCHSET v2 sched_ext/for-7.2] bpf/arena: Direct kernel-side access Tejun Heo
2026-05-17 21:12 ` [PATCH 6/8] sched_ext: Require an arena for cid-form schedulers Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DIO2ARBBUOJX.1OJ5RVION2UH1@etsalapatis.com \
    --to=emil@etsalapatis$(echo .)com \
    --cc=akpm@linux-foundation$(echo .)org \
    --cc=andrii@kernel$(echo .)org \
    --cc=arighi@nvidia$(echo .)com \
    --cc=ast@kernel$(echo .)org \
    --cc=bp@alien8$(echo .)de \
    --cc=bpf@vger$(echo .)kernel.org \
    --cc=catalin.marinas@arm$(echo .)com \
    --cc=changwoo@igalia$(echo .)com \
    --cc=daniel@iogearbox$(echo .)net \
    --cc=dave.hansen@linux$(echo .)intel.com \
    --cc=david@kernel$(echo .)org \
    --cc=linux-arm-kernel@lists$(echo .)infradead.org \
    --cc=linux-kernel@vger$(echo .)kernel.org \
    --cc=linux-mm@kvack$(echo .)org \
    --cc=martin.lau@linux$(echo .)dev \
    --cc=memxor@gmail$(echo .)com \
    --cc=mingo@redhat$(echo .)com \
    --cc=peterz@infradead$(echo .)org \
    --cc=rppt@kernel$(echo .)org \
    --cc=sched-ext@lists$(echo .)linux.dev \
    --cc=tglx@kernel$(echo .)org \
    --cc=tj@kernel$(echo .)org \
    --cc=void@manifault$(echo .)com \
    --cc=will@kernel$(echo .)org \
    --cc=x86@kernel$(echo .)org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox