From: Daniel Borkmann <daniel@iogearbox•net>
To: Tycho Andersen <tycho.andersen@canonical•com>,
Kees Cook <keescook@chromium•org>,
Alexei Starovoitov <ast@kernel•org>
Cc: "David S. Miller" <davem@davemloft•net>,
Will Drewry <wad@chromium•org>, Oleg Nesterov <oleg@redhat•com>,
Andy Lutomirski <luto@amacapital•net>,
Pavel Emelyanov <xemul@parallels•com>,
"Serge E. Hallyn" <serge.hallyn@ubuntu•com>,
linux-kernel@vger•kernel.org, netdev@vger•kernel.org,
linux-api@vger•kernel.org
Subject: Re: [PATCH v2 5/5] seccomp: add a way to attach a filter via eBPF fd
Date: Fri, 11 Sep 2015 14:37:59 +0200 [thread overview]
Message-ID: <55F2CB27.7030804@iogearbox.net> (raw)
In-Reply-To: <1441930862-14347-6-git-send-email-tycho.andersen@canonical.com>
On 09/11/2015 02:21 AM, Tycho Andersen wrote:
> This is the final bit needed to support seccomp filters created via the bpf
> syscall. The patch adds a new seccomp operation SECCOMP_MODE_FILTER_EBPF,
> which takes exactly one command (presumably to be expanded upon later when
> seccomp EBPFs support more interesting things) and an argument struct
> similar to that of bpf(), although the size is explicit in the struct to
> avoid changing the signature of seccomp().
>
> v2: Don't abuse seccomp's third argument; use a separate command and a
> pointer to a structure instead.
Comments below ...
> Signed-off-by: Tycho Andersen <tycho.andersen@canonical•com>
> CC: Kees Cook <keescook@chromium•org>
> CC: Will Drewry <wad@chromium•org>
> CC: Oleg Nesterov <oleg@redhat•com>
> CC: Andy Lutomirski <luto@amacapital•net>
> CC: Pavel Emelyanov <xemul@parallels•com>
> CC: Serge E. Hallyn <serge.hallyn@ubuntu•com>
> CC: Alexei Starovoitov <ast@kernel•org>
> CC: Daniel Borkmann <daniel@iogearbox•net>
> ---
> include/uapi/linux/seccomp.h | 16 +++++
> kernel/seccomp.c | 135 ++++++++++++++++++++++++++++++++++++++-----
> 2 files changed, 138 insertions(+), 13 deletions(-)
>
> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> index 0f238a4..a8694e2 100644
> --- a/include/uapi/linux/seccomp.h
> +++ b/include/uapi/linux/seccomp.h
> @@ -13,10 +13,14 @@
> /* Valid operations for seccomp syscall. */
> #define SECCOMP_SET_MODE_STRICT 0
> #define SECCOMP_SET_MODE_FILTER 1
> +#define SECCOMP_MODE_FILTER_EBPF 2
Should this be SECCOMP_SET_MODE_FILTER_EBPF or just SECCOMP_SET_MODE_EBPF?
> /* Valid flags for SECCOMP_SET_MODE_FILTER */
> #define SECCOMP_FILTER_FLAG_TSYNC 1
>
> +/* Valid cmds for SECCOMP_MODE_FILTER_EBPF */
> +#define SECCOMP_EBPF_ADD_FD 0
> +
> /*
> * All BPF programs must return a 32-bit value.
> * The bottom 16-bits are for optional return data.
> @@ -51,4 +55,16 @@ struct seccomp_data {
> __u64 args[6];
> };
>
> +struct seccomp_ebpf {
> + unsigned int size;
> +
> + union {
> + /* SECCOMP_EBPF_ADD_FD */
> + struct {
> + unsigned int add_flags;
> + __u32 add_fd;
> + };
> + };
> +};
> +
> #endif /* _UAPI_LINUX_SECCOMP_H */
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index 1856f69..e78175a 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -65,6 +65,9 @@ struct seccomp_filter {
> /* Limit any path through the tree to 256KB worth of instructions. */
> #define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
>
> +static long seccomp_install_filter(unsigned int flags,
> + struct seccomp_filter *prepared);
> +
> /*
> * Endianness is explicitly ignored and left for BPF program authors to manage
> * as per the specific architecture.
> @@ -356,17 +359,6 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
>
> BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
>
> - /*
> - * Installing a seccomp filter requires that the task has
> - * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
> - * This avoids scenarios where unprivileged tasks can affect the
> - * behavior of privileged children.
> - */
> - if (!task_no_new_privs(current) &&
> - security_capable_noaudit(current_cred(), current_user_ns(),
> - CAP_SYS_ADMIN) != 0)
> - return ERR_PTR(-EACCES);
> -
> /* Allocate a new seccomp_filter */
> sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
> if (!sfilter)
> @@ -510,8 +502,105 @@ static void seccomp_send_sigsys(int syscall, int reason)
> info.si_syscall = syscall;
> force_sig_info(SIGSYS, &info, current);
> }
> +
> #endif /* CONFIG_SECCOMP_FILTER */
>
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SECCOMP_FILTER)
> +static struct seccomp_filter *seccomp_prepare_ebpf(int fd)
> +{
> + struct seccomp_filter *ret;
> + struct bpf_prog *prog;
> +
> + prog = bpf_prog_get(fd);
> + if (IS_ERR(prog))
> + return (struct seccomp_filter *) prog;
Please use ERR_CAST(prog) here instead of the open-coded pointer cast.
> +
> + if (prog->type != BPF_PROG_TYPE_SECCOMP) {
> + bpf_prog_put(prog);
> + return ERR_PTR(-EINVAL);
> + }
> +
> + ret = kzalloc(sizeof(*ret), GFP_KERNEL | __GFP_NOWARN);
> + if (!ret) {
> + bpf_prog_put(prog);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + ret->prog = prog;
> + atomic_set(&ret->usage, 1);
> +
> + /* Intentionally don't bpf_prog_put() here, because the underlying prog
> + * is refcounted too and we're holding a reference from the struct
> + * seccomp_filter object.
> + */
> + return ret;
> +}
> +
> +static long seccomp_ebpf_add_fd(struct seccomp_ebpf *ebpf)
> +{
> + struct seccomp_filter *prepared;
> +
> + prepared = seccomp_prepare_ebpf(ebpf->add_fd);
> + if (IS_ERR(prepared))
> + return PTR_ERR(prepared);
> +
> + return seccomp_install_filter(ebpf->add_flags, prepared);
> +}
> +
> +static long seccomp_mode_filter_ebpf(unsigned int cmd, const char __user *uargs)
> +{
> + const struct seccomp_ebpf __user *uebpf;
> + struct seccomp_ebpf ebpf;
> + unsigned int size;
> + long ret = -EFAULT;
> +
> + uebpf = (const struct seccomp_ebpf __user *) uargs;
> +
> + if (get_user(size, &uebpf->size) != 0)
> + return -EFAULT;
> +
> + /* If we're handed a bigger struct than we know of,
> + * ensure all the unknown bits are 0 - i.e. new
> + * user-space does not rely on any kernel feature
> + * extensions we dont know about yet.
> + */
> + if (size > sizeof(ebpf)) {
> + unsigned char __user *addr;
> + unsigned char __user *end;
> + unsigned char val;
> +
> + addr = (void __user *)uebpf + sizeof(ebpf);
> + end = (void __user *)uebpf + size;
> +
> + for (; addr < end; addr++) {
> + int err = get_user(val, addr);
> +
> + if (err)
> + return err;
> + if (val)
> + return -E2BIG;
> + }
> + size = sizeof(ebpf);
> + }
> +
> + if (copy_from_user(&ebpf, uebpf, size) != 0)
> + return -EFAULT;
Not sure it's worth adding all this bpf(2)-like interface complexity here,
but fair enough; I guess there are some very good reasons and bigger
additions coming then ...
> + switch (cmd) {
> + case SECCOMP_EBPF_ADD_FD:
> + ret = seccomp_ebpf_add_fd(&ebpf);
> + break;
> + }
> +
> + return ret;
> +}
> +#else
> +static long seccomp_mode_filter_ebpf(unsigned int cmd, const char __user *uargs)
> +{
> + return -EINVAL;
> +}
> +#endif
> +
> /*
> * Secure computing mode 1 allows only read/write/exit/sigreturn.
> * To be fully secure this must be combined with rlimit
> @@ -760,9 +849,7 @@ out:
> static long seccomp_set_mode_filter(unsigned int flags,
> const char __user *filter)
> {
> - const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
> struct seccomp_filter *prepared = NULL;
> - long ret = -EINVAL;
>
> /* Validate flags. */
> if (flags & ~SECCOMP_FILTER_FLAG_MASK)
> @@ -773,6 +860,26 @@ static long seccomp_set_mode_filter(unsigned int flags,
> if (IS_ERR(prepared))
> return PTR_ERR(prepared);
>
> + return seccomp_install_filter(flags, prepared);
I (truly) hope I'm overlooking something ;) ...
... but why do all the (classic) seccomp-BPF preparation work (which is rather
a lot) up to this point, where you have it ready, only to *then* find out we don't
have the actual permissions?!
Plus, when seccomp_install_filter() fails with -EACCES, who releases all the
allocated resources and drops the program refs that were taken!?
I see the same in seccomp_ebpf_add_fd().
So, an unprivileged child could increase the parent's bpf_prog's reference count
w/o having the actual permissions to do so, and thus control it to the point
where the next bpf_prog_put() would unintentionally release it?
(So yeah, I'm hoping I misread something ... ;))
> +}
> +
> +static long seccomp_install_filter(unsigned int flags,
> + struct seccomp_filter *prepared)
> +{
> + const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
> + long ret = -EINVAL;
> +
> + /*
> + * Installing a seccomp filter requires that the task has
> + * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
> + * This avoids scenarios where unprivileged tasks can affect the
> + * behavior of privileged children.
> + */
> + if (!task_no_new_privs(current) &&
> + security_capable_noaudit(current_cred(), current_user_ns(),
> + CAP_SYS_ADMIN) != 0)
> + return -EACCES;
> +
> /*
> * Make sure we cannot change seccomp or nnp state via TSYNC
> * while another thread is in the middle of calling exec.
> @@ -875,6 +982,8 @@ static long do_seccomp(unsigned int op, unsigned int flags,
> return seccomp_set_mode_strict();
> case SECCOMP_SET_MODE_FILTER:
> return seccomp_set_mode_filter(flags, uargs);
> + case SECCOMP_MODE_FILTER_EBPF:
> + return seccomp_mode_filter_ebpf(flags, uargs);
> default:
> return -EINVAL;
> }
>
next prev parent reply other threads:[~2015-09-11 12:37 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-09-11 0:20 v2 of seccomp filter c/r patches Tycho Andersen
2015-09-11 0:20 ` [PATCH v2 2/5] seccomp: make underlying bpf ref counted as well Tycho Andersen
[not found] ` <1441930862-14347-3-git-send-email-tycho.andersen-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2015-09-11 13:02 ` Daniel Borkmann
[not found] ` <55F2D0EC.9090004-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org>
2015-09-11 14:44 ` Tycho Andersen
2015-09-11 16:03 ` Daniel Borkmann
[not found] ` <55F2FB6F.7050708-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org>
2015-09-11 17:33 ` Tycho Andersen
2015-09-11 18:28 ` Daniel Borkmann
2015-09-14 16:00 ` Tycho Andersen
2015-09-14 16:48 ` Daniel Borkmann
[not found] ` <55F6FA6B.1060108-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org>
2015-09-14 17:30 ` Tycho Andersen
2015-09-11 0:21 ` [PATCH v2 4/5] seccomp: add a way to access filters via bpf fds Tycho Andersen
2015-09-11 11:47 ` Daniel Borkmann
[not found] ` <55F2BF5A.8010006-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org>
2015-09-11 14:29 ` Tycho Andersen
[not found] ` <1441930862-14347-5-git-send-email-tycho.andersen-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2015-09-11 12:08 ` Michael Kerrisk (man-pages)
[not found] ` <CAKgNAki99ZFgLPE5mWWjj1nvdNyke1w0ttqmiG+Uk0rVfqutZw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-09-11 14:31 ` Tycho Andersen
2015-09-11 16:20 ` Andy Lutomirski
2015-09-11 16:44 ` Tycho Andersen
2015-09-14 17:52 ` Andy Lutomirski
[not found] ` <1441930862-14347-1-git-send-email-tycho.andersen-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2015-09-11 0:20 ` [PATCH v2 1/5] ebpf: add a seccomp program type Tycho Andersen
2015-09-11 12:09 ` Michael Kerrisk (man-pages)
2015-09-11 0:21 ` [PATCH v2 3/5] ebpf: add a way to dump an eBPF program Tycho Andersen
[not found] ` <1441930862-14347-4-git-send-email-tycho.andersen-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2015-09-11 2:29 ` Alexei Starovoitov
[not found] ` <20150911022940.GA4903-2RGepAHry06MXrjNfwE7T/6muRTtt8+awzqs5ZKRSiY@public.gmane.org>
2015-09-11 14:59 ` Tycho Andersen
2015-09-11 13:39 ` Daniel Borkmann
2015-09-11 14:44 ` Tycho Andersen
2015-09-11 12:11 ` Michael Kerrisk (man-pages)
2015-09-11 0:21 ` [PATCH v2 5/5] seccomp: add a way to attach a filter via eBPF fd Tycho Andersen
2015-09-11 12:10 ` Michael Kerrisk (man-pages)
2015-09-11 12:37 ` Daniel Borkmann [this message]
[not found] ` <55F2CB27.7030804-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org>
2015-09-11 14:40 ` Tycho Andersen
2015-09-11 2:50 ` v2 of seccomp filter c/r patches Alexei Starovoitov
2015-09-11 16:30 ` Andy Lutomirski
[not found] ` <CALCETrVYtv1=g-xPjQ-LiX+5GK3xtB6a2hYbat0TuU-Bd4QA6Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-09-11 17:00 ` Andy Lutomirski
[not found] ` <CALCETrWxLMSgdsdT9gTL80LSovONmCcTYjzqrHqF-WdJ4BN1Uw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-09-11 17:28 ` Tycho Andersen
2015-09-14 17:52 ` Andy Lutomirski
2015-09-15 16:07 ` Tycho Andersen
2015-09-15 18:13 ` Andy Lutomirski
[not found] ` <CALCETrVxhNvmEdMq0XRy1YZ+oJLDwcmE1y6prs7FGGhsS-Y5gg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-09-15 18:26 ` Tycho Andersen
2015-09-15 20:01 ` Andy Lutomirski
2015-09-15 21:38 ` Tycho Andersen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=55F2CB27.7030804@iogearbox.net \
--to=daniel@iogearbox$(echo .)net \
--cc=ast@kernel$(echo .)org \
--cc=davem@davemloft$(echo .)net \
--cc=keescook@chromium$(echo .)org \
--cc=linux-api@vger$(echo .)kernel.org \
--cc=linux-kernel@vger$(echo .)kernel.org \
--cc=luto@amacapital$(echo .)net \
--cc=netdev@vger$(echo .)kernel.org \
--cc=oleg@redhat$(echo .)com \
--cc=serge.hallyn@ubuntu$(echo .)com \
--cc=tycho.andersen@canonical$(echo .)com \
--cc=wad@chromium$(echo .)org \
--cc=xemul@parallels$(echo .)com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox