From: Kuniyuki Iwashima <kuniyu@amazon•com>
To: "David S. Miller" <davem@davemloft•net>,
Eric Dumazet <edumazet@google•com>,
Jakub Kicinski <kuba@kernel•org>, Paolo Abeni <pabeni@redhat•com>,
Simon Horman <horms@kernel•org>
Cc: Andrew Lunn <andrew+netdev@lunn•ch>,
Marc Kleine-Budde <mkl@pengutronix•de>,
Vincent Mailhol <mailhol.vincent@wanadoo•fr>,
"Daniel Borkmann" <daniel@iogearbox•net>,
Nikolay Aleksandrov <razor@blackwall•org>,
Kuniyuki Iwashima <kuniyu@amazon•com>,
Kuniyuki Iwashima <kuni1840@gmail•com>, <netdev@vger•kernel.org>
Subject: [PATCH v2 net-next 1/7] rtnetlink: Introduce struct rtnl_nets and helpers.
Date: Tue, 5 Nov 2024 18:24:26 -0800 [thread overview]
Message-ID: <20241106022432.13065-2-kuniyu@amazon.com> (raw)
In-Reply-To: <20241106022432.13065-1-kuniyu@amazon.com>
rtnl_newlink() needs to hold up to 3 per-netns RTNL mutexes: 2 for the
new device and 1 for its peer.
We will add rtnl_nets_lock() later, which performs the nested locking
based on struct rtnl_nets, which has an array of struct net pointers.
rtnl_nets_add() adds a net pointer to the array and sorts it so that
rtnl_nets_lock() can simply acquire per-netns RTNL from array[0] to [2].
Before calling rtnl_nets_add(), get_net() must be called for the net,
and rtnl_nets_destroy() will call put_net() for each.
Let's apply the helpers to rtnl_newlink().
When CONFIG_DEBUG_NET_SMALL_RTNL is disabled, we do not call
rtnl_net_lock(), so the order of the array does not matter. In that
case rtnl_net_cmp_locks() always returns -1, which allows the loop in
rtnl_nets_add() to be optimised to a NOP.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon•com>
Reviewed-by: Eric Dumazet <edumazet@google•com>
---
v2:
* Move struct rtnl_nets to net/core/rtnetlink.c
* Unexport rtnl_nets_add()
---
net/core/rtnetlink.c | 70 ++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 67 insertions(+), 3 deletions(-)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3b33810d92a8..81f4722c1353 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -258,8 +258,67 @@ bool lockdep_rtnl_net_is_held(struct net *net)
return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex);
}
EXPORT_SYMBOL(lockdep_rtnl_net_is_held);
+#else
+static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
+{
+ /* No need to swap: this fallback ignores both arguments and always
+  * reports -1, so the switch in rtnl_nets_add() never takes its
+  * "case 0" (drop duplicate) or "case 1" (swap) branch and each netns
+  * is simply appended in the order it was added.
+  */
+ return -1;
+}
#endif
+struct rtnl_nets {
+ /* ->newlink() needs to freeze 3 netns at most;
+ * 2 for the new device, 1 for its peer.
+ *
+ * Entries are stored by rtnl_nets_add() and released by
+ * rtnl_nets_destroy().
+ */
+ struct net *net[3]; /* stored netns; put_net() is called on each in rtnl_nets_destroy() */
+ unsigned char len; /* number of entries of net[] currently in use */
+};
+
+static void rtnl_nets_init(struct rtnl_nets *rtnl_nets)
+{ /* Start from an empty set before any rtnl_nets_add() call. */
+ memset(rtnl_nets, 0, sizeof(*rtnl_nets)); /* zeroes every net[] slot and len */
+}
+
+static void rtnl_nets_destroy(struct rtnl_nets *rtnl_nets)
+{ /* Release every stored netns reference and leave the set empty. */
+ int i;
+
+ for (i = 0; i < rtnl_nets->len; i++) {
+ put_net(rtnl_nets->net[i]); /* drop the refcnt that was handed to rtnl_nets_add() */
+ rtnl_nets->net[i] = NULL; /* do not keep a pointer we no longer hold a refcnt on */
+ }
+
+ rtnl_nets->len = 0; /* a second call now finds nothing to put */
+}
+
+/**
+ * rtnl_nets_add - Add netns to be locked before ->newlink().
+ *
+ * @rtnl_nets: rtnl_nets pointer passed to ->get_peer_net().
+ * @net: netns pointer with an extra refcnt held.
+ *
+ * Walks the entries already stored and compares each one with @net via
+ * rtnl_net_cmp_locks():
+ *
+ * * 0: @net is treated as already present; the caller's extra refcnt is
+ *   dropped with put_net() and nothing is stored.
+ * * 1: @net is swapped with the stored entry, and the displaced entry is
+ *   carried forward through the rest of the walk.
+ * * any other value: the stored entry is left where it is.
+ *
+ * Whatever pointer is held once the walk ends is appended at index len,
+ * so an array that was ordered according to rtnl_net_cmp_locks() stays
+ * ordered after the insertion.
+ *
+ * The extra refcnt is released in rtnl_nets_destroy().
+ *
+ * NOTE(review): a full array only triggers DEBUG_NET_WARN_ON_ONCE(); the
+ * final store is not otherwise bounds-checked, so callers are expected
+ * never to add more than ARRAY_SIZE(rtnl_nets->net) distinct netns.
+ */
+static void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net)
+{
+ int i;
+
+ DEBUG_NET_WARN_ON_ONCE(rtnl_nets->len == ARRAY_SIZE(rtnl_nets->net));
+
+ for (i = 0; i < rtnl_nets->len; i++) {
+ switch (rtnl_net_cmp_locks(rtnl_nets->net[i], net)) {
+ case 0:
+ put_net(net); /* duplicate: give back the caller's extra refcnt */
+ return;
+ case 1:
+ swap(rtnl_nets->net[i], net); /* store @net here, keep walking with the displaced entry */
+ }
+ }
+
+ rtnl_nets->net[i] = net; /* i == len here: append after the last used slot */
+ rtnl_nets->len++;
+}
+
static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
@@ -3796,6 +3855,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *tgt_net, *link_net = NULL;
struct rtnl_link_ops *ops = NULL;
struct rtnl_newlink_tbs *tbs;
+ struct rtnl_nets rtnl_nets;
int ops_srcu_index;
int ret;
@@ -3839,6 +3899,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
#endif
}
+ rtnl_nets_init(&rtnl_nets);
+
if (ops) {
if (ops->maxtype > RTNL_MAX_TYPE) {
ret = -EINVAL;
@@ -3868,6 +3930,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto put_ops;
}
+ rtnl_nets_add(&rtnl_nets, tgt_net);
+
if (tb[IFLA_LINK_NETNSID]) {
int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
@@ -3878,6 +3942,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto put_net;
}
+ rtnl_nets_add(&rtnl_nets, link_net);
+
if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
goto put_net;
@@ -3887,9 +3953,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, tbs, data, extack);
put_net:
- if (link_net)
- put_net(link_net);
- put_net(tgt_net);
+ rtnl_nets_destroy(&rtnl_nets);
put_ops:
if (ops)
rtnl_link_ops_put(ops, ops_srcu_index);
--
2.39.5 (Apple Git-154)
next prev parent reply other threads:[~2024-11-06 2:25 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-06 2:24 [PATCH v2 net-next 0/7] rtnetlink: Convert rtnl_newlink() to per-netns RTNL Kuniyuki Iwashima
2024-11-06 2:24 ` Kuniyuki Iwashima [this message]
2024-11-06 10:36 ` [PATCH v2 net-next 1/7] rtnetlink: Introduce struct rtnl_nets and helpers Nikolay Aleksandrov
2024-11-06 2:24 ` [PATCH v2 net-next 2/7] rtnetlink: Add peer_type in struct rtnl_link_ops Kuniyuki Iwashima
2024-11-06 10:37 ` Nikolay Aleksandrov
2024-11-06 2:24 ` [PATCH v2 net-next 3/7] veth: Set VETH_INFO_PEER to veth_link_ops.peer_type Kuniyuki Iwashima
2024-11-06 10:38 ` Nikolay Aleksandrov
2024-11-06 2:24 ` [PATCH v2 net-next 4/7] vxcan: Set VXCAN_INFO_PEER to vxcan_link_ops.peer_type Kuniyuki Iwashima
2024-11-06 10:38 ` Nikolay Aleksandrov
2024-11-06 2:24 ` [PATCH v2 net-next 5/7] netkit: Set IFLA_NETKIT_PEER_INFO to netkit_link_ops.peer_type Kuniyuki Iwashima
2024-11-06 10:39 ` Nikolay Aleksandrov
2024-11-06 2:24 ` [PATCH v2 net-next 6/7] rtnetlink: Convert RTM_NEWLINK to per-netns RTNL Kuniyuki Iwashima
2024-11-06 9:00 ` Paolo Abeni
2024-11-06 16:32 ` Kuniyuki Iwashima
2024-11-06 10:40 ` Nikolay Aleksandrov
2024-11-06 2:24 ` [PATCH v2 net-next 7/7] rtnetlink: Register rtnl_dellink() and rtnl_setlink() with RTNL_FLAG_DOIT_PERNET_WIP Kuniyuki Iwashima
2024-11-06 10:41 ` Nikolay Aleksandrov
2024-11-06 14:25 ` Eric Dumazet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241106022432.13065-2-kuniyu@amazon.com \
--to=kuniyu@amazon$(echo .)com \
--cc=andrew+netdev@lunn$(echo .)ch \
--cc=daniel@iogearbox$(echo .)net \
--cc=davem@davemloft$(echo .)net \
--cc=edumazet@google$(echo .)com \
--cc=horms@kernel$(echo .)org \
--cc=kuba@kernel$(echo .)org \
--cc=kuni1840@gmail$(echo .)com \
--cc=mailhol.vincent@wanadoo$(echo .)fr \
--cc=mkl@pengutronix$(echo .)de \
--cc=netdev@vger$(echo .)kernel.org \
--cc=pabeni@redhat$(echo .)com \
--cc=razor@blackwall$(echo .)org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox