public inbox for netdev@vger.kernel.org 
 help / color / mirror / Atom feed
From: Jakub Kicinski <kuba@kernel•org>
To: davem@davemloft•net
Cc: netdev@vger•kernel.org, edumazet@google•com, pabeni@redhat•com,
	andrew+netdev@lunn•ch, horms@kernel•org, sdf@fomichev•me,
	hramamurthy@google•com, kuniyu@amazon•com, jdamato@fastly•com,
	Jakub Kicinski <kuba@kernel•org>
Subject: [PATCH net-next v2 1/8] net: avoid potential race between netdev_get_by_index_lock() and netns switch
Date: Tue,  8 Apr 2025 12:59:48 -0700	[thread overview]
Message-ID: <20250408195956.412733-2-kuba@kernel.org> (raw)
In-Reply-To: <20250408195956.412733-1-kuba@kernel.org>

netdev_get_by_index_lock() performs following steps:

  rcu_lock();
  dev = lookup(netns, ifindex);
  dev_get(dev);
  rcu_unlock();
  [... lock & validate the dev ...]
  return dev

Validation right now only checks if the device is registered but since
the lookup is netns-aware we must also protect against the device
switching netns right after we dropped the RCU lock. Otherwise
the caller in netns1 may get a pointer to a device which has just
switched to netns2.

We can't hold the lock for the entire netns change process (because of
the NETDEV_UNREGISTER notifier), and there's no existing marking to
indicate that the netns is unlisted because of netns move, so add one.

AFAIU none of the existing netdev_get_by_index_lock() callers can
suffer from this problem (NAPI code double checks the netns membership
and other callers are either under rtnl_lock or not ns-sensitive),
so this patch does not have to be treated as a fix.

Reviewed-by: Joe Damato <jdamato@fastly•com>
Acked-by: Stanislav Fomichev <sdf@fomichev•me>
Signed-off-by: Jakub Kicinski <kuba@kernel•org>
---
 include/linux/netdevice.h |  6 +++++-
 net/core/dev.h            |  2 +-
 net/core/dev.c            | 25 ++++++++++++++++++-------
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf3b6445817b..8e9be80bc167 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1952,6 +1952,7 @@ enum netdev_reg_state {
  *	@priv_destructor:	Called from unregister
  *	@npinfo:		XXX: need comments on this one
  * 	@nd_net:		Network namespace this network device is inside
+ *				protected by @lock
  *
  * 	@ml_priv:	Mid-layer private
  *	@ml_priv_type:  Mid-layer private type
@@ -2359,6 +2360,9 @@ struct net_device {
 
 	bool dismantle;
 
+	/** @moving_ns: device is changing netns, protected by @lock */
+	bool moving_ns;
+
 	enum {
 		RTNL_LINK_INITIALIZED,
 		RTNL_LINK_INITIALIZING,
@@ -2521,7 +2525,7 @@ struct net_device {
 	 *	@net_shaper_hierarchy, @reg_state, @threaded
 	 *
 	 * Double protects:
-	 *	@up
+	 *	@up, @moving_ns, @nd_net
 	 *
 	 * Double ops protects:
 	 *	@real_num_rx_queues, @real_num_tx_queues
diff --git a/net/core/dev.h b/net/core/dev.h
index 710abc05ebdb..e7bf21f52fc7 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -30,7 +30,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
 struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 
 struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
-struct net_device *__netdev_put_lock(struct net_device *dev);
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net);
 struct net_device *
 netdev_xa_find_lock(struct net *net, struct net_device *dev,
 		    unsigned long *index);
diff --git a/net/core/dev.c b/net/core/dev.c
index 4ccc6dc5303e..8c13e5339cc9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -828,7 +828,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
 	dev_hold(dev);
 	rcu_read_unlock();
 
-	dev = __netdev_put_lock(dev);
+	dev = __netdev_put_lock(dev, net);
 	if (!dev)
 		return NULL;
 
@@ -1039,10 +1039,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
  * This helper is intended for locking net_device after it has been looked up
  * using a lockless lookup helper. Lock prevents the instance from going away.
  */
-struct net_device *__netdev_put_lock(struct net_device *dev)
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net)
 {
 	netdev_lock(dev);
-	if (dev->reg_state > NETREG_REGISTERED) {
+	if (dev->reg_state > NETREG_REGISTERED ||
+	    dev->moving_ns || !net_eq(dev_net(dev), net)) {
 		netdev_unlock(dev);
 		dev_put(dev);
 		return NULL;
@@ -1070,7 +1071,7 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
 	if (!dev)
 		return NULL;
 
-	return __netdev_put_lock(dev);
+	return __netdev_put_lock(dev, net);
 }
 
 struct net_device *
@@ -1090,7 +1091,7 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev,
 		dev_hold(dev);
 		rcu_read_unlock();
 
-		dev = __netdev_put_lock(dev);
+		dev = __netdev_put_lock(dev, net);
 		if (dev)
 			return dev;
 
@@ -12157,7 +12158,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 	netif_close(dev);
 	/* And unlink it from device chain */
 	unlist_netdevice(dev);
-	netdev_unlock_ops(dev);
+
+	if (!netdev_need_ops_lock(dev))
+		netdev_lock(dev);
+	dev->moving_ns = true;
+	netdev_unlock(dev);
 
 	synchronize_net();
 
@@ -12193,7 +12198,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 	move_netdevice_notifiers_dev_net(dev, net);
 
 	/* Actually switch the network namespace */
+	netdev_lock(dev);
 	dev_net_set(dev, net);
+	netdev_unlock(dev);
 	dev->ifindex = new_ifindex;
 
 	if (new_name[0]) {
@@ -12219,7 +12226,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 	err = netdev_change_owner(dev, net_old, net);
 	WARN_ON(err);
 
-	netdev_lock_ops(dev);
+	netdev_lock(dev);
+	dev->moving_ns = false;
+	if (!netdev_need_ops_lock(dev))
+		netdev_unlock(dev);
+
 	/* Add the device back in the hashes */
 	list_netdevice(dev);
 	/* Notify protocols, that a new device appeared. */
-- 
2.49.0


  reply	other threads:[~2025-04-08 20:00 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-08 19:59 [PATCH net-next v2 0/8] net: depend on instance lock for queue related netlink ops Jakub Kicinski
2025-04-08 19:59 ` Jakub Kicinski [this message]
2025-04-08 19:59 ` [PATCH net-next v2 2/8] net: designate XSK pool pointers in queues as "ops protected" Jakub Kicinski
2025-04-08 19:59 ` [PATCH net-next v2 3/8] netdev: add "ops compat locking" helpers Jakub Kicinski
2025-04-08 19:59 ` [PATCH net-next v2 4/8] netdev: don't hold rtnl_lock over nl queue info get when possible Jakub Kicinski
2025-04-08 19:59 ` [PATCH net-next v2 5/8] xdp: double protect netdev->xdp_flags with netdev->lock Jakub Kicinski
2025-04-08 22:15   ` Harshitha Ramamurthy
2025-04-09 18:40   ` Martin KaFai Lau
2025-04-08 19:59 ` [PATCH net-next v2 6/8] netdev: depend on netdev->lock for xdp features Jakub Kicinski
2025-04-10 17:10   ` Kuniyuki Iwashima
2025-04-11  2:10     ` Jakub Kicinski
2025-04-11  2:23       ` Jakub Kicinski
2025-04-11 17:41         ` Stanislav Fomichev
2025-04-11 19:19           ` Jakub Kicinski
2025-04-11  2:36       ` Kuniyuki Iwashima
2025-04-08 19:59 ` [PATCH net-next v2 7/8] docs: netdev: break down the instance locking info per ops struct Jakub Kicinski
2025-04-09  2:59   ` Joe Damato
2025-04-10  6:01   ` Jacob Keller
2025-04-10 16:08     ` Jakub Kicinski
2025-04-10 22:35       ` Keller, Jacob E
2025-04-10 23:39         ` Jakub Kicinski
2025-04-11 21:26           ` Jacob Keller
2025-04-08 19:59 ` [PATCH net-next v2 8/8] netdev: depend on netdev->lock for qstats in ops locked drivers Jakub Kicinski
2025-04-10  5:23   ` Jacob Keller
2025-04-10 23:46     ` Jakub Kicinski
2025-04-11 21:29       ` Jacob Keller
2025-04-10  0:40 ` [PATCH net-next v2 0/8] net: depend on instance lock for queue related netlink ops patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250408195956.412733-2-kuba@kernel.org \
    --to=kuba@kernel$(echo .)org \
    --cc=andrew+netdev@lunn$(echo .)ch \
    --cc=davem@davemloft$(echo .)net \
    --cc=edumazet@google$(echo .)com \
    --cc=horms@kernel$(echo .)org \
    --cc=hramamurthy@google$(echo .)com \
    --cc=jdamato@fastly$(echo .)com \
    --cc=kuniyu@amazon$(echo .)com \
    --cc=netdev@vger$(echo .)kernel.org \
    --cc=pabeni@redhat$(echo .)com \
    --cc=sdf@fomichev$(echo .)me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox