public inbox for netdev@vger.kernel.org 
 help / color / mirror / Atom feed
From: Rainer Weikusat <rweikusat@mobileactivedefense•com>
To: Jason Baron <jbaron@akamai•com>
Cc: Rainer Weikusat <rweikusat@mobileactivedefense•com>,
	davem@davemloft•net, netdev@vger•kernel.org,
	linux-kernel@vger•kernel.org, minipli@googlemail•com,
	normalperson@yhbt•net, eric.dumazet@gmail•com,
	viro@zeniv•linux.org.uk, davidel@xmailserver•org,
	dave@stgolabs•net, olivier@mauras•ch, pageexec@freemail•hu,
	torvalds@linux-foundation•org, peterz@infradead•org
Subject: Re: [RFC] unix: fix use-after-free in unix_dgram_poll()/ 4.2.5
Date: Fri, 30 Oct 2015 20:52:08 +0000	[thread overview]
Message-ID: <877fm43wqv.fsf_-_@doppelsaurus.mobileactivedefense.com> (raw)
In-Reply-To: <874mhbx7o1.fsf_-_@doppelsaurus.mobileactivedefense.com> (Rainer Weikusat's message of "Wed, 28 Oct 2015 16:46:38 +0000")

Same changes ported to 4.2.5 with some minor improvments (I hope),
namely,

	- applied a round of DeMorgan to the 'quick' check function in
          order to simplify the condition

	- fixed a (minor) error in the dgram_sendmsg change: In case the
          2nd check resulted in 'can send now', the code would continue
          with 'wait until timeout expired' (since timeo was 0 in the
          case, this shouldn't make much of a practical difference)

	- (hopefully) more intelligible function names and better
          explanation

	- dropped the POLL_OUT_ALL macro again as that's really
          unrelated

---
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -62,6 +62,7 @@ struct unix_sock {
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
+	wait_queue_t		peer_wake;
 };
 
 static inline struct unix_sock *unix_sk(struct sock *sk)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,6 +326,122 @@ found:
 	return s;
 }
 
+/* Support code for asymmetrically connected dgram sockets
+ *
+ * If a datagram socket is connected to a socket not itself connected
+ * to the first socket (eg, /dev/log), clients may only enqueue more
+ * messages if the present receive queue of the server socket is not
+ * "too large". This means there's a second writeability condition
+ * poll and sendmsg need to test. The dgram recv code will do a wake
+ * up on the peer_wait wait queue of a socket upon reception of a
+ * datagram which needs to be propagated to sleeping would-be writers
+ * since these might not have sent anything so far. This can't be
+ * accomplished via poll_wait because the lifetime of the server
+ * socket might be less than that of its clients if these break their
+ * association with it or if the server socket is closed while clients
+ * are still connected to it and there's no way to inform "a polling
+ * implementation" that it should let go of a certain wait queue
+ *
+ * In order to propagate a wake up, a wait_queue_t of the client
+ * socket is enqueued on the peer_wait queue of the server socket
+ * whose wake function does a wake_up on the ordinary client socket
+ * wait queue. This connection is established whenever a write (or
+ * poll for write) hit the flow control condition and broken when the
+ * association to the server socket is dissolved or after a wake up
+ * was relayed.
+ */
+
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+				      void *key)
+{
+	struct unix_sock *u;
+	wait_queue_head_t *u_sleep;
+
+	u = container_of(q, struct unix_sock, peer_wake);
+
+	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
+			    &u->peer_wake);
+	u->peer_wake.private = NULL;
+
+	/* relaying can only happen while the wq still exists */
+	u_sleep = sk_sleep(&u->sk);
+	if (u_sleep)
+		wake_up_interruptible_poll(u_sleep, key);
+
+	return 0;
+}
+
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
+{
+	struct unix_sock *u, *u_other;
+	int rc;
+
+	u = unix_sk(sk);
+	u_other = unix_sk(other);
+	rc = 0;
+
+	spin_lock(&u_other->peer_wait.lock);
+
+	if (!u->peer_wake.private) {
+		u->peer_wake.private = other;
+		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
+
+		rc = 1;
+	}
+
+	spin_unlock(&u_other->peer_wait.lock);
+	return rc;
+}
+
+static int unix_dgram_peer_wake_disconnect(struct sock *sk, struct sock *other)
+{
+	struct unix_sock *u, *u_other;
+	int rc;
+
+	u = unix_sk(sk);
+	u_other = unix_sk(other);
+	rc = 0;
+
+	spin_lock(&u_other->peer_wait.lock);
+
+	if (u->peer_wake.private == other) {
+		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
+		u->peer_wake.private = NULL;
+
+		rc = 1;
+	}
+
+	spin_unlock(&u_other->peer_wait.lock);
+	return rc;
+}
+
+/* Needs 'this' unix state lock. Lockless check if data can (likely)
+ * be sent.
+ */
+static inline int unix_dgram_peer_recv_ready(struct sock *sk,
+					     struct sock *other)
+{
+	return unix_peer(other) == sk || !unix_recvq_full(other);
+}
+
+/* Needs 'this' unix state lock. After recv_ready indicated not ready,
+ * establish peer_wait connection if still needed.
+ */
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+{
+	int conned;
+
+	conned = unix_dgram_peer_wake_connect(sk, other);
+
+	if (unix_recvq_full(other))
+		return 1;
+
+	if (conned)
+		unix_dgram_peer_wake_disconnect(sk, other);
+
+	return 0;
+}
+
 static inline int unix_writable(struct sock *sk)
 {
 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
@@ -430,6 +546,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
 			skpair->sk_state_change(skpair);
 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 		}
+
+		unix_dgram_peer_wake_disconnect(sk, skpair);
 		sock_put(skpair); /* It may now die */
 		unix_peer(sk) = NULL;
 	}
@@ -664,6 +782,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->readlock); /* single task reading lock */
 	init_waitqueue_head(&u->peer_wait);
+	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 	unix_insert_socket(unix_sockets_unbound(sk), sk);
 out:
 	if (sk == NULL)
@@ -983,7 +1102,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
 	struct sock *other;
 	unsigned int hash;
-	int err;
+	int err, disconned;
 
 	if (addr->sa_family != AF_UNSPEC) {
 		err = unix_mkname(sunaddr, alen, &hash);
@@ -1031,6 +1150,14 @@ restart:
 	if (unix_peer(sk)) {
 		struct sock *old_peer = unix_peer(sk);
 		unix_peer(sk) = other;
+
+		disconned = unix_dgram_peer_wake_disconnect(sk, other);
+		if (disconned)
+			wake_up_interruptible_poll(sk_sleep(sk),
+						   POLLOUT |
+						   POLLWRNORM |
+						   POLLWRBAND);
+
 		unix_state_double_unlock(sk, other);
 
 		if (other != old_peer)
@@ -1463,7 +1590,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
 	struct sock *other = NULL;
 	int namelen = 0; /* fake GCC */
-	int err;
+	int err, disconned;
 	unsigned int hash;
 	struct sk_buff *skb;
 	long timeo;
@@ -1565,6 +1692,14 @@ restart:
 		unix_state_lock(sk);
 		if (unix_peer(sk) == other) {
 			unix_peer(sk) = NULL;
+
+			disconned = unix_dgram_peer_wake_disconnect(sk, other);
+			if (disconned)
+				wake_up_interruptible_poll(sk_sleep(sk),
+							   POLLOUT |
+							   POLLWRNORM |
+							   POLLWRBAND);
+
 			unix_state_unlock(sk);
 
 			unix_dgram_disconnected(sk, other);
@@ -1590,10 +1725,14 @@ restart:
 			goto out_unlock;
 	}
 
-	if (unix_peer(other) != sk && unix_recvq_full(other)) {
+	if (!unix_dgram_peer_recv_ready(sk, other)) {
 		if (!timeo) {
-			err = -EAGAIN;
-			goto out_unlock;
+			if (unix_dgram_peer_wake_me(sk, other)) {
+				err = -EAGAIN;
+				goto out_unlock;
+			}
+
+			goto restart;
 		}
 
 		timeo = unix_wait_for_peer(other, timeo);
@@ -2453,14 +2592,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 		return mask;
 
 	writable = unix_writable(sk);
-	other = unix_peer_get(sk);
-	if (other) {
-		if (unix_peer(other) != sk) {
-			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
-			if (unix_recvq_full(other))
-				writable = 0;
-		}
-		sock_put(other);
+	if (writable) {
+		unix_state_lock(sk);
+
+		other = unix_peer(sk);
+		if (other &&
+		    !unix_dgram_peer_recv_ready(sk, other) &&
+		    unix_dgram_peer_wake_me(sk, other))
+			writable = 0;
+
+		unix_state_unlock(sk);
 	}
 
 	if (writable)

  parent reply	other threads:[~2015-10-30 20:52 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-02 20:43 [PATCH v2 0/3] af_unix: fix use-after-free Jason Baron
2015-10-02 20:43 ` [PATCH v2 1/3] unix: fix use-after-free in unix_dgram_poll() Jason Baron
2015-10-03  5:46   ` Mathias Krause
2015-10-03 17:02     ` Rainer Weikusat
2015-10-04 17:41       ` Rainer Weikusat
2015-10-05 16:31   ` Rainer Weikusat
2015-10-05 16:54     ` Eric Dumazet
2015-10-05 17:20       ` Rainer Weikusat
2015-10-05 17:55     ` Jason Baron
2015-10-12 20:41       ` Rainer Weikusat
2015-10-14  3:44         ` Jason Baron
2015-10-14 17:47           ` Rainer Weikusat
2015-10-15  2:54             ` Jason Baron
2015-10-18 20:58               ` Rainer Weikusat
2015-10-19 15:07                 ` Jason Baron
2015-10-20 22:29                   ` Rainer Weikusat
2015-10-21 17:34                     ` Rainer Weikusat
2015-10-28 16:46                     ` [RFC] " Rainer Weikusat
2015-10-28 17:57                       ` Jason Baron
2015-10-29 14:23                         ` Rainer Weikusat
2015-10-30 20:52                       ` Rainer Weikusat [this message]
     [not found]                         ` <57d2f5b6aae251957bff7a1a52b8bf2c@core-hosting.net>
2015-11-02 21:55                           ` [RFC] unix: fix use-after-free in unix_dgram_poll()/ 4.2.5 Rainer Weikusat
2015-10-02 20:43 ` [PATCH v2 2/3] af_unix: Convert gc_flags to flags Jason Baron
2015-10-02 20:44 ` [PATCH v2 3/3] af_unix: optimize the unix_dgram_recvmsg() Jason Baron
2015-10-05  7:41   ` Peter Zijlstra
2015-10-05 17:13     ` Jason Baron

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=877fm43wqv.fsf_-_@doppelsaurus.mobileactivedefense.com \
    --to=rweikusat@mobileactivedefense$(echo .)com \
    --cc=dave@stgolabs$(echo .)net \
    --cc=davem@davemloft$(echo .)net \
    --cc=davidel@xmailserver$(echo .)org \
    --cc=eric.dumazet@gmail$(echo .)com \
    --cc=jbaron@akamai$(echo .)com \
    --cc=linux-kernel@vger$(echo .)kernel.org \
    --cc=minipli@googlemail$(echo .)com \
    --cc=netdev@vger$(echo .)kernel.org \
    --cc=normalperson@yhbt$(echo .)net \
    --cc=olivier@mauras$(echo .)ch \
    --cc=pageexec@freemail$(echo .)hu \
    --cc=peterz@infradead$(echo .)org \
    --cc=torvalds@linux-foundation$(echo .)org \
    --cc=viro@zeniv$(echo .)linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox