public inbox for netdev@vger.kernel.org 
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail•com>
To: Neil Horman <nhorman@tuxdriver•com>
Cc: netdev@vger•kernel.org, davem@davemloft•net, socketcan@hartkopp•net
Subject: Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg
Date: Thu, 08 Oct 2009 03:05:12 +0200	[thread overview]
Message-ID: <4ACD3AC8.608@gmail.com> (raw)
In-Reply-To: <20091007180835.GB20524@hmsreliant.think-freely.org>

Neil Horman a écrit :
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 7626b6a..8bd366f 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -306,6 +306,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
>  	skb_len = skb->len;
>  


>  	skb_queue_tail(&sk->sk_receive_queue, skb);
> +	skb->dropcount = atomic_read(&sk->sk_drops);

No, skb was given to skb_queue_tail(); you are not allowed to touch it now,
because another CPU might already have consumed it.

You had better do:

struct sk_buff_head *list = &sk->sk_receive_queue;

spin_lock_irqsave(&list->lock, flags);
skb->dropcount = atomic_read(&sk->sk_drops); // should be done under lock protection
__skb_queue_tail(list, skb);
spin_unlock_irqrestore(&list->lock, flags);



>  
>  	if (!sock_flag(sk, SOCK_DEAD))
>  		sk->sk_data_ready(sk, skb_len);
> @@ -702,6 +703,12 @@ set_rcvbuf:
>  
>  		/* We implement the SO_SNDLOWAT etc to
>  		   not be settable (1003.1g 5.3) */
> +	case SO_RXQ_OVFL:
> +		if (valbool)
> +			set_bit(SOCK_RXQ_OVFL, &sock->flags);
> +		else
> +			clear_bit(SOCK_RXQ_OVFL, &sock->flags);
> +		break;
>  	default:
>  		ret = -ENOPROTOOPT;
>  		break;
> @@ -901,6 +908,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
>  		v.val = sk->sk_mark;
>  		break;
>  
> +	case SO_RXQ_OVFL:
> +		v.val = test_bit(SOCK_RXQ_OVFL, &sock->flags);
> +		break;
> +
>  	default:
>  		return -ENOPROTOOPT;
>  	}
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index d7ecca0..920ae1e 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -617,6 +617,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
>  	if (pskb_trim(skb, snaplen))
>  		goto drop_n_acct;
>  

> +	skb->dropcount = atomic_read(&sk->sk_drops);
This should be done a little bit later, right before "__skb_queue_tail(&sk->sk_receive_queue, skb);"

>  	skb_set_owner_r(skb, sk);
>  	skb->dev = NULL;
>  	skb_dst_drop(skb);
> @@ -634,6 +635,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
>  drop_n_acct:


>  	spin_lock(&sk->sk_receive_queue.lock);
>  	po->stats.tp_drops++;
> +	atomic_inc(&sk->sk_drops);
>  	spin_unlock(&sk->sk_receive_queue.lock);

You could replace this block of four lines with: po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

>  
>  drop_n_restore:
> diff --git a/net/socket.c b/net/socket.c
> index 7565536..ad157a3 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -673,6 +673,12 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
>  {
>  	int err;
>  	struct sock_iocb *si = kiocb_to_siocb(iocb);
> +	struct sk_buff *skb;
> +	int rc;
> +	struct sock *sk = sock->sk;
> +	unsigned long cpu_flags;
> +	__u32 gap = 0;

> +	int check_drops = test_bit(SOCK_RXQ_OVFL, &sock->flags);
>  
>  	si->sock = sock;
>  	si->scm = NULL;
> @@ -684,7 +690,21 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
>  	if (err)
>  		return err;
>  
> -	return sock->ops->recvmsg(iocb, sock, msg, size, flags);




> +	if (check_drops) {
> +		skb = skb_recv_datagram(sk, flags|MSG_PEEK,
> +				flags & MSG_DONTWAIT, &err);

	Ouch, this is too expensive, please find another way :)

> +		if (skb) {
> +			gap = skb->dropcount;
> +			consume_skb(skb);
> +		}
> +	}
> +
> +	rc = sock->ops->recvmsg(iocb, sock, msg, size, flags);
> +
> +	if (check_drops && (rc > 0))

		&& gap != 0

> +		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &gap);
> +


  reply	other threads:[~2009-10-08  1:06 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-07 18:08 [PATCH] Generalize socket rx gap / receive queue overflow cmsg Neil Horman
2009-10-08  1:05 ` Eric Dumazet [this message]
2009-10-08 13:54   ` Neil Horman
2009-10-08 14:45     ` Eric Dumazet
2009-10-08 17:20       ` Neil Horman
2009-10-09 19:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v2) Neil Horman
2009-10-09 21:31   ` Eric Dumazet
2009-10-09 23:21     ` Neil Horman
2009-10-09 23:56 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v3) Neil Horman
2009-10-10  4:59   ` Eric Dumazet
2009-10-10  5:12   ` Eric Dumazet
2009-10-10 12:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v4) Neil Horman
2009-10-12  4:38   ` Eric Dumazet
2009-10-12  5:48     ` Oliver Hartkopp
2009-10-12 10:01     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4ACD3AC8.608@gmail.com \
    --to=eric.dumazet@gmail$(echo .)com \
    --cc=davem@davemloft$(echo .)net \
    --cc=netdev@vger$(echo .)kernel.org \
    --cc=nhorman@tuxdriver$(echo .)com \
    --cc=socketcan@hartkopp$(echo .)net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox