public inbox for netdev@vger.kernel.org 
 help / color / mirror / Atom feed
From: Eric Dumazet <dada1@cosmosbay•com>
To: Stephen Hemminger <shemminger@vyatta•com>
Cc: David Miller <davem@davemloft•net>, netdev@vger•kernel.org
Subject: Re: [PATCH 4/6] netfilter: abstract xt_counters
Date: Sun, 01 Feb 2009 13:25:26 +0100	[thread overview]
Message-ID: <498594B6.6000905@cosmosbay.com> (raw)
In-Reply-To: <20090130215729.416851870@vyatta.com>

Stephen Hemminger a écrit :
> Break out the parts of the x_tables code that manipulates counters so
> changes to locking are easier.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta•com>
> 
> 
> ---
>  include/linux/netfilter/x_tables.h |    6 +++++-
>  net/ipv4/netfilter/arp_tables.c    |    9 +++++----
>  net/ipv4/netfilter/ip_tables.c     |    9 +++++----
>  net/ipv6/netfilter/ip6_tables.c    |   21 +++++++++++----------
>  net/netfilter/x_tables.c           |   15 +++++++++++++++
>  5 files changed, 41 insertions(+), 19 deletions(-)
> 
> --- a/include/linux/netfilter/x_tables.h	2009-01-30 08:31:48.630454493 -0800
> +++ b/include/linux/netfilter/x_tables.h	2009-01-30 09:14:01.294680339 -0800
> @@ -105,13 +105,17 @@ struct _xt_align
>  #define XT_ERROR_TARGET "ERROR"
>  
>  #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
> -#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
>  
>  struct xt_counters
>  {
>  	u_int64_t pcnt, bcnt;			/* Packet and byte counters */
>  };
>  
> +extern void xt_add_counter(struct xt_counters *c, unsigned b, unsigned p);
> +extern void xt_sum_counter(struct xt_counters *t,
> +			   int cpu, const struct xt_counters *c);
> +
> +
>  /* The argument to IPT_SO_ADD_COUNTERS. */
>  struct xt_counters_info
>  {
> --- a/net/ipv4/netfilter/arp_tables.c	2009-01-30 08:31:48.569479503 -0800
> +++ b/net/ipv4/netfilter/arp_tables.c	2009-01-30 09:12:40.181542286 -0800
> @@ -256,7 +256,7 @@ unsigned int arpt_do_table(struct sk_buf
>  
>  			hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
>  				(2 * skb->dev->addr_len);
> -			ADD_COUNTER(e->counters, hdr_len, 1);
> +			xt_add_counter(&e->counters, hdr_len, 1);
>  
>  			t = arpt_get_target(e);
>  
> @@ -662,10 +662,11 @@ static int translate_table(const char *n
>  
>  /* Gets counters. */
>  static inline int add_entry_to_counter(const struct arpt_entry *e,
> +				       int cpu,
>  				       struct xt_counters total[],
>  				       unsigned int *i)
>  {
> -	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
> +	xt_sum_counter(total, cpu, &e->counters);
>  
>  	(*i)++;
>  	return 0;
> @@ -709,6 +710,7 @@ static void get_counters(const struct xt
>  		ARPT_ENTRY_ITERATE(t->entries[cpu],
>  				   t->size,
>  				   add_entry_to_counter,
> +				   cpu,
>  				   counters,
>  				   &i);
>  	}
> @@ -1082,8 +1084,7 @@ static inline int add_counter_to_entry(s
>  				       const struct xt_counters addme[],
>  				       unsigned int *i)
>  {
> -
> -	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
> +	xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
>  
>  	(*i)++;
>  	return 0;
> --- a/net/ipv4/netfilter/ip_tables.c	2009-01-30 08:31:48.538730580 -0800
> +++ b/net/ipv4/netfilter/ip_tables.c	2009-01-30 09:12:40.169542168 -0800
> @@ -366,7 +366,7 @@ ipt_do_table(struct sk_buff *skb,
>  			if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
>  				goto no_match;
>  
> -			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
> +			xt_add_counter(&e->counters, ntohs(ip->tot_len), 1);
>  
>  			t = ipt_get_target(e);
>  			IP_NF_ASSERT(t->u.kernel.target);
> @@ -872,10 +872,11 @@ translate_table(const char *name,
>  /* Gets counters. */
>  static inline int
>  add_entry_to_counter(const struct ipt_entry *e,
> +		     int cpu,
>  		     struct xt_counters total[],
>  		     unsigned int *i)
>  {
> -	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
> +	xt_sum_counter(total, cpu, &e->counters);
>  
>  	(*i)++;
>  	return 0;
> @@ -921,6 +922,7 @@ get_counters(const struct xt_table_info 
>  		IPT_ENTRY_ITERATE(t->entries[cpu],
>  				  t->size,
>  				  add_entry_to_counter,
> +				  cpu,
>  				  counters,
>  				  &i);
>  	}
> @@ -1327,8 +1329,7 @@ add_counter_to_entry(struct ipt_entry *e
>  		 (long unsigned int)addme[*i].pcnt,
>  		 (long unsigned int)addme[*i].bcnt);
>  #endif
> -
> -	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
> +	xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
>  
>  	(*i)++;
>  	return 0;
> --- a/net/ipv6/netfilter/ip6_tables.c	2009-01-30 08:31:48.605479850 -0800
> +++ b/net/ipv6/netfilter/ip6_tables.c	2009-01-30 09:12:40.205542065 -0800
> @@ -392,9 +392,9 @@ ip6t_do_table(struct sk_buff *skb,
>  			if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
>  				goto no_match;
>  
> -			ADD_COUNTER(e->counters,
> -				    ntohs(ipv6_hdr(skb)->payload_len) +
> -				    sizeof(struct ipv6hdr), 1);
> +			xt_add_counter(&e->counters,
> +					ntohs(ipv6_hdr(skb)->payload_len) +
> +					sizeof(struct ipv6hdr), 1);
>  
>  			t = ip6t_get_target(e);
>  			IP_NF_ASSERT(t->u.kernel.target);
> @@ -901,10 +901,11 @@ translate_table(const char *name,
>  /* Gets counters. */
>  static inline int
>  add_entry_to_counter(const struct ip6t_entry *e,
> +		     int cpu,
>  		     struct xt_counters total[],
>  		     unsigned int *i)
>  {
> -	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
> +	xt_sum_counter(total, cpu, &e->counters);
>  
>  	(*i)++;
>  	return 0;
> @@ -948,10 +949,11 @@ get_counters(const struct xt_table_info 
>  			continue;
>  		i = 0;
>  		IP6T_ENTRY_ITERATE(t->entries[cpu],
> -				  t->size,
> -				  add_entry_to_counter,
> -				  counters,
> -				  &i);
> +				   t->size,
> +				   add_entry_to_counter,
> +				   cpu,
> +				   counters,
> +				   &i);
>  	}
>  }
>  
> @@ -1357,8 +1359,7 @@ add_counter_to_entry(struct ip6t_entry *
>  		 (long unsigned int)addme[*i].pcnt,
>  		 (long unsigned int)addme[*i].bcnt);
>  #endif
> -
> -	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
> +	xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
>  
>  	(*i)++;
>  	return 0;
> --- a/net/netfilter/x_tables.c	2009-01-30 08:38:32.949293300 -0800
> +++ b/net/netfilter/x_tables.c	2009-01-30 09:13:27.636792850 -0800
> @@ -577,6 +577,21 @@ int xt_compat_target_to_user(struct xt_e
>  EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
>  #endif
>  
> +void xt_add_counter(struct xt_counters *c, unsigned b, unsigned p)
> +{
> +	c->bcnt += b;
> +	c->pcnt += p;
> +}
> +EXPORT_SYMBOL_GPL(xt_add_counter);
> +
> +void xt_sum_counter(struct xt_counters *t, int cpu,
> +		    const struct xt_counters *c)
> +{
> +	t->pcnt += c->pcnt;
> +	t->bcnt += c->bcnt;
> +}
> +EXPORT_SYMBOL_GPL(xt_sum_counter);
> +
>  struct xt_table_info *xt_alloc_table_info(unsigned int size)
>  {
>  	struct xt_table_info *newinfo;
> 

First I wondered if adding out of line xt_add_counter() could slow firewalls,
I did some testings, with tbench and a small iptables setup (about 16 matched rules per packet)


CPU: Core 2, speed 3000.11 MHz (estimated)
Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (Unhalted core cycles) count 100000
samples  %        symbol name
3166081  12.8996  ipt_do_table
2471426  10.0694  copy_from_user
1016717   4.1424  copy_to_user
902072    3.6753  schedule
687266    2.8001  xt_add_counter
589899    2.4034  tcp_sendmsg
579619    2.3615  tcp_ack
455883    1.8574  tcp_transmit_skb
 


c044d110 <xt_add_counter>: /* xt_add_counter total: 687266  2.8001 */
 80675  0.3287 :c044d110:       push   %ebp
 15574  0.0635 :c044d111:       mov    %esp,%ebp
     2 8.1e-06 :c044d113:       sub    $0xc,%esp
 39583  0.1613 :c044d116:       mov    %ebx,(%esp)
  4387  0.0179 :c044d119:       mov    %edi,0x8(%esp)
  1187  0.0048 :c044d11d:       mov    %esi,0x4(%esp)
  1601  0.0065 :c044d121:       mov    $0xc068ae4c,%edi
 38881  0.1584 :c044d126:       mov    %fs:0xc0688540,%ebx
  5585  0.0228 :c044d12d:       add    %ebx,%edi
  3910  0.0159 :c044d12f:       incl   (%edi)
133482  0.5438 :c044d131:       xor    %esi,%esi
    32 1.3e-04 :c044d133:       add    %edx,0x8(%eax)
 71181  0.2900 :c044d136:       mov    %ecx,%edx
  7986  0.0325 :c044d138:       adc    %esi,0xc(%eax)
 88695  0.3614 :c044d13b:       xor    %ecx,%ecx
    15 6.1e-05 :c044d13d:       add    %edx,(%eax)
 41496  0.1691 :c044d13f:       adc    %ecx,0x4(%eax)
 52944  0.2157 :c044d142:       incl   (%edi)
 30759  0.1253 :c044d144:       mov    (%esp),%ebx
 20241  0.0825 :c044d147:       mov    0x4(%esp),%esi
  5662  0.0231 :c044d14b:       mov    0x8(%esp),%edi
  2288  0.0093 :c044d14f:       leave
 41100  0.1675 :c044d150:       ret

tbench 8 results here, after all your patches applied :

Throughput 2331 MB/sec 8 procs

And if inlined :

Throughput 2359.06 MB/sec 8 procs

and if we check inlined code/costs we see :

  2597  0.1719 :c048dfee:       mov    -0x64(%ebp),%edx
   182  0.0120 :c048dff1:       movzwl 0x2(%edx),%eax
   524  0.0347 :c048dff5:       mov    %fs:0xc0688540,%ecx
     7 4.6e-04 :c048dffc:       add    -0x70(%ebp),%ecx
  2465  0.1632 :c048dfff:       incl   (%ecx)
  9476  0.6273 :c048e001:       addl   $0x1,0x60(%edi)
 10068  0.6665 :c048e005:       adcl   $0x0,0x64(%edi)
  6543  0.4332 :c048e009:       xor    %edx,%edx
     1 6.6e-05 :c048e00b:       rol    $0x8,%ax
   234  0.0155 :c048e00f:       movzwl %ax,%eax
  2198  0.1455 :c048e012:       add    %eax,0x68(%edi)
    80  0.0053 :c048e015:       adc    %edx,0x6c(%edi)
  2858  0.1892 :c048e018:       incl   (%ecx)


With upcoming work on fast percpu accesses, we might in the future see following code:
(no need for registers to compute address of variable)

      mov    -0x64(%ebp),%edx
      movzwl 0x2(%edx),%eax
      incl   %fs:xt_counter_sequence
      addl   $0x1,0x60(%edi)
      adcl   $0x0,0x64(%edi)
      xor    %edx,%edx
      rol    $0x8,%ax
      movzwl %ax,%eax
      add    %eax,0x68(%edi)
      adc    %edx,0x6c(%edi)
      incl   %fs:xt_counter_sequence



  reply	other threads:[~2009-02-01 12:25 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-01-30 21:57 [PATCH 0/6] iptables: eliminate read/write lock (v0.4) Stephen Hemminger
2009-01-30 21:57 ` [PATCH 1/6] netfilter: change elements in x_tables Stephen Hemminger
2009-01-30 21:57 ` [PATCH 2/6] netfilter: remove unneeded initializations Stephen Hemminger
2009-01-30 21:57 ` [PATCH 3/6] ebtables: " Stephen Hemminger
2009-01-30 21:57 ` [PATCH 4/6] netfilter: abstract xt_counters Stephen Hemminger
2009-02-01 12:25   ` Eric Dumazet [this message]
2009-02-02 23:33     ` [PATCH 3/3] iptables: lock free counters (alternate version) Stephen Hemminger
2009-02-03 19:00       ` Eric Dumazet
2009-02-03 19:19         ` Eric Dumazet
2009-02-03 19:32         ` Paul E. McKenney
2009-02-03 20:20           ` Eric Dumazet
2009-02-03 20:44             ` Stephen Hemminger
2009-02-03 21:05               ` Eric Dumazet
2009-02-03 21:10             ` Paul E. McKenney
2009-02-03 21:22               ` Stephen Hemminger
2009-02-03 21:27                 ` Rick Jones
2009-02-03 23:11                 ` Paul E. McKenney
2009-02-03 23:18                   ` Stephen Hemminger
2009-01-30 21:57 ` [PATCH 5/6] netfilter: use sequence number synchronization for counters Stephen Hemminger
2009-01-30 21:57 ` [PATCH 6/6] netfilter: convert x_tables to use RCU Stephen Hemminger
2009-01-31 17:27   ` Eric Dumazet
  -- strict thread matches above, loose matches on Subject: below --
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
2009-01-29 19:12 ` [PATCH 4/6] netfilter: abstract xt_counters Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=498594B6.6000905@cosmosbay.com \
    --to=dada1@cosmosbay$(echo .)com \
    --cc=davem@davemloft$(echo .)net \
    --cc=netdev@vger$(echo .)kernel.org \
    --cc=shemminger@vyatta$(echo .)com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox