public inbox for netdev@vger.kernel.org 
 help / color / mirror / Atom feed
From: Michael Breuer <mbreuer@majjas•com>
To: Stephen Hemminger <shemminger@vyatta•com>
Cc: David Miller <davem@davemloft•net>,
	jarkao2@gmail•com, mikem@ring3k•org, flyboy@gmail•com,
	rjw@sisk•pl, netdev@vger•kernel.org
Subject: Re: [PATCH] sky2: safer transmit ring cleaning
Date: Tue, 12 Jan 2010 15:31:36 -0500	[thread overview]
Message-ID: <4B4CDC28.2050508@majjas.com> (raw)
In-Reply-To: <4B4CC29E.4020703@majjas.com>

On 1/12/2010 1:42 PM, Michael Breuer wrote:
> On 1/12/2010 1:35 PM, Michael Breuer wrote:
>> On 1/12/2010 11:15 AM, Stephen Hemminger wrote:
>>> This code makes transmit path and transmit reset safer by:
>>>    * adding memory barrier before checking available ring slots
>>>    * reseting state of tx ring elements after free
>>>    * seperate cleanup function from ring done function
>>>    * removing mostly unused tx_next element
>>>
>>> Signed-off-by: Stephen Hemminger<shemminger@vyatta•com>
>>>
>>> ---
>>> Please apply this instead of the various bits and pieces flying
>>> around labeled as sky2 panic under load
>>>
>>>
>>> --- a/drivers/net/sky2.c    2010-01-11 10:49:50.907113126 -0800
>>> +++ b/drivers/net/sky2.c    2010-01-11 17:36:22.027429875 -0800
>>> @@ -1596,6 +1596,9 @@ static inline int tx_inuse(const struct
>>>   /* Number of list elements available for next tx */
>>>   static inline int tx_avail(const struct sky2_port *sky2)
>>>   {
>>> +    /* Makes sure update of tx_prod from start_xmit and
>>> +       tx_cons from tx_done are seen. */
>>> +    smp_mb();
>>>       return sky2->tx_pending - tx_inuse(sky2);
>>>   }
>>>
>>> @@ -1618,8 +1621,7 @@ static unsigned tx_le_req(const struct s
>>>       return count;
>>>   }
>>>
>>> -static void sky2_tx_unmap(struct pci_dev *pdev,
>>> -              const struct tx_ring_info *re)
>>> +static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info 
>>> *re)
>>>   {
>>>       if (re->flags&  TX_MAP_SINGLE)
>>>           pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
>>> @@ -1629,6 +1631,7 @@ static void sky2_tx_unmap(struct pci_dev
>>>           pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
>>>                      pci_unmap_len(re, maplen),
>>>                      PCI_DMA_TODEVICE);
>>> +    re->flags = 0;
>>>   }
>>>
>>>   /*
>>> @@ -1804,7 +1807,8 @@ mapping_error:
>>>   }
>>>
>>>   /*
>>> - * Free ring elements from starting at tx_cons until "done"
>>> + * Transmit complete processing
>>> + * Free ring elements from starting at tx_cons until done index
>>>    *
>>>    * NB:
>>>    *  1. The hardware will tell us about partial completion of 
>>> multi-part
>>> @@ -1813,9 +1817,9 @@ mapping_error:
>>>    *     looks at the tail of the queue of FIFO (tx_cons), not
>>>    *     the head (tx_prod)
>>>    */
>>> -static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
>>> +static void sky2_tx_done(struct net_device *dev, u16 done)
>>>   {
>>> -    struct net_device *dev = sky2->netdev;
>>> +    struct sky2_port *sky2 = netdev_priv(dev);
>>>       unsigned idx;
>>>
>>>       BUG_ON(done>= sky2->tx_ring_size);
>>> @@ -1828,6 +1832,8 @@ static void sky2_tx_complete(struct sky2
>>>           sky2_tx_unmap(sky2->hw->pdev, re);
>>>
>>>           if (skb) {
>>> +            re->skb = NULL;
>>> +
>>>               if (unlikely(netif_msg_tx_done(sky2)))
>>>                   printk(KERN_DEBUG "%s: tx done %u\n",
>>>                          dev->name, idx);
>>> @@ -1836,13 +1842,10 @@ static void sky2_tx_complete(struct sky2
>>>               dev->stats.tx_bytes += skb->len;
>>>
>>>               dev_kfree_skb_any(skb);
>>> -
>>> -            sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
>>>           }
>>>       }
>>>
>>>       sky2->tx_cons = idx;
>>> -    smp_mb();
>>>
>>>       if (tx_avail(sky2)>  MAX_SKB_TX_LE + 4)
>>>           netif_wake_queue(dev);
>>> @@ -1870,6 +1873,21 @@ static void sky2_tx_reset(struct sky2_hw
>>>       sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
>>>   }
>>>
>>> +static void sky2_tx_clean(struct sky2_port *sky2)
>>> +{
>>> +    u16 idx;
>>> +
>>> +    for (idx = 0; idx<  sky2->tx_ring_size; idx++) {
>>> +        struct tx_ring_info *re = sky2->tx_ring + idx;
>>> +
>>> +        sky2_tx_unmap(sky2->hw->pdev, re);
>>> +        if (re->skb) {
>>> +            dev_kfree_skb_any(re->skb);
>>> +            re->skb = NULL;
>>> +        }
>>> +    }
>>> +}
>>> +
>>>   /* Network shutdown */
>>>   static int sky2_down(struct net_device *dev)
>>>   {
>>> @@ -1933,8 +1951,7 @@ static int sky2_down(struct net_device *
>>>       sky2_tx_reset(hw, port);
>>>
>>>       /* Free any pending frames stuck in HW queue */
>>> -    sky2_tx_complete(sky2, sky2->tx_prod);
>>> -
>>> +    sky2_tx_clean(sky2);
>>>       sky2_rx_clean(sky2);
>>>
>>>       sky2_free_buffers(sky2);
>>> @@ -2411,15 +2428,6 @@ error:
>>>       goto resubmit;
>>>   }
>>>
>>> -/* Transmit complete */
>>> -static inline void sky2_tx_done(struct net_device *dev, u16 last)
>>> -{
>>> -    struct sky2_port *sky2 = netdev_priv(dev);
>>> -
>>> -    if (netif_running(dev))
>>> -        sky2_tx_complete(sky2, last);
>>> -}
>>> -
>>>   static inline void sky2_skb_rx(const struct sky2_port *sky2,
>>>                      u32 status, struct sk_buff *skb)
>>>   {
>>> @@ -4201,7 +4209,7 @@ static int sky2_debug_show(struct seq_fi
>>>
>>>       /* Dump contents of tx ring */
>>>       sop = 1;
>>> -    for (idx = sky2->tx_next; idx != sky2->tx_prod&&  idx<  
>>> sky2->tx_ring_size;
>>> +    for (idx = sky2->tx_cons; idx != sky2->tx_prod&&  idx<  
>>> sky2->tx_ring_size;
>>>            idx = RING_NEXT(idx, sky2->tx_ring_size)) {
>>>           const struct sky2_tx_le *le = sky2->tx_le + idx;
>>>           u32 a = le32_to_cpu(le->addr);
>>> --- a/drivers/net/sky2.h    2010-01-11 17:29:22.817088617 -0800
>>> +++ b/drivers/net/sky2.h    2010-01-11 17:29:28.197120484 -0800
>>> @@ -2187,7 +2187,6 @@ struct sky2_port {
>>>       u16             tx_ring_size;
>>>       u16             tx_cons;        /* next le to check */
>>>       u16             tx_prod;        /* next le to use */
>>> -    u16             tx_next;        /* debug only */
>>>
>>>       u16             tx_pending;
>>>       u16             tx_last_mss;
>> Test observations:
>>
>> 1. DHCP request/response sequence having some issues... can't confirm 
>> that it's a result of this patch, but I don't see this with the prior 
>> patch. Prior to this patch, if I connect a new device (Blackberry in 
>> this case) I see DHCPDISCOVER;DHCPOFFER;DHCPREQUEST;DHCPACK (just the 
>> four messages). With this patch I'm seeing repeated attempts - i.e., 
>> DISCOVER/OFFER 6 times before the REQUEST/ACK. This is repeatable 
>> and  happening whether or not under load. As my original problem 
>> started with DHCP packets, this seems interesting. I don't see any 
>> errors logged (dmesg, messages, etc.).
>> 2. Probably not related to this patch, but perhaps to the driver - 
>> I've finally tracked the source of my dropped RX packets. It's 
>> happening when a sending data to a CIFS client on a MacOS system. The 
>> RX drop rate seems proportional to the TX rate for SMB to that client 
>> - at a tx rate of about 200Kb/s I see about 20 dropped RX packets/sec 
>> - at 400 I see about 40.  I'm thinking therefore it's ACKs being 
>> dropped on RX. Why? no idea (yet). Nothing reported by ethtool or 
>> netstat -s remotely correlates to the number of dropped RX packets.
>>
>>
> Let me add: the CIFS client from which the packets are dropped is 
> connected via a dd-wrt router (on wifi) connected to the sky2 1G port. 
> A Windows client connected directly to the 1G port does not exhibit 
> the same symptoms (. I'll try later the Mac directly connected & a 
> Wintel box over wifi if possible. The DD-WRT router (linksys 
> wrt54g-tm) is bridged (wifi clients on same subnet as wired & serviced 
> by DHCPD running on the linux box). This is also the source of the 
> aforementioned perhaps-flaky DHCP connections.
Ok - a little more on the dropped packets - only happens when connected 
through the wifi router - but happens using a wired connection as well 
(via the router's ethernet port).

 From sniffer traces: I see large frames outbound (even though 
MTU=1500). I see the fragmented result arriving on the remote side. I 
see ACK's for each of the fragmented frames outbound from the Mac, but 
not received on the Linux side.

I also don't see any retransmissions or duplicate ACKS on either sniffer 
trace. I'm wondering whether there is fragmentation offload to the 
Yukon2, and whether the individual fragment acks are what's showing up 
dropped. If so, I don't understand why it would only happen with the 
wifi router vs. switch in the middle. Maybe the router is doing 
something to the packets which is causing the Yukon to forward the acks 
up differently? The router MTU is also 1500 on all ports, and does not 
show dropped packets or errors corresponding to what I'm seeing on the 
sky2 adapter.

  reply	other threads:[~2010-01-12 20:32 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-01-10 22:27 2.6.33-rc3-git3: Reported regressions from 2.6.32 Rafael J. Wysocki
2010-01-10 22:32 ` [Bug #14925] sky2 panic under load Rafael J. Wysocki
2010-01-11  0:36   ` Berck E. Nash
2010-01-11 13:26     ` Jarek Poplawski
2010-01-11 19:32       ` Rafael J. Wysocki
2010-01-11 20:31         ` Jarek Poplawski
2010-01-11 20:50           ` Rafael J. Wysocki
2010-01-11 21:02           ` Berck E. Nash
2010-01-11 21:47             ` Jarek Poplawski
2010-01-11 14:03     ` Mike McCormack
2010-01-11 16:45       ` Stephen Hemminger
2010-01-11 22:07         ` Jarek Poplawski
2010-01-12  0:14           ` David Miller
2010-01-12  7:50             ` Jarek Poplawski
2010-01-12  8:08               ` David Miller
2010-01-12  8:56                 ` Jarek Poplawski
2010-01-12  9:42                   ` David Miller
2010-01-12 10:31                     ` Jarek Poplawski
2010-01-12 10:56                     ` David Miller
2010-01-12 11:04                       ` Jarek Poplawski
2010-01-12 15:39                       ` Stephen Hemminger
2010-01-12 16:15                       ` [PATCH] sky2: safer transmit ring cleaning Stephen Hemminger
2010-01-12 16:32                         ` Michael Breuer
2010-01-12 17:02                           ` Stephen Hemminger
2010-01-12 18:04                         ` Jarek Poplawski
2010-01-12 18:13                           ` Stephen Hemminger
2010-01-12 18:24                             ` Jarek Poplawski
2010-01-12 18:49                               ` [PATCH] sky2: safer transmit ring cleaning (v2) Stephen Hemminger
2010-01-12 19:16                                 ` Jarek Poplawski
2010-01-12 19:23                                   ` Stephen Hemminger
2010-01-12 19:50                                     ` Jarek Poplawski
2010-01-13  1:23                                       ` Stephen Hemminger
2010-01-12 19:34                                 ` Michael Breuer
2010-01-12 18:35                         ` [PATCH] sky2: safer transmit ring cleaning Michael Breuer
2010-01-12 18:42                           ` Michael Breuer
2010-01-12 20:31                             ` Michael Breuer [this message]
2010-01-13  4:10                               ` [PATCH] sky2: safer transmit ring cleaning (v3) Stephen Hemminger
2010-01-13  4:31                                 ` Michael Breuer
2010-01-13  7:35                                 ` Jarek Poplawski
2010-01-13 16:04                                 ` Michael Breuer
2010-01-14  3:41                                   ` [PATCH] sky2: safer transmit ring cleaning (v4) Stephen Hemminger
2010-01-14 10:14                                     ` Jarek Poplawski
2010-01-14 11:16                                       ` Jarek Poplawski
2010-01-14 11:20                                         ` David Miller
2010-01-14 11:26                                           ` Jarek Poplawski
2010-01-14 13:19                                             ` Mike McCormack
2010-01-14 15:43                                               ` Michael Breuer
2010-01-14 16:46                                               ` Michael Breuer
2010-01-14 17:51                                               ` Stephen Hemminger
2010-01-14 17:52                                       ` Stephen Hemminger
2010-01-14 23:51                                         ` Michael Breuer
2010-01-16 18:35                                           ` sky2 DHCPOFFER packet loss under load (Was Re: [PATCH] sky2: safer transmit ring cleaning (v4)) Michael Breuer
2010-01-14 15:46                                     ` [PATCH] sky2: safer transmit ring cleaning (v4) Michael Breuer
2010-01-11 22:31         ` [Bug #14925] sky2 panic under load Jarek Poplawski
2010-01-11 16:00 ` 2.6.33-rc3-git3: Reported regressions from 2.6.32 Luis R. Rodriguez
2010-01-11 21:47 ` Nick Bowler
2010-01-11 22:10   ` Rafael J. Wysocki
     [not found] ` <omZ8aBZReyD.A.rz.COlSLB@chimera>
2010-01-28 23:18   ` [PATCH net-2.6] cdc_ether: Partially revert "usbnet: Set link down initially ..." Ben Hutchings
2010-01-29  5:37     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B4CDC28.2050508@majjas.com \
    --to=mbreuer@majjas$(echo .)com \
    --cc=davem@davemloft$(echo .)net \
    --cc=flyboy@gmail$(echo .)com \
    --cc=jarkao2@gmail$(echo .)com \
    --cc=mikem@ring3k$(echo .)org \
    --cc=netdev@vger$(echo .)kernel.org \
    --cc=rjw@sisk$(echo .)pl \
    --cc=shemminger@vyatta$(echo .)com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox