From: Michael Breuer <mbreuer@majjas•com>
To: Stephen Hemminger <shemminger@vyatta•com>
Cc: David Miller <davem@davemloft•net>,
jarkao2@gmail•com, mikem@ring3k•org, flyboy@gmail•com,
rjw@sisk•pl, netdev@vger•kernel.org
Subject: Re: [PATCH] sky2: safer transmit ring cleaning
Date: Tue, 12 Jan 2010 15:31:36 -0500 [thread overview]
Message-ID: <4B4CDC28.2050508@majjas.com> (raw)
In-Reply-To: <4B4CC29E.4020703@majjas.com>
On 1/12/2010 1:42 PM, Michael Breuer wrote:
> On 1/12/2010 1:35 PM, Michael Breuer wrote:
>> On 1/12/2010 11:15 AM, Stephen Hemminger wrote:
>>> This code makes transmit path and transmit reset safer by:
>>> * adding memory barrier before checking available ring slots
>>> * reseting state of tx ring elements after free
>>> * seperate cleanup function from ring done function
>>> * removing mostly unused tx_next element
>>>
>>> Signed-off-by: Stephen Hemminger<shemminger@vyatta•com>
>>>
>>> ---
>>> Please apply this instead of the various bits and pieces flying
>>> around labeled as sky2 panic under load
>>>
>>>
>>> --- a/drivers/net/sky2.c 2010-01-11 10:49:50.907113126 -0800
>>> +++ b/drivers/net/sky2.c 2010-01-11 17:36:22.027429875 -0800
>>> @@ -1596,6 +1596,9 @@ static inline int tx_inuse(const struct
>>> /* Number of list elements available for next tx */
>>> static inline int tx_avail(const struct sky2_port *sky2)
>>> {
>>> + /* Makes sure update of tx_prod from start_xmit and
>>> + tx_cons from tx_done are seen. */
>>> + smp_mb();
>>> return sky2->tx_pending - tx_inuse(sky2);
>>> }
>>>
>>> @@ -1618,8 +1621,7 @@ static unsigned tx_le_req(const struct s
>>> return count;
>>> }
>>>
>>> -static void sky2_tx_unmap(struct pci_dev *pdev,
>>> - const struct tx_ring_info *re)
>>> +static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info
>>> *re)
>>> {
>>> if (re->flags& TX_MAP_SINGLE)
>>> pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
>>> @@ -1629,6 +1631,7 @@ static void sky2_tx_unmap(struct pci_dev
>>> pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
>>> pci_unmap_len(re, maplen),
>>> PCI_DMA_TODEVICE);
>>> + re->flags = 0;
>>> }
>>>
>>> /*
>>> @@ -1804,7 +1807,8 @@ mapping_error:
>>> }
>>>
>>> /*
>>> - * Free ring elements from starting at tx_cons until "done"
>>> + * Transmit complete processing
>>> + * Free ring elements from starting at tx_cons until done index
>>> *
>>> * NB:
>>> * 1. The hardware will tell us about partial completion of
>>> multi-part
>>> @@ -1813,9 +1817,9 @@ mapping_error:
>>> * looks at the tail of the queue of FIFO (tx_cons), not
>>> * the head (tx_prod)
>>> */
>>> -static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
>>> +static void sky2_tx_done(struct net_device *dev, u16 done)
>>> {
>>> - struct net_device *dev = sky2->netdev;
>>> + struct sky2_port *sky2 = netdev_priv(dev);
>>> unsigned idx;
>>>
>>> BUG_ON(done>= sky2->tx_ring_size);
>>> @@ -1828,6 +1832,8 @@ static void sky2_tx_complete(struct sky2
>>> sky2_tx_unmap(sky2->hw->pdev, re);
>>>
>>> if (skb) {
>>> + re->skb = NULL;
>>> +
>>> if (unlikely(netif_msg_tx_done(sky2)))
>>> printk(KERN_DEBUG "%s: tx done %u\n",
>>> dev->name, idx);
>>> @@ -1836,13 +1842,10 @@ static void sky2_tx_complete(struct sky2
>>> dev->stats.tx_bytes += skb->len;
>>>
>>> dev_kfree_skb_any(skb);
>>> -
>>> - sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
>>> }
>>> }
>>>
>>> sky2->tx_cons = idx;
>>> - smp_mb();
>>>
>>> if (tx_avail(sky2)> MAX_SKB_TX_LE + 4)
>>> netif_wake_queue(dev);
>>> @@ -1870,6 +1873,21 @@ static void sky2_tx_reset(struct sky2_hw
>>> sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
>>> }
>>>
>>> +static void sky2_tx_clean(struct sky2_port *sky2)
>>> +{
>>> + u16 idx;
>>> +
>>> + for (idx = 0; idx< sky2->tx_ring_size; idx++) {
>>> + struct tx_ring_info *re = sky2->tx_ring + idx;
>>> +
>>> + sky2_tx_unmap(sky2->hw->pdev, re);
>>> + if (re->skb) {
>>> + dev_kfree_skb_any(re->skb);
>>> + re->skb = NULL;
>>> + }
>>> + }
>>> +}
>>> +
>>> /* Network shutdown */
>>> static int sky2_down(struct net_device *dev)
>>> {
>>> @@ -1933,8 +1951,7 @@ static int sky2_down(struct net_device *
>>> sky2_tx_reset(hw, port);
>>>
>>> /* Free any pending frames stuck in HW queue */
>>> - sky2_tx_complete(sky2, sky2->tx_prod);
>>> -
>>> + sky2_tx_clean(sky2);
>>> sky2_rx_clean(sky2);
>>>
>>> sky2_free_buffers(sky2);
>>> @@ -2411,15 +2428,6 @@ error:
>>> goto resubmit;
>>> }
>>>
>>> -/* Transmit complete */
>>> -static inline void sky2_tx_done(struct net_device *dev, u16 last)
>>> -{
>>> - struct sky2_port *sky2 = netdev_priv(dev);
>>> -
>>> - if (netif_running(dev))
>>> - sky2_tx_complete(sky2, last);
>>> -}
>>> -
>>> static inline void sky2_skb_rx(const struct sky2_port *sky2,
>>> u32 status, struct sk_buff *skb)
>>> {
>>> @@ -4201,7 +4209,7 @@ static int sky2_debug_show(struct seq_fi
>>>
>>> /* Dump contents of tx ring */
>>> sop = 1;
>>> - for (idx = sky2->tx_next; idx != sky2->tx_prod&& idx<
>>> sky2->tx_ring_size;
>>> + for (idx = sky2->tx_cons; idx != sky2->tx_prod&& idx<
>>> sky2->tx_ring_size;
>>> idx = RING_NEXT(idx, sky2->tx_ring_size)) {
>>> const struct sky2_tx_le *le = sky2->tx_le + idx;
>>> u32 a = le32_to_cpu(le->addr);
>>> --- a/drivers/net/sky2.h 2010-01-11 17:29:22.817088617 -0800
>>> +++ b/drivers/net/sky2.h 2010-01-11 17:29:28.197120484 -0800
>>> @@ -2187,7 +2187,6 @@ struct sky2_port {
>>> u16 tx_ring_size;
>>> u16 tx_cons; /* next le to check */
>>> u16 tx_prod; /* next le to use */
>>> - u16 tx_next; /* debug only */
>>>
>>> u16 tx_pending;
>>> u16 tx_last_mss;
>> Test observations:
>>
>> 1. DHCP request/response sequence having some issues... can't confirm
>> that it's a result of this patch, but I don't see this with the prior
>> patch. Prior to this patch, if I connect a new device (Blackberry in
>> this case) I see DHCPDISCOVER;DHCPOFFER;DHCPREQUEST;DHCPACK (just the
>> four messages). With this patch I'm seeing repeated attempts - i.e.,
>> DISCOVER/OFFER 6 times before the REQUEST/ACK. This is repeatable
>> and happening whether or not under load. As my original problem
>> started with DHCP packets, this seems interesting. I don't see any
>> errors logged (dmesg, messages, etc.).
>> 2. Probably not related to this patch, but perhaps to the driver -
>> I've finally tracked the source of my dropped RX packets. It's
>> happening when a sending data to a CIFS client on a MacOS system. The
>> RX drop rate seems proportional to the TX rate for SMB to that client
>> - at a tx rate of about 200Kb/s I see about 20 dropped RX packets/sec
>> - at 400 I see about 40. I'm thinking therefore it's ACKs being
>> dropped on RX. Why? no idea (yet). Nothing reported by ethtool or
>> netstat -s remotely correlates to the number of dropped RX packets.
>>
>>
> Let me add: the CIFS client from which the packets are dropped is
> connected via a dd-wrt router (on wifi) connected to the sky2 1G port.
> A Windows client connected directly to the 1G port does not exhibit
> the same symptoms (. I'll try later the Mac directly connected & a
> Wintel box over wifi if possible. The DD-WRT router (linksys
> wrt54g-tm) is bridged (wifi clients on same subnet as wired & serviced
> by DHCPD running on the linux box). This is also the source of the
> aforementioned perhaps-flaky DHCP connections.
Ok - a little more on the dropped packets - only happens when connected
through the wifi router - but happens using a wired connection as well
(via the router's ethernet port).
From sniffer traces: I see large frames outbound (even though
MTU=1500). I see the fragmented result arriving on the remote side. I
see ACK's for each of the fragmented frames outbound from the Mac, but
not received on the Linux side.
I also don't see any retransmissions or duplicate ACKS on either sniffer
trace. I'm wondering whether there is fragmentation offload to the
Yukon2, and whether the individual fragment acks are what's showing up
dropped. If so, I don't understand why it would only happen with the
wifi router vs. switch in the middle. Maybe the router is doing
something to the packets which is causing the Yukon to forward the acks
up differently? The router MTU is also 1500 on all ports, and does not
show dropped packets or errors corresponding to what I'm seeing on the
sky2 adapter.
next prev parent reply other threads:[~2010-01-12 20:32 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-01-10 22:27 2.6.33-rc3-git3: Reported regressions from 2.6.32 Rafael J. Wysocki
2010-01-10 22:32 ` [Bug #14925] sky2 panic under load Rafael J. Wysocki
2010-01-11 0:36 ` Berck E. Nash
2010-01-11 13:26 ` Jarek Poplawski
2010-01-11 19:32 ` Rafael J. Wysocki
2010-01-11 20:31 ` Jarek Poplawski
2010-01-11 20:50 ` Rafael J. Wysocki
2010-01-11 21:02 ` Berck E. Nash
2010-01-11 21:47 ` Jarek Poplawski
2010-01-11 14:03 ` Mike McCormack
2010-01-11 16:45 ` Stephen Hemminger
2010-01-11 22:07 ` Jarek Poplawski
2010-01-12 0:14 ` David Miller
2010-01-12 7:50 ` Jarek Poplawski
2010-01-12 8:08 ` David Miller
2010-01-12 8:56 ` Jarek Poplawski
2010-01-12 9:42 ` David Miller
2010-01-12 10:31 ` Jarek Poplawski
2010-01-12 10:56 ` David Miller
2010-01-12 11:04 ` Jarek Poplawski
2010-01-12 15:39 ` Stephen Hemminger
2010-01-12 16:15 ` [PATCH] sky2: safer transmit ring cleaning Stephen Hemminger
2010-01-12 16:32 ` Michael Breuer
2010-01-12 17:02 ` Stephen Hemminger
2010-01-12 18:04 ` Jarek Poplawski
2010-01-12 18:13 ` Stephen Hemminger
2010-01-12 18:24 ` Jarek Poplawski
2010-01-12 18:49 ` [PATCH] sky2: safer transmit ring cleaning (v2) Stephen Hemminger
2010-01-12 19:16 ` Jarek Poplawski
2010-01-12 19:23 ` Stephen Hemminger
2010-01-12 19:50 ` Jarek Poplawski
2010-01-13 1:23 ` Stephen Hemminger
2010-01-12 19:34 ` Michael Breuer
2010-01-12 18:35 ` [PATCH] sky2: safer transmit ring cleaning Michael Breuer
2010-01-12 18:42 ` Michael Breuer
2010-01-12 20:31 ` Michael Breuer [this message]
2010-01-13 4:10 ` [PATCH] sky2: safer transmit ring cleaning (v3) Stephen Hemminger
2010-01-13 4:31 ` Michael Breuer
2010-01-13 7:35 ` Jarek Poplawski
2010-01-13 16:04 ` Michael Breuer
2010-01-14 3:41 ` [PATCH] sky2: safer transmit ring cleaning (v4) Stephen Hemminger
2010-01-14 10:14 ` Jarek Poplawski
2010-01-14 11:16 ` Jarek Poplawski
2010-01-14 11:20 ` David Miller
2010-01-14 11:26 ` Jarek Poplawski
2010-01-14 13:19 ` Mike McCormack
2010-01-14 15:43 ` Michael Breuer
2010-01-14 16:46 ` Michael Breuer
2010-01-14 17:51 ` Stephen Hemminger
2010-01-14 17:52 ` Stephen Hemminger
2010-01-14 23:51 ` Michael Breuer
2010-01-16 18:35 ` sky2 DHCPOFFER packet loss under load (Was Re: [PATCH] sky2: safer transmit ring cleaning (v4)) Michael Breuer
2010-01-14 15:46 ` [PATCH] sky2: safer transmit ring cleaning (v4) Michael Breuer
2010-01-11 22:31 ` [Bug #14925] sky2 panic under load Jarek Poplawski
2010-01-11 16:00 ` 2.6.33-rc3-git3: Reported regressions from 2.6.32 Luis R. Rodriguez
2010-01-11 21:47 ` Nick Bowler
2010-01-11 22:10 ` Rafael J. Wysocki
[not found] ` <omZ8aBZReyD.A.rz.COlSLB@chimera>
2010-01-28 23:18 ` [PATCH net-2.6] cdc_ether: Partially revert "usbnet: Set link down initially ..." Ben Hutchings
2010-01-29 5:37 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4B4CDC28.2050508@majjas.com \
--to=mbreuer@majjas$(echo .)com \
--cc=davem@davemloft$(echo .)net \
--cc=flyboy@gmail$(echo .)com \
--cc=jarkao2@gmail$(echo .)com \
--cc=mikem@ring3k$(echo .)org \
--cc=netdev@vger$(echo .)kernel.org \
--cc=rjw@sisk$(echo .)pl \
--cc=shemminger@vyatta$(echo .)com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox