Re: [RFC PATCH net-next 2/5] vxlan: make COLLECT_METADATA mode bridge friendly

public inbox for netdev@vger.kernel.org 
 help / color / mirror / Atom feed

From: Roopa Prabhu <roopa@cumulusnetworks•com>
To: Nikolay Aleksandrov <nikolay@cumulusnetworks•com>
Cc: netdev@vger•kernel.org, davem@davemloft•net,
	stephen@networkplumber•org, tgraf@suug•ch,
	hannes@stressinduktion•org, jbenc@redhat•com, pshelar@ovn•org,
	dsa@cumulusnetworks•com, hadi@mojatatu•com
Subject: Re: [RFC PATCH net-next 2/5] vxlan: make COLLECT_METADATA mode bridge friendly
Date: Sun, 22 Jan 2017 07:18:09 -0800	[thread overview]
Message-ID: <5884CD31.5070708@cumulusnetworks.com> (raw)
In-Reply-To: <f4c86e0a-472b-700c-98fe-f5da2eea35e6@cumulusnetworks.com>

On 1/22/17, 3:40 AM, Nikolay Aleksandrov wrote:
> On 21/01/17 06:46, Roopa Prabhu wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks•com>
>>
>> This patch series makes vxlan COLLECT_METADATA mode bridge
>> and layer2 network friendly. Vxlan COLLECT_METADATA mode today
>> solves the per-vni netdev scalability problem in l3 networks.
>> When a vxlan collect metadata device participates in bridging
>> vlan to vn-segments, it can only get the vlan mapped vni in
>> the xmit tunnel dst metadata. It will need the vxlan driver to
>> continue to learn, hold forwarding state and remote destination
>> information similar to how it already does for non COLLECT_METADATA
>> vxlan netdevices today.
>>
>> Changes introduced by this patch:
>>     - allow learning and forwarding database state to vxlan netdev in
>>       COLLECT_METADATA mode. Current behaviour is not changed
>>       by default. tunnel info flag IP_TUNNEL_INFO_BRIDGE is used
>>       to support the new bridge friendly mode.
>>     - A single fdb table hashed by (mac, vni) to allow fdb entries with
>>       multiple vnis in the same fdb table
>>     - rx path already has the vni
>>     - tx path expects a vni in the packet with dst_metadata
>>     - prior to this series, fdb remote_dsts carried remote vni and
>>       the vxlan device carrying the fdb table represented the
>>       source vni. With the vxlan device now representing multiple vnis,
>>       this patch adds a src vni attribute to the fdb entry. The remote
>>       vni already uses NDA_VNI attribute. This patch introduces
>>       NDA_SRC_VNI netlink attribute to represent the src vni in a multi
>>       vni fdb table.
>>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks•com>
>> ---
> [snip]
>> @@ -2173,23 +2221,29 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
>>  	bool did_rsc = false;
>>  	struct vxlan_rdst *rdst, *fdst = NULL;
>>  	struct vxlan_fdb *f;
>> +	__be32 vni = 0;
>>  
>>  	info = skb_tunnel_info(skb);
>>  
>>  	skb_reset_mac_header(skb);
>>  
>>  	if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
>> -		if (info && info->mode & IP_TUNNEL_INFO_TX)
>> -			vxlan_xmit_one(skb, dev, NULL, false);
>> -		else
>> -			kfree_skb(skb);
>> -		return NETDEV_TX_OK;
>> +		if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
>> +		    info->mode & IP_TUNNEL_INFO_TX) {
> nit: parentheses around the IP_TUNNEL_INFO_TX check
>
>> +			vni = tunnel_id_to_key32(info->key.tun_id);
>> +		} else {
>> +			if (info && info->mode & IP_TUNNEL_INFO_TX)
> nit: parentheses around the IP_TUNNEL_INFO_TX check

ack
>> +				vxlan_xmit_one(skb, dev, vni, NULL, false);
>> +			else
>> +				kfree_skb(skb);
>> +			return NETDEV_TX_OK;
>> +		}
>>  	}
>>  
>>  	if (vxlan->flags & VXLAN_F_PROXY) {
>>  		eth = eth_hdr(skb);
>>  		if (ntohs(eth->h_proto) == ETH_P_ARP)
>> -			return arp_reduce(dev, skb);
>> +			return arp_reduce(dev, skb, vni);
>>  #if IS_ENABLED(CONFIG_IPV6)
>>  		else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
>>  			 pskb_may_pull(skb, sizeof(struct ipv6hdr)
>> @@ -2200,13 +2254,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
>>  				msg = (struct nd_msg *)skb_transport_header(skb);
>>  				if (msg->icmph.icmp6_code == 0 &&
>>  				    msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
>> -					return neigh_reduce(dev, skb);
>> +					return neigh_reduce(dev, skb, vni);
>>  		}
>>  #endif
>>  	}
>>  
>>  	eth = eth_hdr(skb);
>> -	f = vxlan_find_mac(vxlan, eth->h_dest);
>> +	f = vxlan_find_mac(vxlan, eth->h_dest, vni);
>>  	did_rsc = false;
>>  
>>  	if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) &&
>> @@ -2214,11 +2268,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
>>  	     ntohs(eth->h_proto) == ETH_P_IPV6)) {
>>  		did_rsc = route_shortcircuit(dev, skb);
>>  		if (did_rsc)
>> -			f = vxlan_find_mac(vxlan, eth->h_dest);
>> +			f = vxlan_find_mac(vxlan, eth->h_dest, vni);
>>  	}
>>  
>>  	if (f == NULL) {
>> -		f = vxlan_find_mac(vxlan, all_zeros_mac);
>> +		f = vxlan_find_mac(vxlan, all_zeros_mac, vni);
>>  		if (f == NULL) {
>>  			if ((vxlan->flags & VXLAN_F_L2MISS) &&
>>  			    !is_multicast_ether_addr(eth->h_dest))
>> @@ -2239,11 +2293,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
>>  		}
>>  		skb1 = skb_clone(skb, GFP_ATOMIC);
>>  		if (skb1)
>> -			vxlan_xmit_one(skb1, dev, rdst, did_rsc);
>> +			vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
>>  	}
>>  
>>  	if (fdst)
>> -		vxlan_xmit_one(skb, dev, fdst, did_rsc);
>> +		vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
>>  	else
>>  		kfree_skb(skb);
>>  	return NETDEV_TX_OK;
>> @@ -2307,12 +2361,12 @@ static int vxlan_init(struct net_device *dev)
>>  	return 0;
>>  }
>>  
>> -static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
>> +static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
>>  {
>>  	struct vxlan_fdb *f;
>>  
>>  	spin_lock_bh(&vxlan->hash_lock);
>> -	f = __vxlan_find_mac(vxlan, all_zeros_mac);
>> +	f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
>>  	if (f)
>>  		vxlan_fdb_destroy(vxlan, f);
>>  	spin_unlock_bh(&vxlan->hash_lock);
>> @@ -2322,7 +2376,7 @@ static void vxlan_uninit(struct net_device *dev)
>>  {
>>  	struct vxlan_dev *vxlan = netdev_priv(dev);
>>  
>> -	vxlan_fdb_delete_default(vxlan);
>> +	vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
>>  
>>  	free_percpu(dev->tstats);
>>  }
>> @@ -2536,6 +2590,8 @@ static void vxlan_setup(struct net_device *dev)
>>  	dev->vlan_features = dev->features;
>>  	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
>>  	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
>> +	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
>> +	dev->features |= dev->hw_features;
>>  	netif_keep_dst(dev);
>>  	dev->priv_flags |= IFF_NO_QUEUE;
>>  
>> @@ -2921,6 +2977,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
>>  				       NLM_F_EXCL|NLM_F_CREATE,
>>  				       vxlan->cfg.dst_port,
>>  				       vxlan->default_dst.remote_vni,
>> +				       vxlan->default_dst.remote_vni,
>>  				       vxlan->default_dst.remote_ifindex,
>>  				       NTF_SELF);
>>  		if (err)
>> @@ -2929,7 +2986,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
>>  
>>  	err = register_netdevice(dev);
>>  	if (err) {
>> -		vxlan_fdb_delete_default(vxlan);
>> +		vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
>>  		return err;
>>  	}
>>  
>> @@ -3023,19 +3080,19 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
>>  		conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
>>  
>>  	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
>> -	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
>> +	    !nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
>>  		conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
>>  
>>  	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] &&
>> -	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
>> +	    !nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
>>  		conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
>>  
>>  	if (data[IFLA_VXLAN_REMCSUM_TX] &&
>> -	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
>> +	    !nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
>>  		conf.flags |= VXLAN_F_REMCSUM_TX;
>>  
>>  	if (data[IFLA_VXLAN_REMCSUM_RX] &&
>> -	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
>> +	    !nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
>>  		conf.flags |= VXLAN_F_REMCSUM_RX;
> Aren't these going to break user-space ? 

correct... ignore these. Not intentional. These were from an incorrect merge with an earlier changelink patch I had.
Did not realize these had crept in.

thanks.

next prev parent reply	other threads:[~2017-01-22 15:18 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-21  5:46 [RFC PATCH net-next 0/5] bridge: per vlan lwt and dst_metadata support Roopa Prabhu
2017-01-21  5:46 ` [RFC PATCH net-next 1/5] ip_tunnels: new IP_TUNNEL_INFO_BRIDGE flag for ip_tunnel_info mode Roopa Prabhu
2017-01-21  5:46 ` [RFC PATCH net-next 2/5] vxlan: make COLLECT_METADATA mode bridge friendly Roopa Prabhu
2017-01-22 11:40   ` Nikolay Aleksandrov
2017-01-22 15:18     ` Roopa Prabhu [this message]
2017-01-21  5:46 ` [RFC PATCH net-next 3/5] bridge: uapi: add per vlan tunnel info Roopa Prabhu
2017-01-21 16:59   ` Roopa Prabhu
2017-01-21  5:46 ` [RFC PATCH net-next 4/5] bridge: vlan lwt and dst_metadata netlink support Roopa Prabhu
2017-01-22 12:05   ` Nikolay Aleksandrov
2017-01-22 15:23     ` Roopa Prabhu
2017-01-23  0:22   ` Rosen, Rami
2017-01-23 15:39     ` Roopa Prabhu
2017-01-21  5:46 ` [RFC PATCH net-next 5/5] bridge: vlan lwt dst_metadata hooks in ingress and egress paths Roopa Prabhu
2017-01-22 12:15   ` Nikolay Aleksandrov
2017-01-22 15:27     ` Roopa Prabhu
2017-01-23  8:08 ` [RFC PATCH net-next 0/5] bridge: per vlan lwt and dst_metadata support Jiri Pirko
2017-01-23  8:51   ` Jiri Benc
2017-01-23 16:13     ` Roopa Prabhu
2017-01-23 16:24       ` Jiri Benc
2017-01-24  0:00         ` Roopa Prabhu
     [not found]       ` <CAJ3xEMiC5xJ+rex8xMnyuGj5QKj+sYA9A6JjOM0xQaZraFSHig@mail.gmail.com>
2017-01-24  0:09         ` Roopa Prabhu
2017-01-24 15:47 ` Stephen Hemminger
2017-01-25 17:08   ` Roopa Prabhu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5884CD31.5070708@cumulusnetworks.com \
    --to=roopa@cumulusnetworks$(echo .)com \
    --cc=davem@davemloft$(echo .)net \
    --cc=dsa@cumulusnetworks$(echo .)com \
    --cc=hadi@mojatatu$(echo .)com \
    --cc=hannes@stressinduktion$(echo .)org \
    --cc=jbenc@redhat$(echo .)com \
    --cc=netdev@vger$(echo .)kernel.org \
    --cc=nikolay@cumulusnetworks$(echo .)com \
    --cc=pshelar@ovn$(echo .)org \
    --cc=stephen@networkplumber$(echo .)org \
    --cc=tgraf@suug$(echo .)ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox