* ip6ip6 tunnel routing issue
@ 2012-02-22 9:25 Takács András
0 siblings, 0 replies; only message in thread
From: Takács András @ 2012-02-22 9:25 UTC (permalink / raw)
To: netdev
[-- Attachment #1: Type: text/plain, Size: 2438 bytes --]
Dear All,
We're using Mobile IPv6 for vehicle communication, and we have found a
very annoying issue in IPv6 routing.
We're running kernel 2.6.35.14, but we think that the problem still
exists in the latest kernel tree as well.
The description of the problem:
The Mobile IPv6 implementation in Linux uses a different metric value for
each Care-of Address when it creates or modifies default routes. As a
result, all packets go out on the interface which has the lowest metric
value, regardless of the tunnel interface. In practice, this means that
packets which are routed into ip6tnl2 go out on eth1, even though the
setup binds ip6tnl2 to eth2.
I have attached a test script which reproduces the problem without
Mobile IPv6. The init argument initializes the environment.
I have tried to find the problem in the kernel source. I found the
following:
In the find_rr_leaf function (net/ipv6/route.c), the iteration continues
only while the route's metric is equal to the specified one. If you look
at the caller, you will see that this metric value is the metric of the
first route info entry: http://pastebin.com/XfALRrrY
We have two route entries that are identical except for the interfaces
and the metric values:
default via fe80::20c:29ff:fe3b:4d16 dev eth1 proto ra metric 1023
mtu 1500 advmss 1440 hoplimit 0
default via fe80::20c:29ff:fe3b:4d20 dev eth2 proto ra metric 1053
mtu 1500 advmss 1440 hoplimit 0
In this case, the above loop will call find_match only once, for eth1.
When this happens, the find_match function cannot find eth2, which
belongs to ip6tnl2, and returns eth1 instead.
Unfortunately, the (IPv6) routing mechanism is quite complex in the
kernel. I could find only a very ugly workaround for our problem:
The attached patch introduces a new route lookup flag:
RT6_LOOKUP_F_IP6TUNNEL. It is passed down step by step to find_rr_leaf,
and when this flag is set, the loop ignores the metric condition.
The ip6ip6-metric-fix.patch contains the fix for 2.6.35.14. We had to
change the signature of ip6_route_output. Because of this, the
ip6ip6-metric-stuff.patch updates all the other places where this
function is called.
What is your opinion about this problem? What do you think about this
workaround? Could anybody help us find a more elegant solution for
this issue?
Best Regards,
András Takács
[-- Attachment #2: ip6ip6-metric-fix.patch --]
[-- Type: text/plain, Size: 5868 bytes --]
Index: /trunk/kernel/linux-2.6.35.14/include/net/ip6_route.h
===================================================================
--- /trunk/kernel/linux-2.6.35.14/include/net/ip6_route.h (revision 68)
+++ /trunk/kernel/linux-2.6.35.14/include/net/ip6_route.h (revision 384)
@@ -34,10 +34,11 @@
#define RT6_LOOKUP_F_REACHABLE 0x00000002
#define RT6_LOOKUP_F_HAS_SADDR 0x00000004
#define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008
#define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010
#define RT6_LOOKUP_F_SRCPREF_COA 0x00000020
+#define RT6_LOOKUP_F_IP6TUNNEL 0x00000040
/*
* rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
* between IPV6_ADDR_PREFERENCES socket option values
* IPV6_PREFER_SRC_TMP = 0x1
@@ -58,11 +59,11 @@
extern void ip6_route_input(struct sk_buff *skb);
extern struct dst_entry * ip6_route_output(struct net *net,
struct sock *sk,
- struct flowi *fl);
+ struct flowi *fl, int flags);
extern int ip6_route_init(void);
extern void ip6_route_cleanup(void);
extern int ipv6_route_ioctl(struct net *net,
Index: /trunk/kernel/linux-2.6.35.14/net/ipv6/ip6_tunnel.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/ipv6/ip6_tunnel.c (revision 68)
+++ /trunk/kernel/linux-2.6.35.14/net/ipv6/ip6_tunnel.c (revision 384)
@@ -860,11 +860,11 @@
int pkt_len;
if ((dst = ip6_tnl_dst_check(t)) != NULL)
dst_hold(dst);
else {
- dst = ip6_route_output(net, NULL, fl);
+ dst = ip6_route_output(net, NULL, fl, RT6_LOOKUP_F_IP6TUNNEL);
if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0)
goto tx_err_link_failure;
}
Index: /trunk/kernel/linux-2.6.35.14/net/ipv6/route.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/ipv6/route.c (revision 68)
+++ /trunk/kernel/linux-2.6.35.14/net/ipv6/route.c (revision 384)
@@ -398,27 +398,32 @@
return match;
}
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
struct rt6_info *rr_head,
- u32 metric, int oif, int strict)
+ u32 metric, int oif, int flags, int reachable)
{
struct rt6_info *rt, *match;
int mpri = -1;
+ int strict = 0;
+
+ strict |= flags & RT6_LOOKUP_F_IFACE;
match = NULL;
- for (rt = rr_head; rt && rt->rt6i_metric == metric;
- rt = rt->u.dst.rt6_next)
+ for (rt = rr_head; rt && ((flags & RT6_LOOKUP_F_IP6TUNNEL) || rt->rt6i_metric == metric);
+ rt = rt->u.dst.rt6_next) {
match = find_match(rt, oif, strict, &mpri, match);
- for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
- rt = rt->u.dst.rt6_next)
+ }
+ for (rt = fn->leaf; rt && rt != rr_head && ((flags & RT6_LOOKUP_F_IP6TUNNEL) || rt->rt6i_metric == metric);
+ rt = rt->u.dst.rt6_next) {
match = find_match(rt, oif, strict, &mpri, match);
+ }
return match;
}
-static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
+static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int flags, int reachable)
{
struct rt6_info *match, *rt0;
struct net *net;
RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
@@ -426,14 +431,13 @@
rt0 = fn->rr_ptr;
if (!rt0)
fn->rr_ptr = rt0 = fn->leaf;
- match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
-
- if (!match &&
- (strict & RT6_LOOKUP_F_REACHABLE)) {
+ match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, flags, reachable);
+
+ if (!match && reachable) {
struct rt6_info *next = rt0->u.dst.rt6_next;
/* no entries matched; do round-robin */
if (!next || next->rt6i_metric != rt0->rt6i_metric)
next = fn->leaf;
@@ -703,25 +707,22 @@
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
struct flowi *fl, int flags)
{
struct fib6_node *fn;
struct rt6_info *rt, *nrt;
- int strict = 0;
int attempts = 3;
int err;
int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
- strict |= flags & RT6_LOOKUP_F_IFACE;
-
relookup:
read_lock_bh(&table->tb6_lock);
restart_2:
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
- rt = rt6_select(fn, oif, strict | reachable);
+ rt = rt6_select(fn, oif, flags, reachable);
BACKTRACK(net, &fl->fl6_src);
if (rt == net->ipv6.ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
@@ -768,11 +769,15 @@
read_unlock_bh(&table->tb6_lock);
out2:
rt->u.dst.lastuse = jiffies;
rt->u.dst.__use++;
- return rt;
+ if (flags & RT6_LOOKUP_F_IP6TUNNEL) {
+ printk(KERN_INFO "*** %s: %s\n", __FUNCTION__, rt->rt6i_dev->name);
+ }
+
+ return rt;
}
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
struct flowi *fl, int flags)
{
@@ -808,14 +813,12 @@
{
return ip6_pol_route(net, table, fl->oif, fl, flags);
}
struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
- struct flowi *fl)
-{
- int flags = 0;
-
+ struct flowi *fl, int flags)
+{
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
flags |= RT6_LOOKUP_F_IFACE;
if (!ipv6_addr_any(&fl->fl6_src))
flags |= RT6_LOOKUP_F_HAS_SADDR;
@@ -2381,11 +2384,11 @@
through good chunk of routing engine.
*/
skb_reset_mac_header(skb);
skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
- rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
+ rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl, 0);
skb_dst_set(skb, &rt->u.dst);
err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
nlh->nlmsg_seq, 0, 0, 0);
[-- Attachment #3: ip6ip6-metric-stuff.patch --]
[-- Type: text/plain, Size: 8462 bytes --]
Index: /trunk/kernel/linux-2.6.35.14/net/netfilter/ipvs/ip_vs_xmit.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/netfilter/ipvs/ip_vs_xmit.c (revision 269)
+++ /trunk/kernel/linux-2.6.35.14/net/netfilter/ipvs/ip_vs_xmit.c (revision 384)
@@ -139,11 +139,11 @@
},
},
};
rt = (struct rt6_info *)ip6_route_output(&init_net,
- NULL, &fl);
+ NULL, &fl, 0);
if (!rt) {
spin_unlock(&dest->dst_lock);
IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
&dest->addr.in6);
return NULL;
@@ -165,11 +165,11 @@
},
},
},
};
- rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl, 0);
if (!rt) {
IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
&cp->daddr.in6);
return NULL;
}
@@ -299,11 +299,11 @@
.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
};
EnterFunction(10);
- rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl, 0);
if (!rt) {
IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n",
__func__, &iph->daddr);
goto tx_error_icmp;
}
Index: /trunk/kernel/linux-2.6.35.14/net/netfilter/ipvs/ip_vs_ctl.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/netfilter/ipvs/ip_vs_ctl.c (revision 269)
+++ /trunk/kernel/linux-2.6.35.14/net/netfilter/ipvs/ip_vs_ctl.c (revision 384)
@@ -110,11 +110,11 @@
.ip6_u = {
.daddr = *addr,
.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
};
- rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl, 0);
if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
return 1;
return 0;
}
Index: /trunk/kernel/linux-2.6.35.14/net/netfilter/xt_TEE.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/netfilter/xt_TEE.c (revision 269)
+++ /trunk/kernel/linux-2.6.35.14/net/netfilter/xt_TEE.c (revision 384)
@@ -151,11 +151,11 @@
fl.oif = info->priv->oif;
}
fl.nl_u.ip6_u.daddr = info->gw.in6;
fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
(iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
- dst = ip6_route_output(net, NULL, &fl);
+ dst = ip6_route_output(net, NULL, &fl, 0);
if (dst == NULL)
return false;
skb_dst_drop(skb);
skb_dst_set(skb, dst);
Index: /trunk/kernel/linux-2.6.35.14/net/sctp/ipv6.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/sctp/ipv6.c (revision 68)
+++ /trunk/kernel/linux-2.6.35.14/net/sctp/ipv6.c (revision 384)
@@ -256,11 +256,11 @@
if (saddr) {
ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr);
SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl.fl6_src);
}
- dst = ip6_route_output(&init_net, NULL, &fl);
+ dst = ip6_route_output(&init_net, NULL, &fl, 0);
if (!dst->error) {
struct rt6_info *rt;
rt = (struct rt6_info *)dst;
SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n",
&rt->rt6i_dst.addr, &rt->rt6i_src.addr);
Index: /trunk/kernel/linux-2.6.35.14/net/ipv6/ndisc.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/ipv6/ndisc.c (revision 379)
+++ /trunk/kernel/linux-2.6.35.14/net/ipv6/ndisc.c (revision 384)
@@ -1531,11 +1531,11 @@
}
icmpv6_flow_init(sk, &fl, NDISC_REDIRECT,
&saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
- dst = ip6_route_output(net, NULL, &fl);
+ dst = ip6_route_output(net, NULL, &fl, 0);
if (dst == NULL)
return;
err = xfrm_lookup(net, &dst, &fl, NULL, 0);
if (err)
Index: /trunk/kernel/linux-2.6.35.14/net/ipv6/netfilter/ip6t_REJECT.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/ipv6/netfilter/ip6t_REJECT.c (revision 379)
+++ /trunk/kernel/linux-2.6.35.14/net/ipv6/netfilter/ip6t_REJECT.c (revision 384)
@@ -96,11 +96,11 @@
ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr);
ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
fl.fl_ip_sport = otcph.dest;
fl.fl_ip_dport = otcph.source;
security_skb_classify_flow(oldskb, &fl);
- dst = ip6_route_output(net, NULL, &fl);
+ dst = ip6_route_output(net, NULL, &fl, 0);
if (dst == NULL || dst->error) {
dst_release(dst);
return;
}
if (xfrm_lookup(net, &dst, &fl, NULL, 0))
Index: /trunk/kernel/linux-2.6.35.14/net/ipv6/ip6_output.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/ipv6/ip6_output.c (revision 68)
+++ /trunk/kernel/linux-2.6.35.14/net/ipv6/ip6_output.c (revision 384)
@@ -927,11 +927,11 @@
{
int err;
struct net *net = sock_net(sk);
if (*dst == NULL)
- *dst = ip6_route_output(net, sk, fl);
+ *dst = ip6_route_output(net, sk, fl, 0);
if ((err = (*dst)->error))
goto out_err_release;
if (ipv6_addr_any(&fl->fl6_src)) {
@@ -970,11 +970,11 @@
* default router instead
*/
dst_release(*dst);
memcpy(&fl_gw, fl, sizeof(struct flowi));
memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
- *dst = ip6_route_output(net, sk, &fl_gw);
+ *dst = ip6_route_output(net, sk, &fl_gw, 0);
if ((err = (*dst)->error))
goto out_err_release;
}
}
#endif
Index: /trunk/kernel/linux-2.6.35.14/net/ipv6/xfrm6_policy.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/ipv6/xfrm6_policy.c (revision 68)
+++ /trunk/kernel/linux-2.6.35.14/net/ipv6/xfrm6_policy.c (revision 384)
@@ -36,11 +36,11 @@
memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst));
if (saddr)
memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src));
- dst = ip6_route_output(net, NULL, &fl);
+ dst = ip6_route_output(net, NULL, &fl, 0);
err = dst->error;
if (dst->error) {
dst_release(dst);
dst = ERR_PTR(err);
Index: /trunk/kernel/linux-2.6.35.14/net/ipv6/netfilter.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/ipv6/netfilter.c (revision 68)
+++ /trunk/kernel/linux-2.6.35.14/net/ipv6/netfilter.c (revision 384)
@@ -22,11 +22,11 @@
{ .ip6_u =
{ .daddr = iph->daddr,
.saddr = iph->saddr, } },
};
- dst = ip6_route_output(net, skb->sk, &fl);
+ dst = ip6_route_output(net, skb->sk, &fl, 0);
if (dst->error) {
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
dst_release(dst);
return -EINVAL;
@@ -91,11 +91,11 @@
return 0;
}
static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl)
{
- *dst = ip6_route_output(&init_net, NULL, fl);
+ *dst = ip6_route_output(&init_net, NULL, fl, 0);
return (*dst)->error;
}
__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
Index: /trunk/kernel/linux-2.6.35.14/net/ipv6/ip6mr.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/ipv6/ip6mr.c (revision 68)
+++ /trunk/kernel/linux-2.6.35.14/net/ipv6/ip6mr.c (revision 384)
@@ -1845,11 +1845,11 @@
.nl_u = { .ip6_u =
{ .daddr = ipv6h->daddr, }
}
};
- dst = ip6_route_output(net, NULL, &fl);
+ dst = ip6_route_output(net, NULL, &fl, 0);
if (!dst)
goto out_free;
skb_dst_drop(skb);
skb_dst_set(skb, dst);
Index: /trunk/kernel/linux-2.6.35.14/net/ipv6/icmp.c
===================================================================
--- /trunk/kernel/linux-2.6.35.14/net/ipv6/icmp.c (revision 68)
+++ /trunk/kernel/linux-2.6.35.14/net/ipv6/icmp.c (revision 384)
@@ -175,11 +175,11 @@
/*
* Look up the output route.
* XXX: perhaps the expire for routing entries cloned by
* this lookup should be more aggressive (not longer than timeout).
*/
- dst = ip6_route_output(net, sk, fl);
+ dst = ip6_route_output(net, sk, fl, 0);
if (dst->error) {
IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_OUTNOROUTES);
} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
res = 1;
[-- Attachment #4: test-ip6-tnls.sh --]
[-- Type: application/x-sh, Size: 5195 bytes --]
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2012-02-22 9:25 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-22 9:25 ip6ip6 tunnel routing issue Takács András
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox