net: Enable support for VRF with ipv4 multicast
author David Ahern <dsa@cumulusnetworks.com>
Mon, 31 Oct 2016 22:54:00 +0000 (15:54 -0700)
committer David S. Miller <davem@davemloft.net>
Tue, 1 Nov 2016 15:54:26 +0000 (11:54 -0400)
Enable support for IPv4 multicast:
- similar to unicast, the flow struct is updated to the L3 master device,
  if relevant, prior to calling fib_rules_lookup. The table id is saved
  to the lookup arg so the rule action for ipmr can return the table
  associated with the device.

- ip_mr_forward needs to check for a master device mismatch as well,
  since skb->dev is set to the master device

- allow multicast address on VRF device for Rx by checking for the
  daddr in the VRF device as well as the original ingress device

- on Tx, we need to fall through to __mkroute_output when the FIB lookup
  fails for a multicast destination address.

- if CONFIG_IP_MROUTE_MULTIPLE_TABLES is enabled, the VRF driver creates
  IPMR FIB rules on first device create, similar to the existing FIB
  rules. In addition, the VRF driver does not divert IPv4 multicast
  packets: diverting them breaks Tx, since the FIB lookup fails on the
  mcast address.

With this patch, ipmr forwarding and local rx/tx work.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/vrf.c
net/ipv4/ipmr.c
net/ipv4/route.c

index 820de6a9ddde1dcfa8ee389bc20e56ae61afa1b9..3bca24651dc0a1c5121348a21d7431d8384d2024 100644 (file)
@@ -272,11 +272,6 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
        if (IS_ERR(rt))
                goto err;
 
-       if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
-               ip_rt_put(rt);
-               goto err;
-       }
-
        skb_dst_drop(skb);
 
        /* if dst.dev is loopback or the VRF device again this is locally
@@ -611,6 +606,10 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
        struct dst_entry *dst = NULL;
        struct rtable *rth;
 
+       /* don't divert multicast */
+       if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+               return skb;
+
        rcu_read_lock();
 
        rth = rcu_dereference(vrf->rth);
@@ -999,6 +998,9 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
        skb->skb_iif = vrf_dev->ifindex;
        IPCB(skb)->flags |= IPSKB_L3SLAVE;
 
+       if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+               goto out;
+
        /* loopback traffic; do not push through packet taps again.
         * Reset pkt_type for upper layers to process skb
         */
@@ -1162,8 +1164,19 @@ static int vrf_add_fib_rules(const struct net_device *dev)
        if (err < 0)
                goto ipv6_err;
 
+#if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES)
+       err = vrf_fib_rule(dev, RTNL_FAMILY_IPMR, true);
+       if (err < 0)
+               goto ipmr_err;
+#endif
+
        return 0;
 
+#if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES)
+ipmr_err:
+       vrf_fib_rule(dev, AF_INET6,  false);
+#endif
+
 ipv6_err:
        vrf_fib_rule(dev, AF_INET,  false);
 
index 51d71a70fbbeb24477b7c890fc5495de7a3b1961..f2fd13b072734c2330b01ed663f3d785b0c35332 100644 (file)
@@ -137,6 +137,9 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
                .flags = FIB_LOOKUP_NOREF,
        };
 
+       /* update flow if oif or iif point to device enslaved to l3mdev */
+       l3mdev_update_flow(net, flowi4_to_flowi(flp4));
+
        err = fib_rules_lookup(net->ipv4.mr_rules_ops,
                               flowi4_to_flowi(flp4), 0, &arg);
        if (err < 0)
@@ -163,7 +166,9 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
                return -EINVAL;
        }
 
-       mrt = ipmr_get_table(rule->fr_net, rule->table);
+       arg->table = fib_rule_get_table(rule, arg);
+
+       mrt = ipmr_get_table(rule->fr_net, arg->table);
        if (!mrt)
                return -EAGAIN;
        res->mrt = mrt;
@@ -1809,6 +1814,12 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 
        /* Wrong interface: drop packet and (maybe) send PIM assert. */
        if (mrt->vif_table[vif].dev != skb->dev) {
+               struct net_device *mdev;
+
+               mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
+               if (mdev == skb->dev)
+                       goto forward;
+
                if (rt_is_output_route(skb_rtable(skb))) {
                        /* It is our own packet, looped back.
                         * Very complicated situation...
index 62d4d90c1389c4ea7da37c81779b2f55207d2a92..4392db83d54061beb7fc4cb39997bd6d8b0a3483 100644 (file)
@@ -1980,25 +1980,35 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
         */
        if (ipv4_is_multicast(daddr)) {
                struct in_device *in_dev = __in_dev_get_rcu(dev);
+               int our = 0;
 
-               if (in_dev) {
-                       int our = ip_check_mc_rcu(in_dev, daddr, saddr,
-                                                 ip_hdr(skb)->protocol);
-                       if (our
+               if (in_dev)
+                       our = ip_check_mc_rcu(in_dev, daddr, saddr,
+                                             ip_hdr(skb)->protocol);
+
+               /* check l3 master if no match yet */
+               if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
+                       struct in_device *l3_in_dev;
+
+                       l3_in_dev = __in_dev_get_rcu(skb->dev);
+                       if (l3_in_dev)
+                               our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
+                                                     ip_hdr(skb)->protocol);
+               }
+
+               res = -EINVAL;
+               if (our
 #ifdef CONFIG_IP_MROUTE
-                               ||
-                           (!ipv4_is_local_multicast(daddr) &&
-                            IN_DEV_MFORWARD(in_dev))
+                       ||
+                   (!ipv4_is_local_multicast(daddr) &&
+                    IN_DEV_MFORWARD(in_dev))
 #endif
-                          ) {
-                               int res = ip_route_input_mc(skb, daddr, saddr,
-                                                           tos, dev, our);
-                               rcu_read_unlock();
-                               return res;
-                       }
+                  ) {
+                       res = ip_route_input_mc(skb, daddr, saddr,
+                                               tos, dev, our);
                }
                rcu_read_unlock();
-               return -EINVAL;
+               return res;
        }
        res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
        rcu_read_unlock();
@@ -2266,7 +2276,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
                res.fi = NULL;
                res.table = NULL;
                if (fl4->flowi4_oif &&
-                   !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
+                   (ipv4_is_multicast(fl4->daddr) ||
+                   !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
                        /* Apparently, routing tables are wrong. Assume,
                           that the destination is on link.