[ovs-dev] [PATCH v4] userspace: Enable non-bridge port as tunnel endpoint.

Yifeng Sun pkusunyifeng at gmail.com
Thu Sep 5 17:40:28 UTC 2019


For userspace datapath, currently only the bridge itself, the LOCAL port,
can be the tunnel endpoint to encap/decap tunnel packets.  This patch
enables non-bridge port as tunnel endpoint.  One use case is for users to
create a bridge and a vtep port as tap, and configure underlay IP at vtep
port as the tunnel endpoint.

This patch causes failure for test "ptap - L3 over patch port". This is
because this test is already using non-bridge port gre1 as tunnel endpoint.
In this test, a flow is added to redirect tunnel packets to gre1 port,
as shown below:
  ovs-ofctl add-flow br1 in_port=p1,actions=output=gre1

It later generates a datapath flow which matches an extra eth field:
  - recirc_id(0),...,eth_type(0x0800),...
  + recirc_id(0),...,eth(dst=1e:2c:e9:2a:66:9e),eth_type(0x0800),...

With this patch, this flow need only a NORMAL action.

Signed-off-by: William Tu <u9012063 at gmail.com>
Co-authored-by: William Tu <u9012063 at gmail.com>
Signed-off-by: Yifeng Sun <pkusunyifeng at gmail.com>
---
v1->v2: Fixed an error pointed out by Ben.
v2->v3: Fixed a test failure, thanks Ben for review and testing!
v3->v4: When creating v3, a code rebase lead to test number change and
we tested the wrong test. Thanks Ben for catching it. In addition to
this fix, this version also added system test, as suggested by Darrell.
 ofproto/ofproto-dpif-xlate.c   | 56 +++++++++++++++++++++++++++++++++---------
 tests/packet-type-aware.at     |  8 +++---
 tests/system-layer3-tunnels.at | 55 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+), 15 deletions(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 17800f3c8a3f..9c2f44784161 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -3410,6 +3410,19 @@ tnl_route_lookup_flow(const struct xlate_ctx *ctx,
             }
         }
     }
+
+    /* If tunnel IP isn't configured on bridges, then we search all ports. */
+    HMAP_FOR_EACH (xbridge, hmap_node, &ctx->xcfg->xbridges) {
+        struct xport *port;
+
+        HMAP_FOR_EACH (port, ofp_node, &xbridge->xports) {
+            if (!strncmp(netdev_get_name(port->netdev),
+                         out_dev, IFNAMSIZ)) {
+                *out_port = port;
+                return 0;
+            }
+        }
+    }
     return -ENOENT;
 }
 
@@ -3972,6 +3985,16 @@ is_nd_dst_correct(const struct flow *flow, const struct in6_addr *ipv6_addr)
             IN6_ARE_ADDR_EQUAL(&flow->ipv6_dst, ipv6_addr);
 }
 
+static bool
+is_neighbor_reply_matched(const struct flow *flow, struct in6_addr *ip_addr)
+{
+    return ((IN6_IS_ADDR_V4MAPPED(ip_addr) &&
+             flow->dl_type == htons(ETH_TYPE_ARP) &&
+             in6_addr_get_mapped_ipv4(ip_addr) == flow->nw_dst) ||
+            (!IN6_IS_ADDR_V4MAPPED(ip_addr) &&
+              is_nd_dst_correct(flow, ip_addr)));
+}
+
 /* Function verifies if the ARP reply or Neighbor Advertisement represented by
  * 'flow' addresses the 'xbridge' of 'ctx'. Returns true if the ARP TA or
  * neighbor discovery destination is in the list of configured IP addresses of
@@ -3986,11 +4009,7 @@ is_neighbor_reply_correct(const struct xlate_ctx *ctx, const struct flow *flow)
     /* Verify if 'nw_dst' of ARP or 'ipv6_dst' of ICMPV6 is in the list. */
     for (i = 0; xbridge_addr && i < xbridge_addr->n_addr; i++) {
         struct in6_addr *ip_addr = &xbridge_addr->addr[i];
-        if ((IN6_IS_ADDR_V4MAPPED(ip_addr) &&
-             flow->dl_type == htons(ETH_TYPE_ARP) &&
-             in6_addr_get_mapped_ipv4(ip_addr) == flow->nw_dst) ||
-            (!IN6_IS_ADDR_V4MAPPED(ip_addr) &&
-              is_nd_dst_correct(flow, ip_addr))) {
+        if (is_neighbor_reply_matched(flow, ip_addr)) {
             /* Found a match. */
             ret = true;
             break;
@@ -3998,20 +4017,35 @@ is_neighbor_reply_correct(const struct xlate_ctx *ctx, const struct flow *flow)
     }
 
     xbridge_addr_unref(xbridge_addr);
+
+    /* If not found in bridge's IPs, search in its ports. */
+    if (!ret) {
+        struct in6_addr *ip_addr, *mask;
+        struct xport *port;
+        int error, n_in6;
+
+        HMAP_FOR_EACH (port, ofp_node, &ctx->xbridge->xports) {
+            error = netdev_get_addr_list(port->netdev, &ip_addr,
+                                         &mask, &n_in6);
+            if (!error && is_neighbor_reply_matched(flow, ip_addr)) {
+                /* Found a match. */
+                ret = true;
+                break;
+            }
+        }
+    }
     return ret;
 }
 
 static bool
-terminate_native_tunnel(struct xlate_ctx *ctx, ofp_port_t ofp_port,
-                        struct flow *flow, struct flow_wildcards *wc,
-                        odp_port_t *tnl_port)
+terminate_native_tunnel(struct xlate_ctx *ctx, struct flow *flow,
+                        struct flow_wildcards *wc, odp_port_t *tnl_port)
 {
     *tnl_port = ODPP_NONE;
 
     /* XXX: Write better Filter for tunnel port. We can use in_port
      * in tunnel-port flow to avoid these checks completely. */
-    if (ofp_port == OFPP_LOCAL &&
-        ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
+    if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
         *tnl_port = tnl_port_map_lookup(flow, wc);
 
         /* If no tunnel port was found and it's about an ARP or ICMPv6 packet,
@@ -4155,7 +4189,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
             native_tunnel_output(ctx, xport, flow, odp_port, truncate);
             flow->tunnel = flow_tnl; /* Restore tunnel metadata */
 
-        } else if (terminate_native_tunnel(ctx, ofp_port, flow, wc,
+        } else if (terminate_native_tunnel(ctx, flow, wc,
                                            &odp_tnl_port)) {
             /* Intercept packet to be received on native tunnel port. */
             nl_msg_put_odp_port(ctx->odp_actions, OVS_ACTION_ATTR_TUNNEL_POP,
diff --git a/tests/packet-type-aware.at b/tests/packet-type-aware.at
index e169709a906b..fb867860f369 100644
--- a/tests/packet-type-aware.at
+++ b/tests/packet-type-aware.at
@@ -697,7 +697,7 @@ AT_CHECK([
     ovs-ofctl del-flows br1 &&
     ovs-ofctl del-flows br2 &&
     ovs-ofctl add-flow br0 in_port=n0,actions=decap,output=p0 -OOpenFlow13 &&
-    ovs-ofctl add-flow br1 in_port=p1,actions=output=gre1 &&
+    ovs-ofctl add-flow br1 in_port=p1,actions=NORMAL &&
     ovs-ofctl add-flow br2 in_port=LOCAL,actions=output=n2
 ], [0])
 
@@ -708,7 +708,7 @@ AT_CHECK([ovs-ofctl -OOpenFlow13 dump-flows br0 | ofctl_strip | grep actions],
 
 AT_CHECK([ovs-ofctl -OOpenFlow13 dump-flows br1 | ofctl_strip | grep actions],
 [0], [dnl
- reset_counts in_port=20 actions=output:100
+ reset_counts in_port=20 actions=NORMAL
 ])
 
 AT_CHECK([ovs-ofctl -OOpenFlow13 dump-flows br2 | ofctl_strip | grep actions],
@@ -726,7 +726,7 @@ ovs-appctl time/warp 1000
 AT_CHECK([
     ovs-appctl dpctl/dump-flows --names dummy at ovs-dummy | strip_used | grep -v ipv6 | sort
 ], [0], [flow-dump from non-dpdk interfaces:
-recirc_id(0),in_port(n0),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(tos=0/0x3,frag=no), packets:1, bytes:98, used:0.0s, actions:pop_eth,clone(tnl_push(tnl_port(gre_sys),header(size=38,type=3,eth(dst=de:af:be:ef:ba:be,src=aa:55:00:00:00:02,dl_type=0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x0,proto=0x800))),out_port(br2)),n2)
+recirc_id(0),in_port(n0),packet_type(ns=0,id=0),eth(src=3a:6d:d2:09:9c:ab,dst=1e:2c:e9:2a:66:9e),eth_type(0x0800),ipv4(tos=0/0x3,frag=no), packets:1, bytes:98, used:0.0s, actions:pop_eth,clone(tnl_push(tnl_port(gre_sys),header(size=38,type=3,eth(dst=de:af:be:ef:ba:be,src=aa:55:00:00:00:02,dl_type=0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x0,proto=0x800))),out_port(br2)),n2)
 ])
 
 AT_CHECK([
@@ -892,7 +892,7 @@ ovs-appctl time/warp 1000
 AT_CHECK([
     ovs-appctl dpctl/dump-flows --names dummy at ovs-dummy | strip_used | grep -v ipv6 | sort
 ], [0], [flow-dump from non-dpdk interfaces:
-recirc_id(0),in_port(n0),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(tos=0/0x3,frag=no), packets:1, bytes:98, used:0.0s, actions:n1,pop_eth,clone(tnl_push(tnl_port(gre_sys),header(size=38,type=3,eth(dst=de:af:be:ef:ba:be,src=aa:55:00:00:00:02,dl_type=0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x0,proto=0x800))),out_port(br2)),n2)
+recirc_id(0),in_port(n0),packet_type(ns=0,id=0),eth(dst=1e:2c:e9:2a:66:9e),eth_type(0x0800),ipv4(tos=0/0x3,frag=no), packets:1, bytes:98, used:0.0s, actions:n1,pop_eth,clone(tnl_push(tnl_port(gre_sys),header(size=38,type=3,eth(dst=de:af:be:ef:ba:be,src=aa:55:00:00:00:02,dl_type=0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x0,proto=0x800))),out_port(br2)),n2)
 ])
 
 AT_CHECK([
diff --git a/tests/system-layer3-tunnels.at b/tests/system-layer3-tunnels.at
index db64e231da8b..1232964bb67c 100644
--- a/tests/system-layer3-tunnels.at
+++ b/tests/system-layer3-tunnels.at
@@ -97,3 +97,58 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING
 
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
+
+AT_SETUP([layer3 - use non-local port as tunnel endpoint])
+
+OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1])
+AT_CHECK([ovs-vsctl add-port br0 vtep0 -- set int vtep0 type=dummy], [0])
+AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy], [0])
+AT_CHECK([ovs-vsctl add-port int-br t1 -- set Interface t1 type=gre \
+                    options:remote_ip=1.1.2.92 ofport_request=3], [0])
+
+AT_CHECK([ovs-appctl dpif/show], [0], [dnl
+dummy at ovs-dummy: hit:0 missed:0
+  br0:
+    br0 65534/100: (dummy-internal)
+    p0 1/1: (dummy)
+    vtep0 2/2: (dummy)
+  int-br:
+    int-br 65534/3: (dummy-internal)
+    t1 3/4: (gre: remote_ip=1.1.2.92)
+])
+
+AT_CHECK([ovs-appctl netdev-dummy/ip4addr vtep0 1.1.2.88/24], [0], [OK
+])
+AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 vtep0], [0], [OK
+])
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+AT_CHECK([ovs-ofctl add-flow int-br action=normal])
+
+dnl Use arp request and reply to achieve tunnel next hop mac binding
+dnl By default, vtep0's MAC address is aa:55:aa:55:00:03
+AT_CHECK([ovs-appctl netdev-dummy/receive vtep0 'recirc_id(0),in_port(2),eth(dst=ff:ff:ff:ff:ff:ff,src=aa:55:aa:55:00:03),eth_type(0x0806),arp(tip=1.1.2.92,sip=1.1.2.88,op=1,sha=aa:55:aa:55:00:03,tha=00:00:00:00:00:00)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'recirc_id(0),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:03),eth_type(0x0806),arp(sip=1.1.2.92,tip=1.1.2.88,op=2,sha=f8:bc:12:44:34:b6,tha=aa:55:aa:55:00:03)'])
+
+AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl
+1.1.2.92                                      f8:bc:12:44:34:b6   br0
+])
+
+AT_CHECK([ovs-appctl ovs/route/show | tail -n+2 | sort], [0], [dnl
+User: 1.1.2.0/24 dev vtep0 SRC 1.1.2.88
+])
+
+dnl Check GRE tunnel pop
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:03),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: tnl_pop(4)
+])
+
+dnl Check GRE tunnel push
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(3),eth(dst=f9:bc:12:44:34:b6,src=af:55:aa:55:00:03),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.92,proto=1,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: clone(tnl_push(tnl_port(4),header(size=38,type=3,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:03,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x0,proto=0x6558))),out_port(2)),1)
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
-- 
2.7.4



More information about the dev mailing list