[ovs-dev] [PATCH ovn] Fix the routing for external logical ports of bridged logical switches.

numans at ovn.org numans at ovn.org
Tue Jun 16 17:55:39 UTC 2020


From: Numan Siddique <numans at ovn.org>

Routing for external logical ports is broken if these ports belong
to bridged logical switches (with a localnet port) and 'ovn-chassis-mac-mappings'
is configured. External logical ports are those which are external to OVN,
but there is a logical port for it and it is claimed by one of the HA chassis.
The claimed chassis provides routing and other native OVN services like DHCP and DNS.

When the external port sends ARP request for the router IP, the claimed chassis
replies for the ARP request, but the arp.sha is set to the actual router mac instead
of the chassis mac. This causes the traffic from the external port's VM/container to be handled
incorrectly. A ping to the router IP is answered by every chassis that can see the
packet, instead of only by the claimed HA chassis.

To fix this, this patch does 2 things.

1. In the table - OFTABLE_LOG_TO_PHY (65), it adds a 160 priority flow to
   modify the ARP packets arp.sha to store the chassis mac.

2. And when the packet destined to the chassis mac is received, it replaces the
   chassis mac with the actual router mac in table 0.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1829762
Reported-by: Daniel Alvarez <dalvarez at redhat.com>
CC: Ankur Sharma <ankur.sharma at nutanix.com>
Signed-off-by: Numan Siddique <numans at ovn.org>
---
 controller/chassis.c  |  48 ++++++++------
 controller/chassis.h  |   2 +
 controller/physical.c | 145 +++++++++++++++++++++++++++++++++++++++---
 tests/ovn.at          | 131 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 299 insertions(+), 27 deletions(-)

diff --git a/controller/chassis.c b/controller/chassis.c
index d619361c9..edd5e57f0 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -625,10 +625,11 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
 }
 
 bool
-chassis_get_mac(const struct sbrec_chassis *chassis_rec,
-                const char *bridge_mapping,
-                struct eth_addr *chassis_mac)
+chassis_get_mac_mappings(const struct sbrec_chassis *chassis_rec,
+                         struct smap *chassis_mappings)
 {
+    smap_init(chassis_mappings);
+
     const char *tokens
         = get_chassis_mac_mappings(&chassis_rec->other_config);
     if (!tokens[0]) {
@@ -636,7 +637,6 @@ chassis_get_mac(const struct sbrec_chassis *chassis_rec,
     }
 
     char *save_ptr = NULL;
-    bool ret = false;
     char *tokstr = xstrdup(tokens);
 
     /* Format for a chassis mac configuration is:
@@ -649,24 +649,36 @@ chassis_get_mac(const struct sbrec_chassis *chassis_rec,
         char *chassis_mac_bridge = strtok_r(token, ":", &save_ptr2);
         char *chassis_mac_str = strtok_r(NULL, "", &save_ptr2);
 
-        if (!strcmp(chassis_mac_bridge, bridge_mapping)) {
-            struct eth_addr temp_mac;
+        smap_replace(chassis_mappings, chassis_mac_bridge, chassis_mac_str);
+    }
 
-            /* Return the first chassis mac. */
-            char *err_str = str_to_mac(chassis_mac_str, &temp_mac);
-            if (err_str) {
-                free(err_str);
-                continue;
-            }
+    free(tokstr);
+    return true;
+}
 
-            ret = true;
-            *chassis_mac = temp_mac;
-            break;
-        }
+bool
+chassis_get_mac(const struct sbrec_chassis *chassis_rec,
+                const char *bridge_mapping,
+                struct eth_addr *chassis_mac)
+{
+    struct smap chassis_mappings;
+
+    if (!chassis_get_mac_mappings(chassis_rec, &chassis_mappings)) {
+        return false;
     }
 
-    free(tokstr);
-    return ret;
+    const char *chassis_mac_str = smap_get_def(&chassis_mappings,
+                                               bridge_mapping, "");
+    struct eth_addr temp_mac;
+
+    char *err_str = str_to_mac(chassis_mac_str, &temp_mac);
+    if (err_str) {
+        free(err_str);
+        return false;
+    }
+
+    *chassis_mac = temp_mac;
+    return true;
 }
 
 /* Returns true if the database is all cleaned up, false if more work is
diff --git a/controller/chassis.h b/controller/chassis.h
index 178d2957e..dae761312 100644
--- a/controller/chassis.h
+++ b/controller/chassis.h
@@ -42,6 +42,8 @@ bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
 bool chassis_get_mac(const struct sbrec_chassis *chassis,
                      const char *bridge_mapping,
                      struct eth_addr *chassis_mac);
+bool chassis_get_mac_mappings(const struct sbrec_chassis *,
+                              struct smap *chassis_mappings);
 const char *chassis_get_id(void);
 const char * get_chassis_mac_mappings(const struct smap *ext_ids);
 
diff --git a/controller/physical.c b/controller/physical.c
index f06313b9d..6a3936d4c 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -62,7 +62,8 @@ load_logical_ingress_metadata(const struct sbrec_port_binding *binding,
 /* UUID to identify OF flows not associated with ovsdb rows. */
 static struct uuid *hc_uuid = NULL;
 
-#define CHASSIS_MAC_TO_ROUTER_MAC_CONJID        100
+#define CHASSIS_MAC_TO_ROUTER_SRC_MAC_CONJID        100
+#define CHASSIS_MAC_TO_ROUTER_DST_MAC_CONJID        101
 
 void
 physical_register_ovs_idl(struct ovsdb_idl *ovs_idl)
@@ -148,6 +149,18 @@ put_move(enum mf_field_id src, int src_ofs,
     move->dst.n_bits = n_bits;
 }
 
+static void
+put_value(const uint8_t *data, size_t len,
+          enum mf_field_id dst, int ofs, int n_bits,
+          struct ofpbuf *ofpacts)
+{
+    struct ofpact_set_field *sf = ofpact_put_set_field(ofpacts,
+                                                       mf_from_id(dst), NULL,
+                                                       NULL);
+    bitwise_copy(data, len, 0, sf->value, sf->field->n_bytes, ofs, n_bits);
+    bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits);
+}
+
 static void
 put_resubmit(uint8_t table_id, struct ofpbuf *ofpacts)
 {
@@ -494,11 +507,10 @@ put_chassis_mac_conj_id_flow(const struct sbrec_chassis_table *chassis_table,
         ofpbuf_clear(ofpacts_p);
         match_init_catchall(&match);
 
-
         match_set_dl_src(&match, chassis_mac);
 
         conj = ofpact_put_CONJUNCTION(ofpacts_p);
-        conj->id = CHASSIS_MAC_TO_ROUTER_MAC_CONJID;
+        conj->id = CHASSIS_MAC_TO_ROUTER_SRC_MAC_CONJID;
         conj->n_clauses = 2;
         conj->clause = 0;
         ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 180,
@@ -507,6 +519,51 @@ put_chassis_mac_conj_id_flow(const struct sbrec_chassis_table *chassis_table,
     }
 
     free_remote_chassis_macs();
+
+    /* We need to replace the packet destined to the chassis mac (eth.dst)
+     * with the router mac. This is required to support external ports.
+     * These ports don't see the router mac at all since we send the
+     * chassis MAC in the ARP reply for any ARP requests to the router IPs.
+     * Without these flows, the packets will not enter the router pipeline
+     * if they need to be routed.
+     * Please see put_replace_chassis_mac_flows() for the 2nd clause of
+     * conj id - CHASSIS_MAC_TO_ROUTER_DST_MAC_CONJID.
+     * */
+    struct smap chassis_mac_mappings = SMAP_INITIALIZER(&chassis_mac_mappings);
+    if (chassis_get_mac_mappings(chassis, &chassis_mac_mappings)) {
+        struct smap_node *node;
+        struct sset macs = SSET_INITIALIZER(&macs);
+        SMAP_FOR_EACH (node, &chassis_mac_mappings) {
+            struct eth_addr chassis_mac;
+
+            char *err_str = str_to_mac(node->value, &chassis_mac);
+            if (err_str) {
+                free(err_str);
+                continue;
+            }
+
+            if (!sset_add(&macs, node->value)) {
+                /* The OF flow for the mac is already added. */
+                continue;
+            }
+
+            ofpbuf_clear(ofpacts_p);
+            match_init_catchall(&match);
+
+            match_set_dl_dst(&match, chassis_mac);
+
+            struct ofpact_conjunction *conj;
+            conj = ofpact_put_CONJUNCTION(ofpacts_p);
+            conj->id = CHASSIS_MAC_TO_ROUTER_DST_MAC_CONJID;
+            conj->n_clauses = 2;
+            conj->clause = 0;
+            ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 180,
+                            0, &match, ofpacts_p, hc_uuid);
+        }
+        sset_destroy(&macs);
+    }
+
+    smap_destroy(&chassis_mac_mappings);
 }
 
 static void
@@ -555,7 +612,7 @@ put_replace_chassis_mac_flows(const struct simap *ct_zones,
 
         /* Match on ingress port, vlan_id and conjunction id */
         match_set_in_port(&match, ofport);
-        match_set_conj_id(&match, CHASSIS_MAC_TO_ROUTER_MAC_CONJID);
+        match_set_conj_id(&match, CHASSIS_MAC_TO_ROUTER_SRC_MAC_CONJID);
 
         if (tag) {
             match_set_dl_vlan(&match, htons(tag), 0);
@@ -572,6 +629,37 @@ put_replace_chassis_mac_flows(const struct simap *ct_zones,
         replace_mac = ofpact_put_SET_ETH_SRC(ofpacts_p);
         replace_mac->mac = router_port_mac;
 
+        /* Resubmit to first logical ingress pipeline table. */
+        put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
+        ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 180,
+                        rport_binding->header_.uuid.parts[0],
+                        &match, ofpacts_p, hc_uuid);
+
+        ofpbuf_clear(ofpacts_p);
+        match_init_catchall(&match);
+
+        /* Add flow, which will match on conjunction id and will
+         * replace destination mac with router port mac */
+
+        /* Match on ingress port, vlan_id and conjunction id */
+        match_set_in_port(&match, ofport);
+        match_set_conj_id(&match, CHASSIS_MAC_TO_ROUTER_DST_MAC_CONJID);
+
+        if (tag) {
+            match_set_dl_vlan(&match, htons(tag), 0);
+        } else {
+            match_set_dl_tci_masked(&match, 0, htons(VLAN_CFI));
+        }
+
+        /* Actions */
+
+        if (tag) {
+            ofpact_put_STRIP_VLAN(ofpacts_p);
+        }
+        load_logical_ingress_metadata(localnet_port, &zone_ids, ofpacts_p);
+        replace_mac = ofpact_put_SET_ETH_DST(ofpacts_p);
+        replace_mac->mac = router_port_mac;
+
         /* Resubmit to first logical ingress pipeline table. */
         put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
         ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 180,
@@ -579,7 +667,7 @@ put_replace_chassis_mac_flows(const struct simap *ct_zones,
                         &match, ofpacts_p, hc_uuid);
 
         /* Provide second search criteria, i.e localnet port's
-         * vlan ID for conjunction flow */
+         * vlan ID for conjunction flows. */
         struct ofpact_conjunction *conj;
         ofpbuf_clear(ofpacts_p);
         match_init_catchall(&match);
@@ -591,12 +679,19 @@ put_replace_chassis_mac_flows(const struct simap *ct_zones,
         }
 
         conj = ofpact_put_CONJUNCTION(ofpacts_p);
-        conj->id = CHASSIS_MAC_TO_ROUTER_MAC_CONJID;
+        conj->id = CHASSIS_MAC_TO_ROUTER_SRC_MAC_CONJID;
+        conj->n_clauses = 2;
+        conj->clause = 1;
+
+        conj = ofpact_put_CONJUNCTION(ofpacts_p);
+        conj->id = CHASSIS_MAC_TO_ROUTER_DST_MAC_CONJID;
         conj->n_clauses = 2;
         conj->clause = 1;
+
         ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 180,
                         rport_binding->header_.uuid.parts[0],
                         &match, ofpacts_p, hc_uuid);
+
     }
 }
 
@@ -665,9 +760,6 @@ put_replace_router_port_mac_flows(struct ovsdb_idl_index
          * a. Flow replaces ingress router port mac with a chassis mac.
          * b. Flow appends the vlan id localnet port is configured with.
          */
-        match_init_catchall(&match);
-        ofpbuf_clear(ofpacts_p);
-
         ovs_assert(rport_binding->n_mac == 1);
         char *err_str = str_to_mac(rport_binding->mac[0], &router_port_mac);
         if (err_str) {
@@ -679,6 +771,9 @@ put_replace_router_port_mac_flows(struct ovsdb_idl_index
         }
 
         /* Replace Router mac flow */
+        match_init_catchall(&match);
+        ofpbuf_clear(ofpacts_p);
+
         match_set_metadata(&match, htonll(dp_key));
         match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
         match_set_dl_src(&match, router_port_mac);
@@ -698,6 +793,38 @@ put_replace_router_port_mac_flows(struct ovsdb_idl_index
         ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 150,
                         localnet_port->header_.uuid.parts[0],
                         &match, ofpacts_p, &localnet_port->header_.uuid);
+
+        /* Replace Router mac in the ARP packets (arp.sha) to the chassis MAC.
+         * This is very important and required for external logical ports and
+         * when these ports send ARP for their router IPs, the chassis mac
+         * should be sent which has claimed these external ports. */
+        match_init_catchall(&match);
+        ofpbuf_clear(ofpacts_p);
+
+        match_set_metadata(&match, htonll(dp_key));
+        match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
+        match_set_dl_src(&match, router_port_mac);
+        match_set_dl_type(&match, htons(ETH_TYPE_ARP));
+        match_set_arp_sha(&match, router_port_mac);
+
+        replace_mac = ofpact_put_SET_ETH_SRC(ofpacts_p);
+        replace_mac->mac = chassis_mac;
+
+        if (tag) {
+            struct ofpact_vlan_vid *vlan_vid;
+            vlan_vid = ofpact_put_SET_VLAN_VID(ofpacts_p);
+            vlan_vid->vlan_vid = tag;
+            vlan_vid->push_vlan_if_needed = true;
+        }
+
+        put_value(chassis_mac.ea, sizeof chassis_mac.ea, MFF_ARP_SHA,
+                  0, 48, ofpacts_p);
+
+        ofpact_put_OUTPUT(ofpacts_p)->port = ofport;
+
+        ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 160,
+                        localnet_port->header_.uuid.parts[0],
+                        &match, ofpacts_p, &localnet_port->header_.uuid);
     }
 }
 
diff --git a/tests/ovn.at b/tests/ovn.at
index 7e1ace556..6e8815dbc 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -14474,6 +14474,137 @@ AT_CHECK([cat ext1_v6.packets | cut -c -120], [0], [expout])
 cat ext1_v6.expected | cut -c 125- > expout
 AT_CHECK([cat ext1_v6.packets | cut -c 125-], [0], [expout])
 
+# Configure ovn-chassis-mac-mappings on all the hypervisors.
+as hv1
+ovs-vsctl set open . external_ids:ovn-chassis-mac-mappings=phys:1e:02:ad:aa:bb:01
+
+as hv2
+ovs-vsctl set open . external_ids:ovn-chassis-mac-mappings=phys:1e:02:ad:aa:bb:02
+
+as hv3
+ovs-vsctl set open . external_ids:ovn-chassis-mac-mappings=phys:1e:02:ad:aa:bb:03
+
+OVS_WAIT_UNTIL([test 6 = $(as hv1 ovs-ofctl dump-flows br-int table=0 | grep conj -c)])
+OVS_WAIT_UNTIL([test 6 = $(as hv2 ovs-ofctl dump-flows br-int table=0 | grep conj -c)])
+OVS_WAIT_UNTIL([test 6 = $(as hv3 ovs-ofctl dump-flows br-int table=0 | grep conj -c)])
+
+OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=0 | \
+grep conj | grep "dl_dst=1e:02:ad:aa:bb:01" -c)])
+
+OVS_WAIT_UNTIL([test 1 = $(as hv2 ovs-ofctl dump-flows br-int table=0 | \
+grep conj | grep "dl_dst=1e:02:ad:aa:bb:02" -c)])
+
+OVS_WAIT_UNTIL([test 1 = $(as hv3 ovs-ofctl dump-flows br-int table=0 | \
+grep conj | grep "dl_dst=1e:02:ad:aa:bb:03" -c)])
+
+OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=65,arp | \
+grep "load:0x1e02adaabb01->NXM_NX_ARP_SHA" -c)])
+
+OVS_WAIT_UNTIL([test 0 = $(as hv2 ovs-ofctl dump-flows br-int table=65,arp | \
+grep "load:0x1e02adaabb01->NXM_NX_ARP_SHA" -c)])
+
+OVS_WAIT_UNTIL([test 1 = $(as hv2 ovs-ofctl dump-flows br-int table=65,arp | \
+grep "load:0x1e02adaabb02->NXM_NX_ARP_SHA" -c)])
+
+OVS_WAIT_UNTIL([test 1 = $(as hv3 ovs-ofctl dump-flows br-int table=65,arp | \
+grep "load:0x1e02adaabb03->NXM_NX_ARP_SHA" -c)])
+
+as hv1
+reset_pcap_file hv1-ext1 hv1/ext1
+
+send_arp_request() {
+    local inport=$1 eth_src=$2 eth_dst=$3 spa=$4 tpa=$5
+    local reply_src_mac=$6 reply_dst_mac=$7
+    local reply_sha=$8 reply_tha=$9
+
+    local eth_type=0806
+    local eth=${eth_dst}${eth_src}${eth_type}
+
+    local arp=0001080006040001${eth_src}${spa}${eth_dst}${tpa}
+
+    local request=${eth}${arp}
+    as hv1 ovs-appctl netdev-dummy/receive hv${inport}-ext${inport} $request
+
+    local reply=${reply_dst_mac}${reply_src_mac}${eth_type}
+    reply=${reply}0001080006040002${reply_sha}${tpa}${reply_tha}${spa}
+    echo $reply > hv1-ext${inport}.expected
+}
+
+src_mac=f00000000003
+dst_mac=ffffffffffff
+reply_src_mac=1e02adaabb03
+repl_dst_mac=f00000000003
+# Send ARP request to router ip - 10.0.0.1
+send_arp_request 1 ${src_mac} ${dst_mac} $(ip_to_hex 10 0 0 6) $(ip_to_hex 10 0 0 1) \
+${reply_src_mac} ${repl_dst_mac} ${reply_src_mac} ${repl_dst_mac}
+
+OVS_WAIT_UNTIL([test 1 = $(as hv3 ovs-ofctl dump-flows br-int table=65,arp | \
+grep "load:0x1e02adaabb03->NXM_NX_ARP_SHA" | grep "n_packets=1" -c)])
+
+OVN_CHECK_PACKETS([hv1/ext1-tx.pcap], [hv1-ext1.expected])
+
+as hv1
+reset_pcap_file hv1-ext1 hv1/ext1
+
+# Send unicast ARP request destined to the chassis mac of hv3.
+src_mac=f00000000003
+dst_mac=1e02adaabb03
+reply_src_mac=1e02adaabb03
+repl_dst_mac=f00000000003
+send_arp_request 1 ${src_mac} ${dst_mac} $(ip_to_hex 10 0 0 6) $(ip_to_hex 10 0 0 1) \
+${reply_src_mac} ${repl_dst_mac} ${reply_src_mac} ${repl_dst_mac}
+
+OVS_WAIT_UNTIL([test 1 = $(as hv3 ovs-ofctl dump-flows br-int table=65,arp | \
+grep "load:0x1e02adaabb03->NXM_NX_ARP_SHA" | grep "n_packets=2" -c)])
+
+OVN_CHECK_PACKETS([hv1/ext1-tx.pcap], [hv1-ext1.expected])
+
+# Make hv2 active.
+ovn-nbctl ha-chassis-group-add-chassis hagrp1 hv2 60
+
+OVS_WAIT_UNTIL(
+    [chassis=`ovn-sbctl --bare --columns chassis find port_binding \
+logical_port=ls1-lp_ext1`
+    test "$chassis" = "$hv2_uuid"])
+
+reset_pcap_file hv1-ext1 hv1/ext1
+
+src_mac=f00000000003
+dst_mac=ffffffffffff
+reply_src_mac=1e02adaabb02
+repl_dst_mac=f00000000003
+# Send ARP request to router ip - 10.0.0.1. Should be replied by hv2.
+send_arp_request 1 ${src_mac} ${dst_mac} $(ip_to_hex 10 0 0 6) $(ip_to_hex 10 0 0 1) \
+${reply_src_mac} ${repl_dst_mac} ${reply_src_mac} ${repl_dst_mac}
+
+OVS_WAIT_UNTIL([test 1 = $(as hv2 ovs-ofctl dump-flows br-int table=65,arp | \
+grep "load:0x1e02adaabb02->NXM_NX_ARP_SHA" | grep "n_packets=1" -c)])
+
+OVN_CHECK_PACKETS([hv1/ext1-tx.pcap], [hv1-ext1.expected])
+
+as hv1
+reset_pcap_file hv1-ext1 hv1/ext1
+
+# Send unicast ARP request destined to the chassis mac of hv2.
+src_mac=f00000000003
+dst_mac=1e02adaabb02
+reply_src_mac=1e02adaabb02
+repl_dst_mac=f00000000003
+send_arp_request 1 ${src_mac} ${dst_mac} $(ip_to_hex 10 0 0 6) $(ip_to_hex 10 0 0 1) \
+${reply_src_mac} ${repl_dst_mac} ${reply_src_mac} ${repl_dst_mac}
+
+OVS_WAIT_UNTIL([test 1 = $(as hv2 ovs-ofctl dump-flows br-int table=65,arp | \
+grep "load:0x1e02adaabb02->NXM_NX_ARP_SHA" | grep "n_packets=2" -c)])
+
+OVN_CHECK_PACKETS([hv1/ext1-tx.pcap], [hv1-ext1.expected])
+
+ovn-nbctl ha-chassis-group-add-chassis hagrp1 hv3 70
+ovn-nbctl ha-chassis-group-add-chassis hagrp1 hv2 10
+OVS_WAIT_UNTIL(
+    [chassis=`ovn-sbctl --bare --columns chassis find port_binding \
+logical_port=ls1-lp_ext1`
+    test "$chassis" = "$hv3_uuid"])
+
 # disconnect hv3 from the network, hv1 should take over
 as hv3
 port=${sandbox}_br-phys
-- 
2.26.2



More information about the dev mailing list