[ovs-dev] [PATCH v4 ovn] Don't suppress localport traffic directed to external port

Ihar Hrachyshka ihrachys at redhat.com
Wed Jul 14 20:59:01 UTC 2021


Recently, we stopped leaking localport traffic through localnet ports
into fabric to avoid unnecessary flipping between chassis hosting the
same localport.

Despite the type name, in some scenarios localports are supposed to
talk outside the hosting chassis. Specifically, in OpenStack [1]
metadata service for SR-IOV ports is implemented as a localport hosted
on another chassis that is exposed to the chassis owning the SR-IOV
port through an "external" port. In this case, "leaking" localport
traffic into fabric is desirable.

This patch inserts a higher priority flow per external port on the
same datapath that avoids dropping localport traffic.

Fixes: 96959e56d634 ("physical: do not forward traffic from localport
to a localnet one")

[1] https://docs.openstack.org/neutron/latest/admin/ovn/sriov.html

Signed-off-by: Ihar Hrachyshka <ihrachys at redhat.com>

--

v1: initial version.
v2: fixed code for unbound external ports.
v2: rebased.
v3: optimize external ports iteration.
v3: rate limit error message on mac address parse failure.
v4: fixed several memory leaks on local_datapaths cleanup.
v4: properly clean up flows for deleted external ports.
v4: test that external port created after localnet works.
v4: test that external port deleted doesn't pass traffic.
---
 controller/binding.c        | 35 +++++++++++++--
 controller/ovn-controller.c |  2 +
 controller/ovn-controller.h |  2 +
 controller/physical.c       | 46 ++++++++++++++++++++
 tests/ovn.at                | 85 +++++++++++++++++++++++++++++++++++++
 5 files changed, 167 insertions(+), 3 deletions(-)

diff --git a/controller/binding.c b/controller/binding.c
index 70bf13390..d50f3affa 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -108,6 +108,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
     hmap_insert(local_datapaths, &ld->hmap_node, dp_key);
     ld->datapath = datapath;
     ld->localnet_port = NULL;
+    shash_init(&ld->external_ports);
     ld->has_local_l3gateway = has_local_l3gateway;
 
     if (tracked_datapaths) {
@@ -474,6 +475,18 @@ is_network_plugged(const struct sbrec_port_binding *binding_rec,
     return network ? !!shash_find_data(bridge_mappings, network) : false;
 }
 
+static void
+update_ld_external_ports(const struct sbrec_port_binding *binding_rec,
+                         struct hmap *local_datapaths)
+{
+    struct local_datapath *ld = get_local_datapath(
+        local_datapaths, binding_rec->datapath->tunnel_key);
+    if (ld) {
+        shash_replace(&ld->external_ports, binding_rec->logical_port,
+                      binding_rec);
+    }
+}
+
 static void
 update_ld_localnet_port(const struct sbrec_port_binding *binding_rec,
                         struct shash *bridge_mappings,
@@ -1657,8 +1670,9 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
         !sset_is_empty(b_ctx_out->egress_ifaces) ? &qos_map : NULL;
 
     struct ovs_list localnet_lports = OVS_LIST_INITIALIZER(&localnet_lports);
+    struct ovs_list external_lports = OVS_LIST_INITIALIZER(&external_lports);
 
-    struct localnet_lport {
+    struct lport {
         struct ovs_list list_node;
         const struct sbrec_port_binding *pb;
     };
@@ -1713,11 +1727,14 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
 
         case LP_EXTERNAL:
             consider_external_lport(pb, b_ctx_in, b_ctx_out);
+            struct lport *ext_lport = xmalloc(sizeof *ext_lport);
+            ext_lport->pb = pb;
+            ovs_list_push_back(&external_lports, &ext_lport->list_node);
             break;
 
         case LP_LOCALNET: {
             consider_localnet_lport(pb, b_ctx_in, b_ctx_out, &qos_map);
-            struct localnet_lport *lnet_lport = xmalloc(sizeof *lnet_lport);
+            struct lport *lnet_lport = xmalloc(sizeof *lnet_lport);
             lnet_lport->pb = pb;
             ovs_list_push_back(&localnet_lports, &lnet_lport->list_node);
             break;
@@ -1744,7 +1761,7 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
     /* Run through each localnet lport list to see if it is a localnet port
      * on local datapaths discovered from above loop, and update the
      * corresponding local datapath accordingly. */
-    struct localnet_lport *lnet_lport;
+    struct lport *lnet_lport;
     LIST_FOR_EACH_POP (lnet_lport, list_node, &localnet_lports) {
         update_ld_localnet_port(lnet_lport->pb, &bridge_mappings,
                                 b_ctx_out->egress_ifaces,
@@ -1752,6 +1769,15 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
         free(lnet_lport);
     }
 
+    /* Run through external lport list to see if these are external ports
+     * on local datapaths discovered from above loop, and update the
+     * corresponding local datapath accordingly. */
+    struct lport *ext_lport;
+    LIST_FOR_EACH_POP (ext_lport, list_node, &external_lports) {
+        update_ld_external_ports(ext_lport->pb, b_ctx_out->local_datapaths);
+        free(ext_lport);
+    }
+
     shash_destroy(&bridge_mappings);
 
     if (!sset_is_empty(b_ctx_out->egress_ifaces)
@@ -1954,6 +1980,8 @@ remove_pb_from_local_datapath(const struct sbrec_port_binding *pb,
                                          pb->logical_port)) {
             ld->localnet_port = NULL;
         }
+    } else if (!strcmp(pb->type, "external")) {
+        shash_find_and_delete(&ld->external_ports, pb->logical_port);
     }
 
     if (!strcmp(pb->type, "l3gateway")) {
@@ -2619,6 +2647,7 @@ delete_done:
 
         case LP_EXTERNAL:
             handled = consider_external_lport(pb, b_ctx_in, b_ctx_out);
+            update_ld_external_ports(pb, b_ctx_out->local_datapaths);
             break;
 
         case LP_LOCALNET: {
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 54304d788..2418c301b 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1168,6 +1168,7 @@ en_runtime_data_cleanup(void *data)
     HMAP_FOR_EACH_SAFE (cur_node, next_node, hmap_node,
                         &rt_data->local_datapaths) {
         free(cur_node->peer_ports);
+        shash_destroy(&cur_node->external_ports);
         hmap_remove(&rt_data->local_datapaths, &cur_node->hmap_node);
         free(cur_node);
     }
@@ -1286,6 +1287,7 @@ en_runtime_data_run(struct engine_node *node, void *data)
         struct local_datapath *cur_node, *next_node;
         HMAP_FOR_EACH_SAFE (cur_node, next_node, hmap_node, local_datapaths) {
             free(cur_node->peer_ports);
+            shash_destroy(&cur_node->external_ports);
             hmap_remove(local_datapaths, &cur_node->hmap_node);
             free(cur_node);
         }
diff --git a/controller/ovn-controller.h b/controller/ovn-controller.h
index 417c7aacb..b864ed0fa 100644
--- a/controller/ovn-controller.h
+++ b/controller/ovn-controller.h
@@ -67,6 +67,8 @@ struct local_datapath {
 
     size_t n_peer_ports;
     size_t n_allocated_peer_ports;
+
+    struct shash external_ports;
 };
 
 struct local_datapath *get_local_datapath(const struct hmap *,
diff --git a/controller/physical.c b/controller/physical.c
index 17ca5afbb..483a5a49b 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -1281,6 +1281,52 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
             ofctrl_add_flow(flow_table, OFTABLE_CHECK_LOOPBACK, 160,
                             binding->header_.uuid.parts[0], &match,
                             ofpacts_p, &binding->header_.uuid);
+
+            /* localport traffic directed to external is *not* local */
+            struct shash_node *node;
+            SHASH_FOR_EACH (node, &ld->external_ports) {
+                const struct sbrec_port_binding *pb = node->data;
+
+                /* skip ports that are not claimed by this chassis */
+                if (!pb->chassis) {
+                    continue;
+                }
+                if (strcmp(pb->chassis->name, chassis->name)) {
+                    continue;
+                }
+
+                ofpbuf_clear(ofpacts_p);
+                for (int i = 0; i < MFF_N_LOG_REGS; i++) {
+                    put_load(0, MFF_REG0 + i, 0, 32, ofpacts_p);
+                }
+                put_resubmit(OFTABLE_LOG_EGRESS_PIPELINE, ofpacts_p);
+
+                /* allow traffic directed to external MAC address */
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+                for (int i = 0; i < pb->n_mac; i++) {
+                    char *err_str;
+                    struct eth_addr peer_mac;
+                    if ((err_str = str_to_mac(pb->mac[i], &peer_mac))) {
+                        VLOG_WARN_RL(
+                            &rl, "Parsing MAC failed for external port: %s, "
+                                 "with error: %s", pb->logical_port, err_str);
+                        free(err_str);
+                        continue;
+                    }
+
+                    match_init_catchall(&match);
+                    match_set_metadata(&match, htonll(dp_key));
+                    match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
+                                  port_key);
+                    match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
+                                         MLF_LOCALPORT, MLF_LOCALPORT);
+                    match_set_dl_dst(&match, peer_mac);
+
+                    ofctrl_add_flow(flow_table, OFTABLE_CHECK_LOOPBACK, 170,
+                                    binding->header_.uuid.parts[0], &match,
+                                    ofpacts_p, &binding->header_.uuid);
+                }
+            }
         }
 
     } else if (!tun && !is_ha_remote) {
diff --git a/tests/ovn.at b/tests/ovn.at
index e5d8869a8..93e1a0267 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -12143,6 +12143,91 @@ OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([localport doesn't suppress ARP directed to external port])
+
+ovn_start
+net_add n1
+
+check ovs-vsctl add-br br-phys
+check ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+check ovn-nbctl ls-add ls
+
+# create topology to allow to talk from localport through localnet to external port
+check ovn-nbctl lsp-add ls lp
+check ovn-nbctl lsp-set-addresses lp "00:00:00:00:00:01 10.0.0.1"
+check ovn-nbctl lsp-set-type lp localport
+check ovs-vsctl add-port br-int lp -- set Interface lp external-ids:iface-id=lp
+
+check ovn-nbctl --wait=sb ha-chassis-group-add hagrp
+check ovn-nbctl --wait=sb ha-chassis-group-add-chassis hagrp main 10
+check ovn-nbctl lsp-add ls lext
+check ovn-nbctl lsp-set-addresses lext "00:00:00:00:00:02 10.0.0.2"
+check ovn-nbctl lsp-set-type lext external
+hagrp_uuid=`ovn-nbctl --bare --columns _uuid find ha_chassis_group name=hagrp`
+check ovn-nbctl set logical_switch_port lext ha_chassis_group=$hagrp_uuid
+
+check ovn-nbctl lsp-add ls ln
+check ovn-nbctl lsp-set-addresses ln unknown
+check ovn-nbctl lsp-set-type ln localnet
+check ovn-nbctl lsp-set-options ln network_name=phys
+check ovn-nbctl --wait=hv sync
+
+# also create second external port AFTER localnet to check that order is irrelevant
+check ovn-nbctl lsp-add ls lext2
+check ovn-nbctl lsp-set-addresses lext2 "00:00:00:00:00:10 10.0.0.10"
+check ovn-nbctl lsp-set-type lext2 external
+check ovn-nbctl set logical_switch_port lext2 ha_chassis_group=$hagrp_uuid
+check ovn-nbctl --wait=hv sync
+
+# create and immediately delete an external port to later check that flows for
+# deleted ports are not left over in flow table
+check ovn-nbctl lsp-add ls lext-deleted
+check ovn-nbctl lsp-set-addresses lext-deleted "00:00:00:00:00:03 10.0.0.3"
+check ovn-nbctl lsp-set-type lext-deleted external
+check ovn-nbctl set logical_switch_port lext-deleted ha_chassis_group=$hagrp_uuid
+check ovn-nbctl --wait=hv sync
+check ovn-nbctl lsp-del lext-deleted
+check ovn-nbctl --wait=hv sync
+
+send_garp() {
+    local inport=$1 eth_src=$2 eth_dst=$3 spa=$4 tpa=$5
+    local request=${eth_dst}${eth_src}08060001080006040001${eth_src}${spa}${eth_dst}${tpa}
+    ovs-appctl netdev-dummy/receive $inport $request
+}
+
+spa=$(ip_to_hex 10 0 0 1)
+tpa=$(ip_to_hex 10 0 0 2)
+send_garp lp 000000000001 000000000002 $spa $tpa
+
+spa=$(ip_to_hex 10 0 0 1)
+tpa=$(ip_to_hex 10 0 0 10)
+send_garp lp 000000000001 000000000010 $spa $tpa
+
+spa=$(ip_to_hex 10 0 0 1)
+tpa=$(ip_to_hex 10 0 0 3)
+send_garp lp 000000000001 000000000003 $spa $tpa
+
+dnl external traffic from localport should be sent to localnet
+AT_CHECK([tcpdump -r main/br-phys_n1-tx.pcap arp[[24:4]]=0x0a000002 | wc -l],[0],[dnl
+1
+],[ignore])
+
+#dnl ...regardless of localnet / external ports creation order
+AT_CHECK([tcpdump -r main/br-phys_n1-tx.pcap arp[[24:4]]=0x0a00000a | wc -l],[0],[dnl
+1
+],[ignore])
+
+dnl traffic from localport should not be sent to deleted external port
+AT_CHECK([tcpdump -r main/br-phys_n1-tx.pcap arp[[24:4]]=0x0a000003 | wc -l],[0],[dnl
+0
+],[ignore])
+
+AT_CLEANUP
+])
+
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([1 LR with HA distributed router gateway port])
 ovn_start
-- 
2.31.1



More information about the dev mailing list