[ovs-dev] [PATCH ovn v2] northd: Generate ARP responder flows only for reachable VIPs.

Dumitru Ceara dceara at redhat.com
Fri Nov 19 11:51:59 UTC 2021


It's not useful to generate ARP responder flows for VIPs that are not
reachable on any port of a logical router port.  On scaled
ovn-kubernetes deployments the VIP ARP responder Southbound address
sets become quite large, wasting resources because they are never
used.

Stop generating ARP responder flows in these cases and update the tests
to reflect this change.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2022403
Signed-off-by: Dumitru Ceara <dceara at redhat.com>
---
V2: Rebase after initial northd I-P.
---
 northd/northd.c     | 87 ++++++++++++++++++++++++++++++++++++++++-----
 tests/ovn-northd.at | 18 +++++++---
 2 files changed, 93 insertions(+), 12 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 0ff61deec..da5025fd0 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -622,8 +622,10 @@ struct ovn_datapath {
      */
     struct sset lb_ips_v4;
     struct sset lb_ips_v4_routable;
+    struct sset lb_ips_v4_reachable;
     struct sset lb_ips_v6;
     struct sset lb_ips_v6_routable;
+    struct sset lb_ips_v6_reachable;
 
     struct ovn_port **localnet_ports;
     size_t n_localnet_ports;
@@ -918,8 +920,10 @@ init_lb_for_datapath(struct ovn_datapath *od)
 {
     sset_init(&od->lb_ips_v4);
     sset_init(&od->lb_ips_v4_routable);
+    sset_init(&od->lb_ips_v4_reachable);
     sset_init(&od->lb_ips_v6);
     sset_init(&od->lb_ips_v6_routable);
+    sset_init(&od->lb_ips_v6_reachable);
 
     if (od->nbs) {
         od->has_lb_vip = ls_has_lb_vip(od);
@@ -937,8 +941,10 @@ destroy_lb_for_datapath(struct ovn_datapath *od)
 
     sset_destroy(&od->lb_ips_v4);
     sset_destroy(&od->lb_ips_v4_routable);
+    sset_destroy(&od->lb_ips_v4_reachable);
     sset_destroy(&od->lb_ips_v6);
     sset_destroy(&od->lb_ips_v6_routable);
+    sset_destroy(&od->lb_ips_v6_reachable);
 }
 
 /* A group of logical router datapaths which are connected - either
@@ -3909,6 +3915,38 @@ build_lrouter_lb_ips(struct ovn_datapath *od, const struct ovn_northd_lb *lb)
     }
 }
 
+static bool lrouter_port_ipv4_reachable(const struct ovn_port *op,
+                                        ovs_be32 addr);
+static bool lrouter_port_ipv6_reachable(const struct ovn_port *op,
+                                        const struct in6_addr *addr);
+static void
+build_lrouter_lb_reachable_ips(struct ovn_datapath *od,
+                               const struct ovn_northd_lb *lb)
+{
+    for (size_t i = 0; i < lb->n_vips; i++) {
+        if (IN6_IS_ADDR_V4MAPPED(&lb->vips[i].vip)) {
+            ovs_be32 vip_ip4 = in6_addr_get_mapped_ipv4(&lb->vips[i].vip);
+            struct ovn_port *op;
+
+            LIST_FOR_EACH (op, dp_node, &od->port_list) {
+                if (lrouter_port_ipv4_reachable(op, vip_ip4)) {
+                    sset_add(&od->lb_ips_v4_reachable, lb->vips[i].vip_str);
+                    break;
+                }
+            }
+        } else {
+            struct ovn_port *op;
+
+            LIST_FOR_EACH (op, dp_node, &od->port_list) {
+                if (lrouter_port_ipv6_reachable(op, &lb->vips[i].vip)) {
+                    sset_add(&od->lb_ips_v6_reachable, lb->vips[i].vip_str);
+                    break;
+                }
+            }
+        }
+    }
+}
+
 static void
 build_lrouter_lbs(struct hmap *datapaths, struct hmap *lbs)
 {
@@ -3939,6 +3977,36 @@ build_lrouter_lbs(struct hmap *datapaths, struct hmap *lbs)
     }
 }
 
+static void
+build_lrouter_lbs_reachable_ips(struct hmap *datapaths, struct hmap *lbs)
+{
+    struct ovn_datapath *od;
+
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        for (size_t i = 0; i < od->nbr->n_load_balancer; i++) {
+            struct ovn_northd_lb *lb =
+                ovn_northd_lb_find(lbs,
+                                   &od->nbr->load_balancer[i]->header_.uuid);
+            build_lrouter_lb_reachable_ips(od, lb);
+        }
+
+        for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) {
+            const struct nbrec_load_balancer_group *lbg =
+                od->nbr->load_balancer_group[i];
+            for (size_t j = 0; j < lbg->n_load_balancer; j++) {
+                struct ovn_northd_lb *lb =
+                    ovn_northd_lb_find(lbs,
+                                       &lbg->load_balancer[j]->header_.uuid);
+                build_lrouter_lb_reachable_ips(od, lb);
+            }
+        }
+    }
+}
+
 static bool
 ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
 {
@@ -12060,7 +12128,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
                                    &op->nbrp->header_, lflows);
         }
 
-        if (sset_count(&op->od->lb_ips_v4)) {
+        if (sset_count(&op->od->lb_ips_v4_reachable)) {
             ds_clear(match);
             if (is_l3dgw_port(op)) {
                 ds_put_format(match, "is_chassis_resident(%s)",
@@ -12075,7 +12143,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
             free(lb_ips_v4_as);
         }
 
-        if (sset_count(&op->od->lb_ips_v6)) {
+        if (sset_count(&op->od->lb_ips_v6_reachable)) {
             ds_clear(match);
 
             if (is_l3dgw_port(op)) {
@@ -13859,22 +13927,24 @@ sync_address_sets(struct northd_input *input_data,
             continue;
         }
 
-        if (sset_count(&od->lb_ips_v4)) {
+        if (sset_count(&od->lb_ips_v4_reachable)) {
             char *ipv4_addrs_name = lr_lb_address_set_name(od, AF_INET);
-            const char **ipv4_addrs = sset_array(&od->lb_ips_v4);
+            const char **ipv4_addrs = sset_array(&od->lb_ips_v4_reachable);
 
             sync_address_set(ovnsb_txn, ipv4_addrs_name, ipv4_addrs,
-                             sset_count(&od->lb_ips_v4), &sb_address_sets);
+                             sset_count(&od->lb_ips_v4_reachable),
+                             &sb_address_sets);
             free(ipv4_addrs_name);
             free(ipv4_addrs);
         }
 
-        if (sset_count(&od->lb_ips_v6)) {
+        if (sset_count(&od->lb_ips_v6_reachable)) {
             char *ipv6_addrs_name = lr_lb_address_set_name(od, AF_INET6);
-            const char **ipv6_addrs = sset_array(&od->lb_ips_v6);
+            const char **ipv6_addrs = sset_array(&od->lb_ips_v6_reachable);
 
             sync_address_set(ovnsb_txn, ipv6_addrs_name, ipv6_addrs,
-                             sset_count(&od->lb_ips_v6), &sb_address_sets);
+                             sset_count(&od->lb_ips_v6_reachable),
+                             &sb_address_sets);
             free(ipv6_addrs_name);
             free(ipv6_addrs);
         }
@@ -14660,6 +14730,7 @@ ovnnb_db_run(struct northd_input *input_data,
     build_ports(input_data, ovnsb_txn, sbrec_chassis_by_name,
                 sbrec_chassis_by_hostname,
                 &data->datapaths, &data->ports);
+    build_lrouter_lbs_reachable_ips(&data->datapaths, &data->lbs);
     build_ovn_lr_lbs(&data->datapaths, &data->lbs);
     build_ovn_lb_svcs(input_data, ovnsb_txn, &data->ports, &data->lbs);
     build_ipam(&data->datapaths, &data->ports);
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 85b47a18f..1e947a6c8 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -1670,7 +1670,7 @@ ovn_start
 ovn-sbctl chassis-add ch geneve 127.0.0.1
 
 ovn-nbctl lr-add lr
-ovn-nbctl lrp-add lr lrp-public 00:00:00:00:01:00 43.43.43.1/24
+ovn-nbctl lrp-add lr lrp-public 00:00:00:00:01:00 43.43.43.1/24 4343::1/64
 ovn-nbctl lrp-add lr lrp 00:00:00:00:00:01 42.42.42.1/24
 
 ovn-nbctl ls-add ls
@@ -1693,21 +1693,25 @@ ovn-nbctl lb-add lb3 "192.168.2.5:8080" "10.0.0.6:8080"
 ovn-nbctl lb-add lb4 "192.168.2.6:8080" "10.0.0.7:8080"
 ovn-nbctl lb-add lb5 "[[fe80::200:ff:fe00:101]]:8080" "[[fe02::200:ff:fe00:101]]:8080"
 ovn-nbctl lb-add lb5 "[[fe80::200:ff:fe00:102]]:8080" "[[fe02::200:ff:fe00:102]]:8080"
+ovn-nbctl lb-add lb6 "43.43.43.43:8080" "10.0.0.8:8080" udp
+ovn-nbctl lb-add lb7 "[[4343::4343]]:8080" "[[10::10]]:8080" udp
 
 ovn-nbctl lr-lb-add lr lb1
 ovn-nbctl lr-lb-add lr lb2
 ovn-nbctl lr-lb-add lr lb3
 ovn-nbctl lr-lb-add lr lb4
 ovn-nbctl lr-lb-add lr lb5
+ovn-nbctl lr-lb-add lr lb6
+ovn-nbctl lr-lb-add lr lb7
 
 ovn-nbctl --wait=sb sync
 lr_key=$(fetch_column sb:datapath_binding tunnel_key external_ids:name=lr)
 lb_as_v4="_rtr_lb_${lr_key}_ip4"
 lb_as_v6="_rtr_lb_${lr_key}_ip6"
 
-# Check generated VIP address sets.
-check_column '192.168.2.1 192.168.2.4 192.168.2.5 192.168.2.6' Address_Set addresses name=${lb_as_v4}
-check_column 'fe80::200:ff:fe00:101 fe80::200:ff:fe00:102' Address_Set addresses name=${lb_as_v6}
+# Check generated VIP address sets (only reachable IPs).
+check_column '43.43.43.43' Address_Set addresses name=${lb_as_v4}
+check_column '4343::4343 fe80::200:ff:fe00:101 fe80::200:ff:fe00:102' Address_Set addresses name=${lb_as_v6}
 
 # Ingress router port ETH address is stored in lr_in_admission.
 AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_admission.*xreg0\[[0..47\]]" | sort], [0], [dnl
@@ -1758,6 +1762,9 @@ action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */
 match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl
 action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
   table=3 (lr_in_ip_input     ), priority=90   , dnl
+match=(inport == "lrp-public" && ip6.dst == {4343::1, ff02::1:ff00:1} && nd_ns && nd.target == 4343::1), dnl
+action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
+  table=3 (lr_in_ip_input     ), priority=90   , dnl
 match=(inport == "lrp-public" && ip6.dst == {fe80::200:ff:fe00:100, ff02::1:ff00:100} && nd_ns && nd.target == fe80::200:ff:fe00:100), dnl
 action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
   table=3 (lr_in_ip_input     ), priority=90   , dnl
@@ -1827,6 +1834,9 @@ action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */
 match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl
 action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
   table=3 (lr_in_ip_input     ), priority=90   , dnl
+match=(inport == "lrp-public" && ip6.dst == {4343::1, ff02::1:ff00:1} && nd_ns && nd.target == 4343::1 && is_chassis_resident("cr-lrp-public")), dnl
+action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
+  table=3 (lr_in_ip_input     ), priority=90   , dnl
 match=(inport == "lrp-public" && ip6.dst == {fe80::200:ff:fe00:100, ff02::1:ff00:100} && nd_ns && nd.target == fe80::200:ff:fe00:100 && is_chassis_resident("cr-lrp-public")), dnl
 action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
   table=3 (lr_in_ip_input     ), priority=90   , dnl
-- 
2.27.0



More information about the dev mailing list