[ovs-dev] [PATCH ovn 1/2] northd: Use address sets for ARP responder flows for VIPs.
Dumitru Ceara
dceara at redhat.com
Fri Oct 1 13:46:54 UTC 2021
Otherwise the S_ROUTER_IN_IP_INPUT aggregated flows that reply to ARP
requests targeting load balancer VIPs get completely regenerated every
time a new VIP/LB is added. This affects SB memory usage as RAFT log
entries are huge. Use an address set instead. Updating an address set
is incremental, because it's performed with a "mutate" operation.
On a large scale ovn-kubernetes deployment with a high number of
load balancers (services) this change reduces memory usage of
ovsdb-servers running the OVN_Southbound cluster by 50%, from ~2GB
RSS to ~1GB RSS.
Signed-off-by: Dumitru Ceara <dceara at redhat.com>
---
northd/northd.c | 61 +++++++++++++++++++++++++++++++++------------------
tests/ovn-northd.at | 44 +++++++++++++++++--------------------
2 files changed, 59 insertions(+), 46 deletions(-)
diff --git a/northd/northd.c b/northd/northd.c
index cf2467fe1..5699745d6 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -11937,39 +11937,28 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
op->cr_port->json_key);
}
- struct ds load_balancer_ips_v4 = DS_EMPTY_INITIALIZER;
-
- /* For IPv4 we can just create one rule with all required IPs. */
- ds_put_cstr(&load_balancer_ips_v4, "{ ");
- ds_put_and_free_cstr(&load_balancer_ips_v4,
- sset_join(&op->od->lb_ips_v4, ", ", " }"));
-
- build_lrouter_arp_flow(op->od, op, ds_cstr(&load_balancer_ips_v4),
+ /* Create a single ARP rule for all IPs that are used as VIPs. */
+ char *lb_ips_v4_as = xasprintf("$%s_lb_ip4", op->od->nbr->name);
+ build_lrouter_arp_flow(op->od, op, lb_ips_v4_as,
REG_INPORT_ETH_ADDR,
match, false, 90, NULL, lflows);
- ds_destroy(&load_balancer_ips_v4);
+ free(lb_ips_v4_as);
}
if (sset_count(&op->od->lb_ips_v6)) {
ds_clear(match);
- ds_clear(actions);
-
- struct ds load_balancer_ips_v6 = DS_EMPTY_INITIALIZER;
-
- ds_put_cstr(&load_balancer_ips_v6, "{ ");
- ds_put_and_free_cstr(&load_balancer_ips_v6,
- sset_join(&op->od->lb_ips_v6, ", ", " }"));
if (is_l3dgw_port(op)) {
ds_put_format(match, "is_chassis_resident(%s)",
op->cr_port->json_key);
}
- build_lrouter_nd_flow(op->od, op, "nd_na",
- ds_cstr(&load_balancer_ips_v6), NULL,
+
+ /* Create a single ND rule for all IPs that are used as VIPs. */
+ char *lb_ips_v6_as = xasprintf("$%s_lb_ip6", op->od->nbr->name);
+ build_lrouter_nd_flow(op->od, op, "nd_na", lb_ips_v6_as, NULL,
REG_INPORT_ETH_ADDR, match, false, 90,
NULL, lflows, meter_groups);
-
- ds_destroy(&load_balancer_ips_v6);
+ free(lb_ips_v6_as);
}
if (!op->od->is_gw_router && !op->od->n_l3dgw_ports) {
@@ -13541,7 +13530,7 @@ sync_address_set(struct northd_context *ctx, const char *name,
* in OVN_Northbound, so that the address sets used in Logical_Flows in
* OVN_Southbound is checked against the proper set.*/
static void
-sync_address_sets(struct northd_context *ctx)
+sync_address_sets(struct northd_context *ctx, struct hmap *datapaths)
{
struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
@@ -13588,6 +13577,34 @@ sync_address_sets(struct northd_context *ctx)
svec_destroy(&ipv6_addrs);
}
+ /* Sync router load balancer VIP generated address sets. */
+ struct ovn_datapath *od;
+ HMAP_FOR_EACH (od, key_node, datapaths) {
+ if (!od->nbr) {
+ continue;
+ }
+
+ if (sset_count(&od->lb_ips_v4)) {
+ char *ipv4_addrs_name = xasprintf("%s_lb_ip4", od->nbr->name);
+ const char **ipv4_addrs = sset_array(&od->lb_ips_v4);
+
+ sync_address_set(ctx, ipv4_addrs_name, ipv4_addrs,
+ sset_count(&od->lb_ips_v4), &sb_address_sets);
+ free(ipv4_addrs_name);
+ free(ipv4_addrs);
+ }
+
+ if (sset_count(&od->lb_ips_v6)) {
+ char *ipv6_addrs_name = xasprintf("%s_lb_ip6", od->nbr->name);
+ const char **ipv6_addrs = sset_array(&od->lb_ips_v6);
+
+ sync_address_set(ctx, ipv6_addrs_name, ipv6_addrs,
+ sset_count(&od->lb_ips_v6), &sb_address_sets);
+ free(ipv6_addrs_name);
+ free(ipv6_addrs);
+ }
+ }
+
/* sync user defined address sets, which may overwrite port group
* generated address sets if same name is used */
const struct nbrec_address_set *nb_address_set;
@@ -14328,7 +14345,7 @@ ovnnb_db_run(struct northd_context *ctx,
stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
ovn_update_ipv6_prefix(ports);
- sync_address_sets(ctx);
+ sync_address_sets(ctx, datapaths);
sync_port_groups(ctx, &port_groups);
sync_meters(ctx, &meter_groups);
sync_dns_entries(ctx, datapaths);
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 5de554455..aece7aba4 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -1658,8 +1658,8 @@ ovn-nbctl lb-add lb1 "192.168.2.1:8080" "10.0.0.4:8080"
ovn-nbctl lb-add lb2 "192.168.2.4:8080" "10.0.0.5:8080" udp
ovn-nbctl lb-add lb3 "192.168.2.5:8080" "10.0.0.6:8080"
ovn-nbctl lb-add lb4 "192.168.2.6:8080" "10.0.0.7:8080"
-ovn-nbctl lb-add lb5 "fe80::200:ff:fe00:101:8080" "fe02::200:ff:fe00:101:8080"
-ovn-nbctl lb-add lb5 "fe80::200:ff:fe00:102:8080" "fe02::200:ff:fe00:102:8080"
+ovn-nbctl lb-add lb5 "[[fe80::200:ff:fe00:101]]:8080" "[[fe02::200:ff:fe00:101]]:8080"
+ovn-nbctl lb-add lb5 "[[fe80::200:ff:fe00:102]]:8080" "[[fe02::200:ff:fe00:102]]:8080"
ovn-nbctl lr-lb-add lr lb1
ovn-nbctl lr-lb-add lr lb2
@@ -1669,6 +1669,10 @@ ovn-nbctl lr-lb-add lr lb5
ovn-nbctl --wait=sb sync
+# Check generated VIP address sets.
+check_column '192.168.2.1 192.168.2.4 192.168.2.5 192.168.2.6' Address_Set addresses name=lr_lb_ip4
+check_column 'fe80::200:ff:fe00:101 fe80::200:ff:fe00:102' Address_Set addresses name=lr_lb_ip6
+
# Ingress router port ETH address is stored in lr_in_admission.
AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_admission.*xreg0\[[0..47\]]" | sort], [0], [dnl
table=0 (lr_in_admission ), priority=50 , dnl
@@ -1685,16 +1689,8 @@ match=(eth.mcast && inport == "lrp-public"), dnl
action=(xreg0[[0..47]] = 00:00:00:00:01:00; next;)
])
-# The order of the VIP addresses in the flow table entries doesn't
-# matter, so just replace each of them with a generic $vip for
-# testing. It would be better if we could ensure each one appeared
-# exactly once, but that's hard with sed.
-sed_vips() {
- sed 's/192\.168\.2\.[[1456]]/$vip/g'
-}
-
# Ingress router port ETH address is used for ARP reply/NA in lr_in_ip_input.
-AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sed_vips | sort], [0], [dnl
+AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sort], [0], [dnl
table=3 (lr_in_ip_input ), priority=90 , dnl
match=(arp.op == 1 && arp.tpa == 43.43.43.150), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
@@ -1708,28 +1704,28 @@ action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */
match=(arp.op == 1 && arp.tpa == 43.43.43.4), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp" && arp.op == 1 && arp.tpa == 42.42.42.1 && arp.spa == 42.42.42.0/24), dnl
+match=(inport == "lrp" && arp.op == 1 && arp.tpa == $lr_lb_ip4), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp" && arp.op == 1 && arp.tpa == { $vip, $vip, $vip, $vip }), dnl
+match=(inport == "lrp" && arp.op == 1 && arp.tpa == 42.42.42.1 && arp.spa == 42.42.42.0/24), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
table=3 (lr_in_ip_input ), priority=90 , dnl
match=(inport == "lrp" && ip6.dst == {fe80::200:ff:fe00:1, ff02::1:ff00:1} && nd_ns && nd.target == fe80::200:ff:fe00:1), dnl
action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp" && nd_ns && nd.target == { fe80::200:ff:fe00:101:8080, fe80::200:ff:fe00:102:8080 }), dnl
+match=(inport == "lrp" && nd_ns && nd.target == $lr_lb_ip6), dnl
action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl
+match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == $lr_lb_ip4), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == { $vip, $vip, $vip, $vip }), dnl
+match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
table=3 (lr_in_ip_input ), priority=90 , dnl
match=(inport == "lrp-public" && ip6.dst == {fe80::200:ff:fe00:100, ff02::1:ff00:100} && nd_ns && nd.target == fe80::200:ff:fe00:100), dnl
action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp-public" && nd_ns && nd.target == { fe80::200:ff:fe00:101:8080, fe80::200:ff:fe00:102:8080 }), dnl
+match=(inport == "lrp-public" && nd_ns && nd.target == $lr_lb_ip6), dnl
action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
])
@@ -1763,7 +1759,7 @@ action=(xreg0[[0..47]] = 00:00:00:00:01:00; next;)
# Ingress router port is used for ARP reply/NA in lr_in_ip_input.
# xxreg0[0..47] is used unless external_mac is set.
# Priority 90 flows (per router).
-AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sed_vips | sort], [0], [dnl
+AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sort], [0], [dnl
table=3 (lr_in_ip_input ), priority=90 , dnl
match=(arp.op == 1 && arp.tpa == 43.43.43.150), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
@@ -1777,28 +1773,28 @@ action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */
match=(arp.op == 1 && arp.tpa == 43.43.43.4), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp" && arp.op == 1 && arp.tpa == 42.42.42.1 && arp.spa == 42.42.42.0/24), dnl
+match=(inport == "lrp" && arp.op == 1 && arp.tpa == $lr_lb_ip4), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp" && arp.op == 1 && arp.tpa == { $vip, $vip, $vip, $vip }), dnl
+match=(inport == "lrp" && arp.op == 1 && arp.tpa == 42.42.42.1 && arp.spa == 42.42.42.0/24), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
table=3 (lr_in_ip_input ), priority=90 , dnl
match=(inport == "lrp" && ip6.dst == {fe80::200:ff:fe00:1, ff02::1:ff00:1} && nd_ns && nd.target == fe80::200:ff:fe00:1), dnl
action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp" && nd_ns && nd.target == { fe80::200:ff:fe00:101:8080, fe80::200:ff:fe00:102:8080 }), dnl
+match=(inport == "lrp" && nd_ns && nd.target == $lr_lb_ip6), dnl
action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl
+match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == $lr_lb_ip4 && is_chassis_resident("cr-lrp-public")), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == { $vip, $vip, $vip, $vip } && is_chassis_resident("cr-lrp-public")), dnl
+match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
table=3 (lr_in_ip_input ), priority=90 , dnl
match=(inport == "lrp-public" && ip6.dst == {fe80::200:ff:fe00:100, ff02::1:ff00:100} && nd_ns && nd.target == fe80::200:ff:fe00:100 && is_chassis_resident("cr-lrp-public")), dnl
action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
table=3 (lr_in_ip_input ), priority=90 , dnl
-match=(inport == "lrp-public" && nd_ns && nd.target == { fe80::200:ff:fe00:101:8080, fe80::200:ff:fe00:102:8080 } && is_chassis_resident("cr-lrp-public")), dnl
+match=(inport == "lrp-public" && nd_ns && nd.target == $lr_lb_ip6 && is_chassis_resident("cr-lrp-public")), dnl
action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
])
More information about the dev
mailing list