[ovs-dev] [PATCH ovn v2] northd: Generate ARP responder flows only for reachable VIPs.

Mark Michelson mmichels at redhat.com
Mon Nov 22 16:02:39 UTC 2021


Acked-by: Mark Michelson <mmichels at redhat.com>

On 11/19/21 06:51, Dumitru Ceara wrote:
> It's not useful to generate ARP responder flows for VIPs that are not
> reachable on any port of a logical router port.  On scaled
> ovn-kubernetes deployments the VIP ARP responder Southbound address
> sets become quite large, wasting resources because they are never
> used.
> 
> Stop generating ARP responder flows in these cases and update the tests
> to reflect this change.
> 
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2022403
> Signed-off-by: Dumitru Ceara <dceara at redhat.com>
> ---
> V2: Rebase after initial northd I-P.
> ---
>   northd/northd.c     | 87 ++++++++++++++++++++++++++++++++++++++++-----
>   tests/ovn-northd.at | 18 +++++++---
>   2 files changed, 93 insertions(+), 12 deletions(-)
> 
> diff --git a/northd/northd.c b/northd/northd.c
> index 0ff61deec..da5025fd0 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -622,8 +622,10 @@ struct ovn_datapath {
>        */
>       struct sset lb_ips_v4;
>       struct sset lb_ips_v4_routable;
> +    struct sset lb_ips_v4_reachable;
>       struct sset lb_ips_v6;
>       struct sset lb_ips_v6_routable;
> +    struct sset lb_ips_v6_reachable;
>   
>       struct ovn_port **localnet_ports;
>       size_t n_localnet_ports;
> @@ -918,8 +920,10 @@ init_lb_for_datapath(struct ovn_datapath *od)
>   {
>       sset_init(&od->lb_ips_v4);
>       sset_init(&od->lb_ips_v4_routable);
> +    sset_init(&od->lb_ips_v4_reachable);
>       sset_init(&od->lb_ips_v6);
>       sset_init(&od->lb_ips_v6_routable);
> +    sset_init(&od->lb_ips_v6_reachable);
>   
>       if (od->nbs) {
>           od->has_lb_vip = ls_has_lb_vip(od);
> @@ -937,8 +941,10 @@ destroy_lb_for_datapath(struct ovn_datapath *od)
>   
>       sset_destroy(&od->lb_ips_v4);
>       sset_destroy(&od->lb_ips_v4_routable);
> +    sset_destroy(&od->lb_ips_v4_reachable);
>       sset_destroy(&od->lb_ips_v6);
>       sset_destroy(&od->lb_ips_v6_routable);
> +    sset_destroy(&od->lb_ips_v6_reachable);
>   }
>   
>   /* A group of logical router datapaths which are connected - either
> @@ -3909,6 +3915,38 @@ build_lrouter_lb_ips(struct ovn_datapath *od, const struct ovn_northd_lb *lb)
>       }
>   }
>   
> +static bool lrouter_port_ipv4_reachable(const struct ovn_port *op,
> +                                        ovs_be32 addr);
> +static bool lrouter_port_ipv6_reachable(const struct ovn_port *op,
> +                                        const struct in6_addr *addr);
> +static void
> +build_lrouter_lb_reachable_ips(struct ovn_datapath *od,
> +                               const struct ovn_northd_lb *lb)
> +{
> +    for (size_t i = 0; i < lb->n_vips; i++) {
> +        if (IN6_IS_ADDR_V4MAPPED(&lb->vips[i].vip)) {
> +            ovs_be32 vip_ip4 = in6_addr_get_mapped_ipv4(&lb->vips[i].vip);
> +            struct ovn_port *op;
> +
> +            LIST_FOR_EACH (op, dp_node, &od->port_list) {
> +                if (lrouter_port_ipv4_reachable(op, vip_ip4)) {
> +                    sset_add(&od->lb_ips_v4_reachable, lb->vips[i].vip_str);
> +                    break;
> +                }
> +            }
> +        } else {
> +            struct ovn_port *op;
> +
> +            LIST_FOR_EACH (op, dp_node, &od->port_list) {
> +                if (lrouter_port_ipv6_reachable(op, &lb->vips[i].vip)) {
> +                    sset_add(&od->lb_ips_v6_reachable, lb->vips[i].vip_str);
> +                    break;
> +                }
> +            }
> +        }
> +    }
> +}
> +
>   static void
>   build_lrouter_lbs(struct hmap *datapaths, struct hmap *lbs)
>   {
> @@ -3939,6 +3977,36 @@ build_lrouter_lbs(struct hmap *datapaths, struct hmap *lbs)
>       }
>   }
>   
> +static void
> +build_lrouter_lbs_reachable_ips(struct hmap *datapaths, struct hmap *lbs)
> +{
> +    struct ovn_datapath *od;
> +
> +    HMAP_FOR_EACH (od, key_node, datapaths) {
> +        if (!od->nbr) {
> +            continue;
> +        }
> +
> +        for (size_t i = 0; i < od->nbr->n_load_balancer; i++) {
> +            struct ovn_northd_lb *lb =
> +                ovn_northd_lb_find(lbs,
> +                                   &od->nbr->load_balancer[i]->header_.uuid);
> +            build_lrouter_lb_reachable_ips(od, lb);
> +        }
> +
> +        for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) {
> +            const struct nbrec_load_balancer_group *lbg =
> +                od->nbr->load_balancer_group[i];
> +            for (size_t j = 0; j < lbg->n_load_balancer; j++) {
> +                struct ovn_northd_lb *lb =
> +                    ovn_northd_lb_find(lbs,
> +                                       &lbg->load_balancer[j]->header_.uuid);
> +                build_lrouter_lb_reachable_ips(od, lb);
> +            }
> +        }
> +    }
> +}
> +
>   static bool
>   ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
>   {
> @@ -12060,7 +12128,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
>                                      &op->nbrp->header_, lflows);
>           }
>   
> -        if (sset_count(&op->od->lb_ips_v4)) {
> +        if (sset_count(&op->od->lb_ips_v4_reachable)) {
>               ds_clear(match);
>               if (is_l3dgw_port(op)) {
>                   ds_put_format(match, "is_chassis_resident(%s)",
> @@ -12075,7 +12143,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
>               free(lb_ips_v4_as);
>           }
>   
> -        if (sset_count(&op->od->lb_ips_v6)) {
> +        if (sset_count(&op->od->lb_ips_v6_reachable)) {
>               ds_clear(match);
>   
>               if (is_l3dgw_port(op)) {
> @@ -13859,22 +13927,24 @@ sync_address_sets(struct northd_input *input_data,
>               continue;
>           }
>   
> -        if (sset_count(&od->lb_ips_v4)) {
> +        if (sset_count(&od->lb_ips_v4_reachable)) {
>               char *ipv4_addrs_name = lr_lb_address_set_name(od, AF_INET);
> -            const char **ipv4_addrs = sset_array(&od->lb_ips_v4);
> +            const char **ipv4_addrs = sset_array(&od->lb_ips_v4_reachable);
>   
>               sync_address_set(ovnsb_txn, ipv4_addrs_name, ipv4_addrs,
> -                             sset_count(&od->lb_ips_v4), &sb_address_sets);
> +                             sset_count(&od->lb_ips_v4_reachable),
> +                             &sb_address_sets);
>               free(ipv4_addrs_name);
>               free(ipv4_addrs);
>           }
>   
> -        if (sset_count(&od->lb_ips_v6)) {
> +        if (sset_count(&od->lb_ips_v6_reachable)) {
>               char *ipv6_addrs_name = lr_lb_address_set_name(od, AF_INET6);
> -            const char **ipv6_addrs = sset_array(&od->lb_ips_v6);
> +            const char **ipv6_addrs = sset_array(&od->lb_ips_v6_reachable);
>   
>               sync_address_set(ovnsb_txn, ipv6_addrs_name, ipv6_addrs,
> -                             sset_count(&od->lb_ips_v6), &sb_address_sets);
> +                             sset_count(&od->lb_ips_v6_reachable),
> +                             &sb_address_sets);
>               free(ipv6_addrs_name);
>               free(ipv6_addrs);
>           }
> @@ -14660,6 +14730,7 @@ ovnnb_db_run(struct northd_input *input_data,
>       build_ports(input_data, ovnsb_txn, sbrec_chassis_by_name,
>                   sbrec_chassis_by_hostname,
>                   &data->datapaths, &data->ports);
> +    build_lrouter_lbs_reachable_ips(&data->datapaths, &data->lbs);
>       build_ovn_lr_lbs(&data->datapaths, &data->lbs);
>       build_ovn_lb_svcs(input_data, ovnsb_txn, &data->ports, &data->lbs);
>       build_ipam(&data->datapaths, &data->ports);
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 85b47a18f..1e947a6c8 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -1670,7 +1670,7 @@ ovn_start
>   ovn-sbctl chassis-add ch geneve 127.0.0.1
>   
>   ovn-nbctl lr-add lr
> -ovn-nbctl lrp-add lr lrp-public 00:00:00:00:01:00 43.43.43.1/24
> +ovn-nbctl lrp-add lr lrp-public 00:00:00:00:01:00 43.43.43.1/24 4343::1/64
>   ovn-nbctl lrp-add lr lrp 00:00:00:00:00:01 42.42.42.1/24
>   
>   ovn-nbctl ls-add ls
> @@ -1693,21 +1693,25 @@ ovn-nbctl lb-add lb3 "192.168.2.5:8080" "10.0.0.6:8080"
>   ovn-nbctl lb-add lb4 "192.168.2.6:8080" "10.0.0.7:8080"
>   ovn-nbctl lb-add lb5 "[[fe80::200:ff:fe00:101]]:8080" "[[fe02::200:ff:fe00:101]]:8080"
>   ovn-nbctl lb-add lb5 "[[fe80::200:ff:fe00:102]]:8080" "[[fe02::200:ff:fe00:102]]:8080"
> +ovn-nbctl lb-add lb6 "43.43.43.43:8080" "10.0.0.8:8080" udp
> +ovn-nbctl lb-add lb7 "[[4343::4343]]:8080" "[[10::10]]:8080" udp
>   
>   ovn-nbctl lr-lb-add lr lb1
>   ovn-nbctl lr-lb-add lr lb2
>   ovn-nbctl lr-lb-add lr lb3
>   ovn-nbctl lr-lb-add lr lb4
>   ovn-nbctl lr-lb-add lr lb5
> +ovn-nbctl lr-lb-add lr lb6
> +ovn-nbctl lr-lb-add lr lb7
>   
>   ovn-nbctl --wait=sb sync
>   lr_key=$(fetch_column sb:datapath_binding tunnel_key external_ids:name=lr)
>   lb_as_v4="_rtr_lb_${lr_key}_ip4"
>   lb_as_v6="_rtr_lb_${lr_key}_ip6"
>   
> -# Check generated VIP address sets.
> -check_column '192.168.2.1 192.168.2.4 192.168.2.5 192.168.2.6' Address_Set addresses name=${lb_as_v4}
> -check_column 'fe80::200:ff:fe00:101 fe80::200:ff:fe00:102' Address_Set addresses name=${lb_as_v6}
> +# Check generated VIP address sets (only reachable IPs).
> +check_column '43.43.43.43' Address_Set addresses name=${lb_as_v4}
> +check_column '4343::4343 fe80::200:ff:fe00:101 fe80::200:ff:fe00:102' Address_Set addresses name=${lb_as_v6}
>   
>   # Ingress router port ETH address is stored in lr_in_admission.
>   AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_admission.*xreg0\[[0..47\]]" | sort], [0], [dnl
> @@ -1758,6 +1762,9 @@ action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */
>   match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl
>   action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
>     table=3 (lr_in_ip_input     ), priority=90   , dnl
> +match=(inport == "lrp-public" && ip6.dst == {4343::1, ff02::1:ff00:1} && nd_ns && nd.target == 4343::1), dnl
> +action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
> +  table=3 (lr_in_ip_input     ), priority=90   , dnl
>   match=(inport == "lrp-public" && ip6.dst == {fe80::200:ff:fe00:100, ff02::1:ff00:100} && nd_ns && nd.target == fe80::200:ff:fe00:100), dnl
>   action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
>     table=3 (lr_in_ip_input     ), priority=90   , dnl
> @@ -1827,6 +1834,9 @@ action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */
>   match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl
>   action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;)
>     table=3 (lr_in_ip_input     ), priority=90   , dnl
> +match=(inport == "lrp-public" && ip6.dst == {4343::1, ff02::1:ff00:1} && nd_ns && nd.target == 4343::1 && is_chassis_resident("cr-lrp-public")), dnl
> +action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
> +  table=3 (lr_in_ip_input     ), priority=90   , dnl
>   match=(inport == "lrp-public" && ip6.dst == {fe80::200:ff:fe00:100, ff02::1:ff00:100} && nd_ns && nd.target == fe80::200:ff:fe00:100 && is_chassis_resident("cr-lrp-public")), dnl
>   action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };)
>     table=3 (lr_in_ip_input     ), priority=90   , dnl
> 



More information about the dev mailing list