[ovs-dev] [PATCH ovn 1/1] ovn-northd: Treat reachable and unreachable addresses identically.

Numan Siddique numans at ovn.org
Wed Nov 3 20:22:22 UTC 2021


On Fri, Oct 29, 2021 at 5:28 PM Mark Michelson <mmichels at redhat.com> wrote:
>
> When an ARP request reaches a logical switch that is connected to
> gateway routers, if the IP address in the ARP is an "unreachable"
> address, then the ARP is flooded out all logical switch ports.
>
> This can become an issue when a logical switch is connected to many
> gateway logical router ports. In one particular OpenStack case, we saw a
> logical switch connected to 471 gateway logical router ports. When ARP
> requests are flooded to all of those logical router ports, it results in
> hitting the resubmit limit in OVS.
>
> To correct this issue, this patch treats reachable and unreachable
> addresses the same when receiving an ARP request. If the address can be
> traced to a particular router port, then the ARP is unicast only to that
> router port. This helps to avoid hitting the resubmit ceiling.
>
> The test "ARP flood for unreachable addresses" is removed since we no
> longer flood ARPs when destined for unreachable addresses.
>
> Reported at: https://bugzilla.redhat.com/show_bug.cgi?id=2009323
>
> Signed-off-by: Mark Michelson <mmichels at redhat.com>

Thanks for (re)addressing this issue.

I applied this patch to the main branch and branch-21.09 with the below change.
It doesn't apply cleanly to older branches.  Please submit a backport
patch for branch-21.06 and others
if it needs to be backported.


-----------------------------------
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index d219c56186..b6a5f77670 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -1631,14 +1631,6 @@ output;
         logical ports.
       </li>

-      <li>
-        Priority-90 flows for each IP address/VIP/NAT address configured
-        outside its owning router port's subnet. These flows match ARP
-        requests and ND packets for the specific IP addresses.  Matched packets
-        are forwarded to the <code>MC_FLOOD</code> multicast group which
-        contains all connected logical ports.
-      </li>
-
       <li>
         Priority-75 flows for each port connected to a logical router
         matching self originated ARP request/ND packets.  These packets
-----------------------------------

Thanks
Numan



> ---
>  northd/northd.c     | 185 ++++----------------------------------------
>  tests/ovn-northd.at |  99 ------------------------
>  tests/ovn.at        |  10 +++
>  3 files changed, 23 insertions(+), 271 deletions(-)
>
> diff --git a/northd/northd.c b/northd/northd.c
> index da4f9cd14..59286034d 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -6876,42 +6876,6 @@ build_lrouter_groups(struct hmap *ports, struct ovs_list *lr_list)
>      }
>  }
>
> -/* Returns 'true' if the IPv4 'addr' is on the same subnet with one of the
> - * IPs configured on the router port.
> - */
> -static bool
> -lrouter_port_ipv4_reachable(const struct ovn_port *op, ovs_be32 addr)
> -{
> -    for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
> -        struct ipv4_netaddr *op_addr = &op->lrp_networks.ipv4_addrs[i];
> -
> -        if ((addr & op_addr->mask) == op_addr->network) {
> -            return true;
> -        }
> -    }
> -    return false;
> -}
> -
> -/* Returns 'true' if the IPv6 'addr' is on the same subnet with one of the
> - * IPs configured on the router port.
> - */
> -static bool
> -lrouter_port_ipv6_reachable(const struct ovn_port *op,
> -                            const struct in6_addr *addr)
> -{
> -    for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
> -        struct ipv6_netaddr *op_addr = &op->lrp_networks.ipv6_addrs[i];
> -
> -        struct in6_addr nat_addr6_masked =
> -            ipv6_addr_bitand(addr, &op_addr->mask);
> -
> -        if (ipv6_addr_equals(&nat_addr6_masked, &op_addr->network)) {
> -            return true;
> -        }
> -    }
> -    return false;
> -}
> -
>  /*
>   * Ingress table 22: Flows that flood self originated ARP/ND packets in the
>   * switching domain.
> @@ -6943,23 +6907,6 @@ build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op,
>          if (!nat->external_mac) {
>              continue;
>          }
> -
> -        /* Check if the ovn port has a network configured on which we could
> -         * expect ARP requests/NS for the DNAT external_ip.
> -         */
> -        if (nat_entry_is_v6(nat_entry)) {
> -            struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr;
> -
> -            if (!lrouter_port_ipv6_reachable(op, addr)) {
> -                continue;
> -            }
> -        } else {
> -            ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr;
> -
> -            if (!lrouter_port_ipv4_reachable(op, addr)) {
> -                continue;
> -            }
> -        }
>          sset_add(&all_eth_addrs, nat->external_mac);
>      }
>
> @@ -7012,7 +6959,7 @@ arp_nd_ns_match(const char *ips, int addr_family, struct ds *match)
>   * switching domain as regular broadcast.
>   */
>  static void
> -build_lswitch_rport_arp_req_flow_for_reachable_ip(const char *ips,
> +build_lswitch_rport_arp_req_flow(const char *ips,
>      int addr_family, struct ovn_port *patch_op, struct ovn_datapath *od,
>      uint32_t priority, struct hmap *lflows,
>      const struct ovsdb_idl_row *stage_hint)
> @@ -7043,30 +6990,6 @@ build_lswitch_rport_arp_req_flow_for_reachable_ip(const char *ips,
>      ds_destroy(&actions);
>  }
>
> -/*
> - * Ingress table 22: Flows that forward ARP/ND requests for "unreachable" IPs
> - * (NAT or load balancer IPs configured on a router that are outside the
> - * router's configured subnets).
> - * These ARP/ND packets are flooded in the switching domain as regular
> - * broadcast.
> - */
> -static void
> -build_lswitch_rport_arp_req_flow_for_unreachable_ip(const char *ips,
> -    int addr_family, struct ovn_datapath *od, uint32_t priority,
> -    struct hmap *lflows, const struct ovsdb_idl_row *stage_hint)
> -{
> -    struct ds match = DS_EMPTY_INITIALIZER;
> -
> -    arp_nd_ns_match(ips, addr_family, &match);
> -
> -    ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_L2_LKUP,
> -                            priority, ds_cstr(&match),
> -                            "outport = \""MC_FLOOD"\"; output;",
> -                            stage_hint);
> -
> -    ds_destroy(&match);
> -}
> -
>  /*
>   * Ingress table 22: Flows that forward ARP/ND requests only to the routers
>   * that own the addresses.
> @@ -7101,9 +7024,8 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
>          /* Check if the ovn port has a network configured on which we could
>           * expect ARP requests for the LB VIP.
>           */
> -        if (ip_parse(ip_addr, &ipv4_addr) &&
> -            lrouter_port_ipv4_reachable(op, ipv4_addr)) {
> -            build_lswitch_rport_arp_req_flow_for_reachable_ip(
> +        if (ip_parse(ip_addr, &ipv4_addr)) {
> +            build_lswitch_rport_arp_req_flow(
>                  ip_addr, AF_INET, sw_op, sw_od, 80, lflows,
>                  stage_hint);
>          }
> @@ -7114,9 +7036,8 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
>          /* Check if the ovn port has a network configured on which we could
>           * expect NS requests for the LB VIP.
>           */
> -        if (ipv6_parse(ip_addr, &ipv6_addr) &&
> -            lrouter_port_ipv6_reachable(op, &ipv6_addr)) {
> -            build_lswitch_rport_arp_req_flow_for_reachable_ip(
> +        if (ipv6_parse(ip_addr, &ipv6_addr)) {
> +            build_lswitch_rport_arp_req_flow(
>                  ip_addr, AF_INET6, sw_op, sw_od, 80, lflows,
>                  stage_hint);
>          }
> @@ -7138,42 +7059,27 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
>           * expect ARP requests/NS for the DNAT external_ip.
>           */
>          if (nat_entry_is_v6(nat_entry)) {
> -            struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr;
> -
>              if (!sset_contains(&op->od->lb_ips_v6, nat->external_ip)) {
> -                if (lrouter_port_ipv6_reachable(op, addr)) {
> -                    build_lswitch_rport_arp_req_flow_for_reachable_ip(
> -                        nat->external_ip, AF_INET6, sw_op, sw_od, 80, lflows,
> -                        stage_hint);
> -                } else {
> -                    build_lswitch_rport_arp_req_flow_for_unreachable_ip(
> -                        nat->external_ip, AF_INET6, sw_od, 90, lflows,
> -                        stage_hint);
> -                }
> +                build_lswitch_rport_arp_req_flow(
> +                    nat->external_ip, AF_INET6, sw_op, sw_od, 80, lflows,
> +                    stage_hint);
>              }
>          } else {
> -            ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr;
>              if (!sset_contains(&op->od->lb_ips_v4, nat->external_ip)) {
> -                if (lrouter_port_ipv4_reachable(op, addr)) {
> -                    build_lswitch_rport_arp_req_flow_for_reachable_ip(
> -                        nat->external_ip, AF_INET, sw_op, sw_od, 80, lflows,
> -                        stage_hint);
> -                } else {
> -                    build_lswitch_rport_arp_req_flow_for_unreachable_ip(
> -                        nat->external_ip, AF_INET, sw_od, 90, lflows,
> -                        stage_hint);
> -                }
> +                build_lswitch_rport_arp_req_flow(
> +                    nat->external_ip, AF_INET, sw_op, sw_od, 80, lflows,
> +                    stage_hint);
>              }
>          }
>      }
>
>      for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
> -        build_lswitch_rport_arp_req_flow_for_reachable_ip(
> +        build_lswitch_rport_arp_req_flow(
>              op->lrp_networks.ipv4_addrs[i].addr_s, AF_INET, sw_op, sw_od, 80,
>              lflows, stage_hint);
>      }
>      for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
> -        build_lswitch_rport_arp_req_flow_for_reachable_ip(
> +        build_lswitch_rport_arp_req_flow(
>              op->lrp_networks.ipv6_addrs[i].addr_s, AF_INET6, sw_op, sw_od, 80,
>              lflows, stage_hint);
>      }
> @@ -9665,69 +9571,6 @@ build_lrouter_defrag_flows_for_lb(struct ovn_northd_lb *lb,
>      ds_destroy(&defrag_actions);
>  }
>
> -static void
> -build_lflows_for_unreachable_vips(struct ovn_northd_lb *lb,
> -                                  struct ovn_lb_vip *lb_vip,
> -                                  struct hmap *lflows,
> -                                  struct ds *match)
> -{
> -    static const char *action = "outport = \"_MC_flood\"; output;";
> -    bool ipv4 = IN6_IS_ADDR_V4MAPPED(&lb_vip->vip);
> -    ovs_be32 ipv4_addr;
> -
> -    ds_clear(match);
> -    if (ipv4) {
> -        if (!ip_parse(lb_vip->vip_str, &ipv4_addr)) {
> -            return;
> -        }
> -        ds_put_format(match, "%s && arp.op == 1 && arp.tpa == %s",
> -                      FLAGBIT_NOT_VXLAN, lb_vip->vip_str);
> -    } else {
> -        ds_put_format(match, "%s && nd_ns && nd.target == %s",
> -                      FLAGBIT_NOT_VXLAN, lb_vip->vip_str);
> -    }
> -
> -    struct ovn_lflow *lflow_ref = NULL;
> -    uint32_t hash = ovn_logical_flow_hash(
> -            ovn_stage_get_table(S_SWITCH_IN_L2_LKUP),
> -            ovn_stage_get_pipeline(S_SWITCH_IN_L2_LKUP), 90,
> -            ds_cstr(match), action);
> -
> -    for (size_t i = 0; i < lb->n_nb_lr; i++) {
> -        struct ovn_datapath *od = lb->nb_lr[i];
> -
> -        if (!od->is_gw_router && !od->n_l3dgw_ports) {
> -            continue;
> -        }
> -
> -        struct ovn_port *op;
> -        LIST_FOR_EACH (op, dp_node, &od->port_list) {
> -            if (!od->is_gw_router && !is_l3dgw_port(op)) {
> -                continue;
> -            }
> -
> -            struct ovn_port *peer = op->peer;
> -            if (!peer || !peer->nbsp || lsp_is_external(peer->nbsp)) {
> -                continue;
> -            }
> -
> -            if ((ipv4 && lrouter_port_ipv4_reachable(op, ipv4_addr)) ||
> -                (!ipv4 && lrouter_port_ipv6_reachable(op, &lb_vip->vip))) {
> -                continue;
> -            }
> -
> -            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od)) {
> -                continue;
> -            }
> -            lflow_ref = ovn_lflow_add_at_with_hash(lflows, peer->od,
> -                                       S_SWITCH_IN_L2_LKUP, 90,
> -                                       ds_cstr(match), action,
> -                                       NULL, NULL, &peer->nbsp->header_,
> -                                       OVS_SOURCE_LOCATOR, hash);
> -        }
> -    }
> -}
> -
>  static void
>  build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
>                             struct shash *meter_groups, struct ds *match,
> @@ -9740,8 +9583,6 @@ build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
>      for (size_t i = 0; i < lb->n_vips; i++) {
>          struct ovn_lb_vip *lb_vip = &lb->vips[i];
>
> -        build_lflows_for_unreachable_vips(lb, lb_vip, lflows, match);
> -
>          build_lrouter_nat_flows_for_lb(lb_vip, lb, &lb->vips_nb[i],
>                                         lflows, match, action,
>                                         meter_groups);
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index e2b9924b6..9d5d21316 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -4479,105 +4479,6 @@ check_lflows 0
>
>  AT_CLEANUP
>
> -OVN_FOR_EACH_NORTHD([
> -AT_SETUP([ovn -- ARP flood for unreachable addresses])
> -ovn_start
> -
> -AS_BOX([Setting up the logical network])
> -
> -# This network is the same as the one from "Router Address Propagation"
> -check ovn-nbctl ls-add sw
> -
> -check ovn-nbctl lr-add ro1
> -check ovn-nbctl lrp-add ro1 ro1-sw 00:00:00:00:00:01 10.0.0.1/24
> -check ovn-nbctl lsp-add sw sw-ro1
> -check ovn-nbctl lsp-set-type sw-ro1 router
> -check ovn-nbctl lsp-set-addresses sw-ro1 router
> -check ovn-nbctl lsp-set-options sw-ro1 router-port=ro1-sw
> -
> -check ovn-nbctl lr-add ro2
> -check ovn-nbctl lrp-add ro2 ro2-sw 00:00:00:00:00:02 20.0.0.1/24
> -check ovn-nbctl lsp-add sw sw-ro2
> -check ovn-nbctl lsp-set-type sw-ro2 router
> -check ovn-nbctl lsp-set-addresses sw-ro2 router
> -check ovn-nbctl --wait=sb lsp-set-options sw-ro2 router-port=ro2-sw
> -
> -check ovn-nbctl ls-add ls1
> -check ovn-nbctl lsp-add ls1 vm1
> -check ovn-nbctl lsp-set-addresses vm1 "00:00:00:00:01:02 192.168.1.2"
> -check ovn-nbctl lrp-add ro1 ro1-ls1 00:00:00:00:01:01 192.168.1.1/24
> -check ovn-nbctl lsp-add ls1 ls1-ro1
> -check ovn-nbctl lsp-set-type ls1-ro1 router
> -check ovn-nbctl lsp-set-addresses ls1-ro1 router
> -check ovn-nbctl lsp-set-options ls1-ro1 router-port=ro1-ls1
> -
> -check ovn-nbctl ls-add ls2
> -check ovn-nbctl lsp-add ls2 vm2
> -check ovn-nbctl lsp-set-addresses vm2 "00:00:00:00:02:02 192.168.2.2"
> -check ovn-nbctl lrp-add ro2 ro2-ls2 00:00:00:00:02:01 192.168.2.1/24
> -check ovn-nbctl lsp-add ls2 ls2-ro2
> -check ovn-nbctl lsp-set-type ls2-ro2 router
> -check ovn-nbctl lsp-set-addresses ls2-ro2 router
> -check ovn-nbctl lsp-set-options ls2-ro2 router-port=ro2-ls2
> -
> -AS_BOX([Ensure that unreachable flood flows are not installed, since no addresses are unreachable])
> -
> -AT_CHECK([ovn-sbctl lflow-list sw | grep "ls_in_l2_lkup" | grep "priority=90" -c], [1], [dnl
> -0
> -])
> -
> -AS_BOX([Adding some reachable NAT addresses])
> -
> -check ovn-nbctl lr-nat-add ro1 dnat 10.0.0.100 192.168.1.100
> -check ovn-nbctl lr-nat-add ro1 snat 10.0.0.200 192.168.1.200/30
> -
> -check ovn-nbctl lr-nat-add ro2 dnat 20.0.0.100 192.168.2.100
> -check ovn-nbctl --wait=sb lr-nat-add ro2 snat 20.0.0.200 192.168.2.200/30
> -
> -AS_BOX([Ensure that unreachable flood flows are not installed, since all addresses are reachable])
> -
> -AT_CHECK([ovn-sbctl lflow-list sw | grep "ls_in_l2_lkup" | grep "priority=90" -c], [1], [dnl
> -0
> -])
> -
> -AS_BOX([Adding some unreachable NAT addresses])
> -
> -check ovn-nbctl lr-nat-add ro1 dnat 30.0.0.100 192.168.1.130
> -check ovn-nbctl lr-nat-add ro1 snat 30.0.0.200 192.168.1.148/30
> -
> -check ovn-nbctl lr-nat-add ro2 dnat 40.0.0.100 192.168.2.130
> -check ovn-nbctl --wait=sb lr-nat-add ro2 snat 40.0.0.200 192.168.2.148/30
> -
> -AS_BOX([Ensure that unreachable flood flows are installed, since there are unreachable addresses])
> -
> -ovn-sbctl lflow-list
> -
> -# We expect two flows to be installed, one per connected router port on sw
> -AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 -c], [0], [dnl
> -2
> -])
> -
> -# We expect that the installed flows will match the unreachable DNAT addresses only.
> -AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | grep "arp.tpa == 30.0.0.100" -c], [0], [dnl
> -1
> -])
> -
> -AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | grep "arp.tpa == 40.0.0.100" -c], [0], [dnl
> -1
> -])
> -
> -# Ensure that we do not create flows for SNAT addresses
> -AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | grep "arp.tpa == 30.0.0.200" -c], [1], [dnl
> -0
> -])
> -
> -AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | grep "arp.tpa == 40.0.0.200" -c], [1], [dnl
> -0
> -])
> -
> -AT_CLEANUP
> -])
> -
>  OVN_FOR_EACH_NORTHD([
>  AT_SETUP([ovn -- LR NAT flows])
>  ovn_start
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 7cfdede77..dd043987a 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -22131,9 +22131,17 @@ nd_target=fe80::200:ff:fe00:200
>  # - 20.0.0.1, 20::1, fe80::200:1ff:fe00:0 - interface IPs.
>  as hv1
>  AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=80,.*${match_sw1_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" | sort], [0], [dnl
> +arp_tpa=10.0.0.11
> +arp_tpa=10.0.0.111
> +arp_tpa=10.0.0.121
> +arp_tpa=10.0.0.122
>  arp_tpa=20.0.0.1
>  ])
>  AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=80,.*${match_sw1_metadata}" | grep -oE "nd_target=[[0-9a-f:]]+" | sort], [0], [dnl
> +nd_target=10::11
> +nd_target=10::111
> +nd_target=10::121
> +nd_target=10::122
>  nd_target=20::1
>  nd_target=fe80::200:1ff:fe00:0
>  ])
> @@ -22162,6 +22170,8 @@ dl_src=00:00:00:02:00:00
>  # - 00:00:01:00:00:00 - interface MAC.
>  as hv1
>  AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=75,.*${match_sw1_metadata}.*arp_op=1" | grep -oE "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl
> +dl_src=00:00:00:01:00:00
> +dl_src=00:00:00:02:00:00
>  dl_src=00:00:01:00:00:00
>  ])
>
> --
> 2.31.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list