[ovs-dev] [PATCH ovn] ovn-northd: Limit ARP/ND broadcast domain whenever possible.

Dumitru Ceara dceara at redhat.com
Wed Oct 23 07:14:08 UTC 2019


On Thu, Oct 17, 2019 at 4:55 PM Dumitru Ceara <dceara at redhat.com> wrote:
>
> ARP request and ND NS packets for router owned IPs were being
> flooded in the complete L2 domain (using the MC_FLOOD multicast group).
> However this creates a scaling issue in scenarios where aggregation
> logical switches are connected to more logical routers (~350). The
> logical pipelines of all routers would have to be executed before the
> packet is finally replied to by a single router, the owner of the IP
> address.
>
> This commit limits the broadcast domain by bypassing the L2 Lookup stage
> for ARP requests that will be replied by a single router. The packets
> are still flooded in the L2 domain but not on any of the other patch
> ports towards other routers connected to the switch. This restricted
> flooding is done by using a new multicast group (MC_ARP_ND_FLOOD).
>
> IPs that are owned by the routers and for which this fix applies are:
> - IP addresses configured on the router ports.
> - VIPs.
> - NAT IPs.
>
> Reported-at: https://bugzilla.redhat.com/1756945
> Reported-by: Anil Venkata <vkommadi at redhat.com>
> Signed-off-by: Dumitru Ceara <dceara at redhat.com>

Sorry for the noise, please disregard this version.

I had to send a v2 version in order to also address ARP requests and
ND-NS packets received on localnet ports:
https://patchwork.ozlabs.org/patch/1181862/

Thanks,
Dumitru

> ---
>  lib/mcast-group-index.h |   1 +
>  northd/ovn-northd.8.xml |  16 ++++
>  northd/ovn-northd.c     | 189 +++++++++++++++++++++++++++++++++++++++++-------
>  tests/ovn.at            |   2 +-
>  4 files changed, 179 insertions(+), 29 deletions(-)
>
> diff --git a/lib/mcast-group-index.h b/lib/mcast-group-index.h
> index ba995ba..06bd8b3 100644
> --- a/lib/mcast-group-index.h
> +++ b/lib/mcast-group-index.h
> @@ -27,6 +27,7 @@ enum ovn_mcast_tunnel_keys {
>
>      OVN_MCAST_FLOOD_TUNNEL_KEY = OVN_MIN_MULTICAST,
>      OVN_MCAST_UNKNOWN_TUNNEL_KEY,
> +    OVN_MCAST_ARP_ND_TUNNEL_KEY,
>      OVN_MCAST_MROUTER_FLOOD_TUNNEL_KEY,
>      OVN_MCAST_MROUTER_STATIC_TUNNEL_KEY,
>      OVN_MCAST_STATIC_TUNNEL_KEY,
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index d3e0e5e..d942469 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -699,6 +699,22 @@ nd_na_router {
>        </li>
>
>        <li>
> +        Priority-40 flows for each port connected to a logical router
> +        matching self originated GARP/ARP request/ND packets. These packets
> +        are flooded to the <code>MC_FLOOD</code> which contains all logical
> +        ports.
> +      </li>
> +
> +      <li>
> +        Priority-30 flows for each IP address/VIP/NAT address owned by a
> +        router port connected to the switch. These flows match ARP requests
> +        and ND packets for the specific IP addresses.  Matched packets are
> +        forwarded in the L3 domain only to the router that owns the IP
> +        address and flooded in the L2 domain on all ports except patch
> +        ports connected to logical routers.
> +      </li>
> +
> +      <li>
>          One priority-0 fallback flow that matches all packets and advances to
>          the next table.
>        </li>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index ea8ad7c..7537a22 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -1193,6 +1193,34 @@ ovn_port_allocate_key(struct ovn_datapath *od)
>                            1, (1u << 15) - 1, &od->port_key_hint);
>  }
>
> +/* Returns true if the logical switch port 'enabled' column is empty or
> + * set to true.  Otherwise, returns false. */
> +static bool
> +lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
> +{
> +    return !lsp->n_enabled || *lsp->enabled;
> +}
> +
> +/* Returns true only if the logical switch port 'up' column is set to true.
> + * Otherwise, if the column is not set or set to false, returns false. */
> +static bool
> +lsp_is_up(const struct nbrec_logical_switch_port *lsp)
> +{
> +    return lsp->n_up && *lsp->up;
> +}
> +
> +static bool
> +lsp_is_external(const struct nbrec_logical_switch_port *nbsp)
> +{
> +    return !strcmp(nbsp->type, "external");
> +}
> +
> +static bool
> +lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
> +{
> +    return !lrport->enabled || *lrport->enabled;
> +}
> +
>  static char *
>  chassis_redirect_name(const char *port_name)
>  {
> @@ -3018,6 +3046,10 @@ static const struct multicast_group mc_static =
>  static const struct multicast_group mc_unknown =
>      { MC_UNKNOWN, OVN_MCAST_UNKNOWN_TUNNEL_KEY };
>
> +#define MC_ARP_ND "_MC_arp_nd"
> +static const struct multicast_group mc_arp_nd =
> +    { MC_ARP_ND, OVN_MCAST_ARP_ND_TUNNEL_KEY };
> +
>  static bool
>  multicast_group_equal(const struct multicast_group *a,
>                        const struct multicast_group *b)
> @@ -3719,28 +3751,6 @@ build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
>
>  }
>
> -/* Returns true if the logical switch port 'enabled' column is empty or
> - * set to true.  Otherwise, returns false. */
> -static bool
> -lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
> -{
> -    return !lsp->n_enabled || *lsp->enabled;
> -}
> -
> -/* Returns true only if the logical switch port 'up' column is set to true.
> - * Otherwise, if the column is not set or set to false, returns false. */
> -static bool
> -lsp_is_up(const struct nbrec_logical_switch_port *lsp)
> -{
> -    return lsp->n_up && *lsp->up;
> -}
> -
> -static bool
> -lsp_is_external(const struct nbrec_logical_switch_port *nbsp)
> -{
> -    return !strcmp(nbsp->type, "external");
> -}
> -
>  static bool
>  build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
>                      struct ds *options_action, struct ds *response_action,
> @@ -5143,6 +5153,119 @@ build_lrouter_groups(struct hmap *ports, struct ovs_list *lr_list)
>      }
>  }
>
> +/*
> + * Ingress table 11: Flows that forward ARP/ND requests only to the routers
> + * that own the addresses. Packets are still flooded in the switching domain
> + * as regular broadcast.
> + */
> +static void
> +build_lswitch_rport_arp_flow(const char *target_address, int addr_family,
> +                             struct ovn_port *patch_op,
> +                             struct ovn_datapath *od,
> +                             uint32_t priority,
> +                             struct hmap *lflows)
> +{
> +    struct ds match   = DS_EMPTY_INITIALIZER;
> +    struct ds actions = DS_EMPTY_INITIALIZER;
> +
> +    if (addr_family == AF_INET) {
> +        ds_put_format(&match, "arp.tpa == %s && arp.op == 1", target_address);
> +    } else {
> +        ds_put_format(&match, "nd.target == %s && nd_ns", target_address);
> +    }
> +
> +    /* Send a clone of the packet to the router pipeline and flood the
> +     * original in the broadcast domain (skipping router ports). */
> +    ds_put_format(&actions,
> +                  "clone { outport = %s; output; }; "
> +                  "outport = \""MC_ARP_ND"\"; output;",
> +                  patch_op->json_key);
> +    ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, priority,
> +                  ds_cstr(&match), ds_cstr(&actions));
> +
> +    ds_destroy(&match);
> +    ds_destroy(&actions);
> +}
> +
> +/*
> + * Ingress table 11: Flows that forward ARP/ND requests only to the routers
> + * that own the addresses.
> + * Priorities:
> + * - 40: self originated GARPs that need to follow regular processing.
> + * - 30: ARP requests to router owned IPs (interface IP/LB/NAT).
> + */
> +static void
> +build_lswitch_rport_arp_responders(struct ovn_port *op,
> +                                   struct ovn_datapath *sw_od,
> +                                   struct ovn_port *sw_op,
> +                                   struct hmap *lflows)
> +{
> +    if (!op || !op->nbrp) {
> +        return;
> +    }
> +
> +    if (!lrport_is_enabled(op->nbrp)) {
> +        return;
> +    }
> +
> +    struct ds match = DS_EMPTY_INITIALIZER;
> +
> +    /* Self originated (G)ARP requests/ND need to be flooded as usual.
> +     * Priority: 40.
> +     */
> +    ds_put_format(&match, "inport == %s && (arp.op == 1 || nd_ns)",
> +                  sw_op->json_key);
> +    ovn_lflow_add(lflows, sw_od, S_SWITCH_IN_ARP_ND_RSP, 40,
> +                  ds_cstr(&match),
> +                  "outport = \""MC_FLOOD"\"; output;");
> +
> +    ds_destroy(&match);
> +
> +    /* Forward ARP requests for IPs configured on the router only to this
> +     * router port.
> +     * Priority: 30.
> +     */
> +    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
> +        build_lswitch_rport_arp_flow(op->lrp_networks.ipv4_addrs[i].addr_s,
> +                                     AF_INET, sw_op, sw_od, 30, lflows);
> +    }
> +    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
> +        build_lswitch_rport_arp_flow(op->lrp_networks.ipv6_addrs[i].addr_s,
> +                                     AF_INET6, sw_op, sw_od, 30, lflows);
> +    }
> +
> +    /* Forward ARP requests to load-balancer VIPs configured on the router
> +     * only to this router port.
> +     * Priority: 30.
> +     */
> +    struct sset all_ips = SSET_INITIALIZER(&all_ips);
> +    const char *ip_address;
> +    int addr_family;
> +
> +    get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
> +
> +    SSET_FOR_EACH (ip_address, &all_ips) {
> +        build_lswitch_rport_arp_flow(ip_address, addr_family, sw_op, sw_od,
> +                                     30, lflows);
> +    }
> +    sset_destroy(&all_ips);
> +
> +    /* Forward ARP requests to NAT addresses configured on the router
> +     * only to this router port.
> +     * Priority: 30.
> +     */
> +    for (int i = 0; i < op->od->nbr->n_nat; i++) {
> +        const struct nbrec_nat *nat = op->od->nbr->nat[i];
> +
> +        if (!strcmp(nat->type, "snat")) {
> +            continue;
> +        }
> +
> +        build_lswitch_rport_arp_flow(nat->external_ip, AF_INET, sw_op, sw_od,
> +                                     30, lflows);
> +    }
> +}
> +
>  static void
>  build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
>                      struct hmap *port_groups, struct hmap *lflows,
> @@ -5266,6 +5389,16 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
>
>              free(tokstr);
>          } else {
> +
> +            /* For ports connected to logical routers add flows to bypass the
> +             * broadcast flooding of ARP/ND requests in table 17. We direct the
> +             * requests only to the router port that owns the IP address.
> +             * */
> +            if (!strcmp(op->nbsp->type, "router")) {
> +                build_lswitch_rport_arp_responders(op->peer, op->od, op,
> +                                                   lflows);
> +            }
> +
>              /*
>               * Add ARP/ND reply flows if either the
>               *  - port is up or
> @@ -5861,12 +5994,6 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
>      ds_destroy(&actions);
>  }
>
> -static bool
> -lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
> -{
> -    return !lrport->enabled || *lrport->enabled;
> -}
> -
>  /* Returns a string of the IP address of the router port 'op' that
>   * overlaps with 'ip_s".  If one is not found, returns NULL.
>   *
> @@ -9129,6 +9256,12 @@ build_mcast_groups(struct northd_context *ctx,
>          } else if (op->nbsp && lsp_is_enabled(op->nbsp)) {
>              ovn_multicast_add(mcast_groups, &mc_flood, op);
>
> +            /* Add all non-router ports to the ARP ND L2 broadcast flood
> +             * domain entry. */
> +            if (strcmp(op->nbsp->type, "router")) {
> +                ovn_multicast_add(mcast_groups, &mc_arp_nd, op);
> +            }
> +
>              /* If this port is connected to a multicast router then add it
>               * to the MC_MROUTER_FLOOD group.
>               */
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 6bdb9e8..30b42e6 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -10719,7 +10719,7 @@ ovn-nbctl --wait=hv --timeout=3 sync
>  # Check that there is a logical flow in logical switch foo's pipeline
>  # to set the outport to rp-foo (which is expected).
>  OVS_WAIT_UNTIL([test 1 = `ovn-sbctl dump-flows foo | grep ls_in_l2_lkup | \
> -grep rp-foo | grep -v is_chassis_resident | wc -l`])
> +grep rp-foo | grep -v is_chassis_resident | grep "eth.dst" | wc -l`])
>
>  # Set the option 'reside-on-redirect-chassis' for foo
>  ovn-nbctl set logical_router_port foo options:reside-on-redirect-chassis=true
> --
> 1.8.3.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev



More information about the dev mailing list