[ovs-dev] [PATCH v2 ovn 1/2] northd: refactor unreachable IPs lb flows

Mark Michelson mmichels at redhat.com
Wed Aug 25 20:55:57 UTC 2021


Hi Lorenzo, I discovered some bugs I missed in my first pass-through. 
Sorry for missing them the first time around. See below.

On 8/25/21 7:58 AM, Lorenzo Bianconi wrote:
> Refactor unreachable IPs for vip load-balancers inverting the logic used
> during the lb flow creation in order to visit lb first and then related
> datapath/ovn_ports. This is a preliminary patch to avoid recomputing
> flow hash and lflow lookup when not necessary.
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi at redhat.com>
> ---
>   northd/ovn-northd.c | 138 ++++++++++++++++++++++++++++++++------------
>   1 file changed, 102 insertions(+), 36 deletions(-)
> 
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index af413aba4..f4b5a4a61 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -4400,6 +4400,26 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
>   }
>   
>   /* Adds a row with the specified contents to the Logical_Flow table. */
> +static void
> +ovn_lflow_add_at_with_hash(struct hmap *lflow_map, struct ovn_datapath *od,
> +                           enum ovn_stage stage, uint16_t priority,
> +                           const char *match, const char *actions,
> +                           const char *io_port, const char *ctrl_meter,
> +                           const struct ovsdb_idl_row *stage_hint,
> +                           const char *where, uint32_t hash)
> +{
> +    ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
> +    if (use_logical_dp_groups && use_parallel_build) {
> +        lock_hash_row(&lflow_locks, hash);
> +        do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
> +                         actions, io_port, stage_hint, where, ctrl_meter);
> +        unlock_hash_row(&lflow_locks, hash);
> +    } else {
> +        do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
> +                         actions, io_port, stage_hint, where, ctrl_meter);
> +    }
> +}
> +
>   static void
>   ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
>                    enum ovn_stage stage, uint16_t priority,
> @@ -4407,24 +4427,14 @@ ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
>                    const char *ctrl_meter,
>                    const struct ovsdb_idl_row *stage_hint, const char *where)
>   {
> -    ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
> -
>       uint32_t hash;
>   
>       hash = ovn_logical_flow_hash(ovn_stage_get_table(stage),
>                                    ovn_stage_get_pipeline_name(stage),
>                                    priority, match,
>                                    actions);
> -
> -    if (use_logical_dp_groups && use_parallel_build) {
> -        lock_hash_row(&lflow_locks, hash);
> -        do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
> -                         actions, io_port, stage_hint, where, ctrl_meter);
> -        unlock_hash_row(&lflow_locks, hash);
> -    } else {
> -        do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
> -                         actions, io_port, stage_hint, where, ctrl_meter);
> -    }
> +    ovn_lflow_add_at_with_hash(lflow_map, od, stage, priority, match, actions,
> +                               io_port, ctrl_meter, stage_hint, where, hash);
>   }
>   
>   /* Adds a row with the specified contents to the Logical_Flow table. */
> @@ -6879,16 +6889,11 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
>           /* Check if the ovn port has a network configured on which we could
>            * expect ARP requests for the LB VIP.
>            */
> -        if (ip_parse(ip_addr, &ipv4_addr)) {
> -            if (lrouter_port_ipv4_reachable(op, ipv4_addr)) {
> -                build_lswitch_rport_arp_req_flow_for_reachable_ip(
> -                    ip_addr, AF_INET, sw_op, sw_od, 80, lflows,
> -                    stage_hint);
> -            } else {
> -                build_lswitch_rport_arp_req_flow_for_unreachable_ip(
> -                        ip_addr, AF_INET, sw_od, 90, lflows,
> -                        stage_hint);
> -            }
> +        if (ip_parse(ip_addr, &ipv4_addr) &&
> +            lrouter_port_ipv4_reachable(op, ipv4_addr)) {
> +            build_lswitch_rport_arp_req_flow_for_reachable_ip(
> +                ip_addr, AF_INET, sw_op, sw_od, 80, lflows,
> +                stage_hint);
>           }
>       }
>       SSET_FOR_EACH (ip_addr, &op->od->lb_ips_v6) {
> @@ -6897,16 +6902,11 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
>           /* Check if the ovn port has a network configured on which we could
>            * expect NS requests for the LB VIP.
>            */
> -        if (ipv6_parse(ip_addr, &ipv6_addr)) {
> -            if (lrouter_port_ipv6_reachable(op, &ipv6_addr)) {
> -                build_lswitch_rport_arp_req_flow_for_reachable_ip(
> -                    ip_addr, AF_INET6, sw_op, sw_od, 80, lflows,
> -                    stage_hint);
> -            } else {
> -                build_lswitch_rport_arp_req_flow_for_unreachable_ip(
> -                    ip_addr, AF_INET6, sw_od, 90, lflows,
> -                    stage_hint);
> -            }
> +        if (ipv6_parse(ip_addr, &ipv6_addr) &&
> +            lrouter_port_ipv6_reachable(op, &ipv6_addr)) {
> +            build_lswitch_rport_arp_req_flow_for_reachable_ip(
> +                ip_addr, AF_INET6, sw_op, sw_od, 80, lflows,
> +                stage_hint);
>           }
>       }
>   
> @@ -9441,9 +9441,72 @@ build_lrouter_defrag_flows_for_lb(struct ovn_northd_lb *lb,
>       ds_destroy(&defrag_actions);
>   }
>   
> +static void
> +build_lflows_for_unreachable_vips(struct ovn_northd_lb *lb,
> +                                  struct ovn_lb_vip *lb_vip,
> +                                  struct hmap *lflows,
> +                                  struct hmap *ports,
> +                                  struct ds *match)
> +{
> +    static const char *action = "outport = _MC_flood; output;";
> +    bool ipv4 = IN6_IS_ADDR_V4MAPPED(&lb_vip->vip);
> +    ovs_be32 ipv4_addr;
> +
> +    ds_clear(match);
> +    if (ipv4) {
> +        if (!ip_parse(lb_vip->vip_str, &ipv4_addr)) {
> +            return;
> +        }
> +        ds_put_format(match, "%s && arp.op == 1 && arp.tpa == %s",
> +                      FLAGBIT_NOT_VXLAN, lb_vip->vip_str);
> +    } else {
> +        ds_put_format(match, "%s && nd_ns && nd.target == %s",
> +                      FLAGBIT_NOT_VXLAN, lb_vip->vip_str);
> +    }
> +
> +    uint32_t hash = ovn_logical_flow_hash(
> +            ovn_stage_get_table(S_SWITCH_IN_L2_LKUP),
> +            ovn_stage_get_pipeline_name(S_SWITCH_IN_L2_LKUP), 90,
> +            ds_cstr(match), action);
> +
> +    for (size_t i = 0; i < lb->n_nb_lr; i++) {
> +        struct ovn_datapath *od = lb->nb_lr[i];

Let's establish here that od is a logical router. This will be the basis 
for my comments for the rest of this review.

> +        if (!od->is_gw_router && !od->n_l3dgw_ports) {
> +            continue;
> +        }
> +
> +        for (size_t j = 0; j < od->n_router_ports; j++) {

od->router_ports and od->n_router_ports are only used by logical 
switches. Each logical switch port of type "router" is included in this 
array. In other words, od->router_ports is a list of logical switch 
ports that are connected to logical routers. So I think this means that 
this for loop is a no-op.

I think the appropriate change is to use od->nbr->n_ports here...

> +            struct ovn_port *op = ovn_port_get_peer(ports,
> +                                                    od->router_ports[j]);

... and use od->nbr->ports[j] here.

Also, ovn_port_get_peer() only works for logical switch ports (it should 
probably be renamed), so this will return NULL every time if a logical 
router port is passed in.

> +            if (!op) {
> +                continue;
> +            }
> +
> +            if (!od->is_gw_router  && !is_l3dgw_port(op)) {

You already checked od->is_gw_router outside this loop, so you can 
remove the redundant check here.

Also, this code is hurting my brain a bit. In the outer loop, "od" is a 
logical router. We then iterate over od's ports and set "op" to the 
*peer* of the current port. So "op" should presumably be a logical 
switch port. So testing is_l3dgw_port(op) should always return false, right?

> +                continue;
> +            }
> +
> +            struct ovn_port *peer = op->peer;
> +            if (!peer || !peer->nbsp || lsp_is_external(peer->nbsp)) {
> +                continue;
> +            }

Now we set peer to op->peer. Since we already know that op is the peer 
of the logical router port, then op->peer should just be the logical 
router port again. Checks for peer->nbsp and lsp_is_external(peer->nbsp) 
will always be false.

> +
> +            if ((ipv4 && lrouter_port_ipv4_reachable(op, ipv4_addr)) ||
> +                (!ipv4 && lrouter_port_ipv6_reachable(op, &lb_vip->vip))) {
> +                continue;
> +            }
> +            ovn_lflow_add_at_with_hash(lflows, peer->od,
> +                                       S_SWITCH_IN_L2_LKUP, 90,

And then finally here, since peer is a logical router port, adding the 
lflow to S_SWITCH_IN_L2_LKUP table doesn't work.

I think the issue here is that "op" shouldn't be set to the peer of the 
logical router port. Instead, I think it should be set directly to the 
logical router port. If you do this, then the rest of the logic in this 
loop should work correctly.

> +                                       ds_cstr(match), action,
> +                                       NULL, NULL, &peer->nbsp->header_,
> +                                       OVS_SOURCE_LOCATOR, hash);
> +        }
> +    }
> +}
> +
>   static void
>   build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
> -                           struct shash *meter_groups,
> +                           struct hmap *ports, struct shash *meter_groups,
>                              struct ds *match, struct ds *action)
>   {
>       if (!lb->n_nb_lr) {
> @@ -9453,6 +9516,8 @@ build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
>       for (size_t i = 0; i < lb->n_vips; i++) {
>           struct ovn_lb_vip *lb_vip = &lb->vips[i];
>   
> +        build_lflows_for_unreachable_vips(lb, lb_vip, lflows, ports, match);
> +
>           build_lrouter_nat_flows_for_lb(lb_vip, lb, &lb->vips_nb[i],
>                                          lflows, match, action,
>                                          meter_groups);
> @@ -12778,7 +12843,7 @@ build_lflows_thread(void *arg)
>                       build_lrouter_defrag_flows_for_lb(lb, lsi->lflows,
>                                                         &lsi->match);
>                       build_lrouter_flows_for_lb(lb, lsi->lflows,
> -                                               lsi->meter_groups,
> +                                               lsi->ports, lsi->meter_groups,
>                                                  &lsi->match, &lsi->actions);
>                       build_lswitch_flows_for_lb(lb, lsi->lflows,
>                                                  lsi->meter_groups,
> @@ -12947,8 +13012,9 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>                                                    &lsi.actions,
>                                                    &lsi.match);
>               build_lrouter_defrag_flows_for_lb(lb, lsi.lflows, &lsi.match);
> -            build_lrouter_flows_for_lb(lb, lsi.lflows, lsi.meter_groups,
> -                                       &lsi.match, &lsi.actions);
> +            build_lrouter_flows_for_lb(lb, lsi.lflows, lsi.ports,
> +                                       lsi.meter_groups, &lsi.match,
> +                                       &lsi.actions);
>               build_lswitch_flows_for_lb(lb, lsi.lflows, lsi.meter_groups,
>                                          &lsi.match, &lsi.actions);
>           }
> 



More information about the dev mailing list