[ovs-dev] [PATCH v3 ovn 8/9] northd: move build_empty_lb_event_flow in build_lswitch_flows_for_lb

Dumitru Ceara dceara at redhat.com
Fri Jul 2 12:16:27 UTC 2021


On 6/30/21 11:34 AM, Lorenzo Bianconi wrote:
> Introduce the build_lswitch_flows_for_lb routine in order to visit each
> load_balancer first and then its related datapaths (logical switches)
> during lb flow installation.
> This patch reduces the memory footprint and CPU utilization of
> ovn-northd.
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi at redhat.com>
> ---
>  northd/ovn-northd.c | 140 ++++++++++++++++++++++++--------------------
>  1 file changed, 78 insertions(+), 62 deletions(-)
> 
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 37a13c56c..e653b0dd5 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -5198,7 +5198,7 @@ ls_has_lb_vip(struct ovn_datapath *od)
>  
>  static void
>  build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
> -             struct shash *meter_groups, struct hmap *lbs)
> +             struct hmap *lbs)
>  {
>      /* Do not send ND packets to conntrack */
>      ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110,
> @@ -5229,73 +5229,49 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
>                                   110, lflows);
>      }
>  
> -    bool vip_configured = false;
>      for (int i = 0; i < od->nbs->n_load_balancer; i++) {
>          struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
>          struct ovn_northd_lb *lb =
>              ovn_northd_lb_find(lbs, &nb_lb->header_.uuid);
>          ovs_assert(lb);
>  
> -        struct ds action = DS_EMPTY_INITIALIZER;
> -        struct ds match = DS_EMPTY_INITIALIZER;
> -
> -        for (size_t j = 0; j < lb->n_vips; j++) {
> -            struct ovn_lb_vip *lb_vip = &lb->vips[j];
> -
> -            ds_clear(&action);
> -            ds_clear(&match);
> -            if (build_empty_lb_event_flow(lb_vip, nb_lb, meter_groups,
> -                                           &match, &action)) {
> -                ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_LB, 130,
> -                                        ds_cstr(&match), ds_cstr(&action),
> -                                        &nb_lb->header_);
> -            }
> -
> -            /* Ignore L4 port information in the key because fragmented packets
> -             * may not have L4 information.  The pre-stateful table will send
> -             * the packet through ct() action to de-fragment. In stateful
> -             * table, we will eventually look at L4 information. */
> +        /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send
> +         * packet to conntrack for defragmentation and possibly for unNATting.
> +         *
> +         * Send all the packets to conntrack in the ingress pipeline if the
> +         * logical switch has a load balancer with VIP configured. Earlier
> +         * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress
> +         * pipeline if the IP destination matches the VIP. But this causes
> +         * a few issues when a logical switch has no ACLs configured with
> +         * allow-related.
> +         * To understand the issue, let's take a TCP load balancer -
> +         * 10.0.0.10:80=10.0.0.3:80.
> +         * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection
> +         * with the VIP - 10.0.0.10, then the packet in the ingress pipeline
> +         * of 'p1' is sent to the p1's conntrack zone id and the packet is
> +         * load balanced to the backend - 10.0.0.3. For the reply packet from
> +         * the backend lport, it is not sent to the conntrack of backend
> +         * lport's zone id. This is fine as long as the packet is valid.
> +         * Suppose the backend lport sends an invalid TCP packet (like an
> +         * incorrect sequence number), the packet gets delivered to the
> +         * lport 'p1' without unDNATing the packet to the VIP - 10.0.0.10.
> +         * And this causes the connection to be reset by the lport p1's VIF.
> +         *
> +         * We can't fix this issue by adding a logical flow to drop ct.inv
> +         * packets in the egress pipeline since it will drop all other
> +         * connections not destined to the load balancers.
> +         *
> +         * To fix this issue, we send all the packets to the conntrack in the
> +         * ingress pipeline if a load balancer is configured. We can now
> +         * add a lflow to drop ct.inv packets.
> +         */
> +        if (lb->n_vips) {
> +            ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
> +                          100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
> +            ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
> +                          100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
> +            break;
>          }
> -        ds_destroy(&action);
> -        ds_destroy(&match);
> -
> -        vip_configured = (vip_configured || lb->n_vips);
> -    }
> -
> -    /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send
> -     * packet to conntrack for defragmentation and possibly for unNATting.
> -     *
> -     * Send all the packets to conntrack in the ingress pipeline if the
> -     * logical switch has a load balancer with VIP configured. Earlier
> -     * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress pipeline
> -     * if the IP destination matches the VIP. But this causes few issues when
> -     * a logical switch has no ACLs configured with allow-related.
> -     * To understand the issue, lets a take a TCP load balancer -
> -     * 10.0.0.10:80=10.0.0.3:80.
> -     * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection with
> -     * the VIP - 10.0.0.10, then the packet in the ingress pipeline of 'p1'
> -     * is sent to the p1's conntrack zone id and the packet is load balanced
> -     * to the backend - 10.0.0.3. For the reply packet from the backend lport,
> -     * it is not sent to the conntrack of backend lport's zone id. This is fine
> -     * as long as the packet is valid. Suppose the backend lport sends an
> -     *  invalid TCP packet (like incorrect sequence number), the packet gets
> -     * delivered to the lport 'p1' without unDNATing the packet to the
> -     * VIP - 10.0.0.10. And this causes the connection to be reset by the
> -     * lport p1's VIF.
> -     *
> -     * We can't fix this issue by adding a logical flow to drop ct.inv packets
> -     * in the egress pipeline since it will drop all other connections not
> -     * destined to the load balancers.
> -     *
> -     * To fix this issue, we send all the packets to the conntrack in the
> -     * ingress pipeline if a load balancer is configured. We can now
> -     * add a lflow to drop ct.inv packets.
> -     */
> -    if (vip_configured) {
> -        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
> -                      100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
> -        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
> -                      100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
>      }
>  }
>  
> @@ -6911,7 +6887,7 @@ build_lswitch_lflows_pre_acl_and_acl(struct ovn_datapath *od,
>          ls_get_acl_flags(od);
>  
>          build_pre_acls(od, port_groups, lflows);
> -        build_pre_lb(od, lflows, meter_groups, lbs);
> +        build_pre_lb(od, lflows, lbs);
>          build_pre_stateful(od, lflows);
>          build_acl_hints(od, lflows);
>          build_acls(od, lflows, port_groups, meter_groups);
> @@ -8995,6 +8971,43 @@ next:
>      free(new_match);
>  }
>  
> +static void
> +build_lswitch_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
> +                           struct shash *meter_groups)
> +{
> +    if (!lb->n_nb_ls) {
> +        return;
> +    }
> +
> +    struct ds action = DS_EMPTY_INITIALIZER;
> +    struct ds match = DS_EMPTY_INITIALIZER;
> +
> +    for (size_t i = 0; i < lb->n_vips; i++) {
> +        struct ovn_lb_vip *lb_vip = &lb->vips[i];
> +
> +        ds_clear(&action);
> +        ds_clear(&match);
> +
> +        if (build_empty_lb_event_flow(lb_vip, lb->nlb, meter_groups,
> +                                       &match, &action)) {
> +            for (int j = 0; j < lb->n_nb_ls; j++) {
> +                ovn_lflow_add_with_hint(lflows, lb->nb_ls[j],
> +                                        S_SWITCH_IN_PRE_LB, 130,
> +                                        ds_cstr(&match), ds_cstr(&action),
> +                                        &lb->nlb->header_);
> +            }
> +            /* Ignore L4 port information in the key because fragmented packets
> +             * may not have L4 information.  The pre-stateful table will send
> +             * the packet through ct() action to de-fragment. In stateful
> +             * table, we will eventually look at L4 information. */
> +        }

To avoid extra indentation I would:

if (!build_empty_lb_event_flow(...)) {
    continue;
}
for (...) {
    ovn_lflow_add_with_hint(...);
}
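
Untested, but roughly what I have in mind, reusing the names from the
hunk above:

for (size_t i = 0; i < lb->n_vips; i++) {
    struct ovn_lb_vip *lb_vip = &lb->vips[i];

    ds_clear(&action);
    ds_clear(&match);

    if (!build_empty_lb_event_flow(lb_vip, lb->nlb, meter_groups,
                                   &match, &action)) {
        continue;
    }

    for (size_t j = 0; j < lb->n_nb_ls; j++) {
        ovn_lflow_add_with_hint(lflows, lb->nb_ls[j],
                                S_SWITCH_IN_PRE_LB, 130,
                                ds_cstr(&match), ds_cstr(&action),
                                &lb->nlb->header_);
    }
}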

> +    }
> +
> +    ds_destroy(&action);
> +    ds_destroy(&match);
> +
> +}
> +
>  static void
>  build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
>                             struct shash *meter_groups)
> @@ -12190,6 +12203,8 @@ build_lflows_thread(void *arg)
>                                                           &lsi->actions);
>                      build_lrouter_flows_for_lb(lb, lsi->lflows,
>                                                 lsi->meter_groups);
> +                    build_lswitch_flows_for_lb(lb, lsi->lflows,
> +                                               lsi->meter_groups);

We can pass the &lsi->match and &lsi->actions "scratchpads" here.
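
E.g. (untested), the function could take the caller's scratchpads instead
of allocating and destroying its own dynamic strings:

static void
build_lswitch_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
                           struct shash *meter_groups,
                           struct ds *match, struct ds *action)

using ds_clear(match)/ds_clear(action) inside the loop and dropping the
local DS_EMPTY_INITIALIZER/ds_destroy() pairs, with the call site here
becoming:

    build_lswitch_flows_for_lb(lb, lsi->lflows, lsi->meter_groups,
                               &lsi->match, &lsi->actions);

and, in build_lswitch_and_lrouter_flows():

    build_lswitch_flows_for_lb(lb, lsi.lflows, lsi.meter_groups,
                               &lsi.match, &lsi.actions);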

>                  }
>              }
>              for (bnum = control->id;
> @@ -12354,6 +12369,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>                                                   &lsi.actions,
>                                                   &lsi.match);
>              build_lrouter_flows_for_lb(lb, lsi.lflows, lsi.meter_groups);
> +            build_lswitch_flows_for_lb(lb, lsi.lflows, lsi.meter_groups);

Same here.

>          }
>          HMAP_FOR_EACH (igmp_group, hmap_node, igmp_groups) {
>              build_lswitch_ip_mcast_igmp_mld(igmp_group,
> 


