[ovs-dev] [PATCH v3 ovn 8/9] northd: move build_empty_lb_event_flow in build_lswitch_flows_for_lb
Dumitru Ceara
dceara at redhat.com
Fri Jul 2 12:16:27 UTC 2021
On 6/30/21 11:34 AM, Lorenzo Bianconi wrote:
> Introduce build_lswitch_flows_for_lb routine in order to visit first
> each load_balancer and then related datapath (logical switches) during
> lb flow installation.
> This patch allows to reduce memory footprint and cpu utilization in
> ovn-northd.
>
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi at redhat.com>
> ---
> northd/ovn-northd.c | 140 ++++++++++++++++++++++++--------------------
> 1 file changed, 78 insertions(+), 62 deletions(-)
>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 37a13c56c..e653b0dd5 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -5198,7 +5198,7 @@ ls_has_lb_vip(struct ovn_datapath *od)
>
> static void
> build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
> - struct shash *meter_groups, struct hmap *lbs)
> + struct hmap *lbs)
> {
> /* Do not send ND packets to conntrack */
> ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110,
> @@ -5229,73 +5229,49 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
> 110, lflows);
> }
>
> - bool vip_configured = false;
> for (int i = 0; i < od->nbs->n_load_balancer; i++) {
> struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
> struct ovn_northd_lb *lb =
> ovn_northd_lb_find(lbs, &nb_lb->header_.uuid);
> ovs_assert(lb);
>
> - struct ds action = DS_EMPTY_INITIALIZER;
> - struct ds match = DS_EMPTY_INITIALIZER;
> -
> - for (size_t j = 0; j < lb->n_vips; j++) {
> - struct ovn_lb_vip *lb_vip = &lb->vips[j];
> -
> - ds_clear(&action);
> - ds_clear(&match);
> - if (build_empty_lb_event_flow(lb_vip, nb_lb, meter_groups,
> - &match, &action)) {
> - ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_LB, 130,
> - ds_cstr(&match), ds_cstr(&action),
> - &nb_lb->header_);
> - }
> -
> - /* Ignore L4 port information in the key because fragmented packets
> - * may not have L4 information. The pre-stateful table will send
> - * the packet through ct() action to de-fragment. In stateful
> - * table, we will eventually look at L4 information. */
> + /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send
> + * packet to conntrack for defragmentation and possibly for unNATting.
> + *
> + * Send all the packets to conntrack in the ingress pipeline if the
> + * logical switch has a load balancer with VIP configured. Earlier
> + * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress
> + * pipeline if the IP destination matches the VIP. But this causes
> + * few issues when a logical switch has no ACLs configured with
> + * allow-related.
> + * To understand the issue, let's take a TCP load balancer -
> + * 10.0.0.10:80=10.0.0.3:80.
> + * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection
> + * with the VIP - 10.0.0.10, then the packet in the ingress pipeline
> + * of 'p1' is sent to the p1's conntrack zone id and the packet is
> + * load balanced to the backend - 10.0.0.3. For the reply packet from
> + * the backend lport, it is not sent to the conntrack of backend
> + * lport's zone id. This is fine as long as the packet is valid.
> + * Suppose the backend lport sends an invalid TCP packet (like
> + * incorrect sequence number), the packet gets delivered to the
> + * lport 'p1' without unDNATing the packet to the VIP - 10.0.0.10.
> + * And this causes the connection to be reset by the lport p1's VIF.
> + *
> + * We can't fix this issue by adding a logical flow to drop ct.inv
> + * packets in the egress pipeline since it will drop all other
> + * connections not destined to the load balancers.
> + *
> + * To fix this issue, we send all the packets to the conntrack in the
> + * ingress pipeline if a load balancer is configured. We can now
> + * add a lflow to drop ct.inv packets.
> + */
> + if (lb->n_vips) {
> + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
> + 100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
> + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
> + 100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
> + break;
> }
> - ds_destroy(&action);
> - ds_destroy(&match);
> -
> - vip_configured = (vip_configured || lb->n_vips);
> - }
> -
> - /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send
> - * packet to conntrack for defragmentation and possibly for unNATting.
> - *
> - * Send all the packets to conntrack in the ingress pipeline if the
> - * logical switch has a load balancer with VIP configured. Earlier
> - * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress pipeline
> - * if the IP destination matches the VIP. But this causes few issues when
> - * a logical switch has no ACLs configured with allow-related.
> - * To understand the issue, lets a take a TCP load balancer -
> - * 10.0.0.10:80=10.0.0.3:80.
> - * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection with
> - * the VIP - 10.0.0.10, then the packet in the ingress pipeline of 'p1'
> - * is sent to the p1's conntrack zone id and the packet is load balanced
> - * to the backend - 10.0.0.3. For the reply packet from the backend lport,
> - * it is not sent to the conntrack of backend lport's zone id. This is fine
> - * as long as the packet is valid. Suppose the backend lport sends an
> - * invalid TCP packet (like incorrect sequence number), the packet gets
> - * delivered to the lport 'p1' without unDNATing the packet to the
> - * VIP - 10.0.0.10. And this causes the connection to be reset by the
> - * lport p1's VIF.
> - *
> - * We can't fix this issue by adding a logical flow to drop ct.inv packets
> - * in the egress pipeline since it will drop all other connections not
> - * destined to the load balancers.
> - *
> - * To fix this issue, we send all the packets to the conntrack in the
> - * ingress pipeline if a load balancer is configured. We can now
> - * add a lflow to drop ct.inv packets.
> - */
> - if (vip_configured) {
> - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
> - 100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
> - ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
> - 100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
> }
> }
>
> @@ -6911,7 +6887,7 @@ build_lswitch_lflows_pre_acl_and_acl(struct ovn_datapath *od,
> ls_get_acl_flags(od);
>
> build_pre_acls(od, port_groups, lflows);
> - build_pre_lb(od, lflows, meter_groups, lbs);
> + build_pre_lb(od, lflows, lbs);
> build_pre_stateful(od, lflows);
> build_acl_hints(od, lflows);
> build_acls(od, lflows, port_groups, meter_groups);
> @@ -8995,6 +8971,43 @@ next:
> free(new_match);
> }
>
> +static void
> +build_lswitch_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
> + struct shash *meter_groups)
> +{
> + if (!lb->n_nb_ls) {
> + return;
> + }
> +
> + struct ds action = DS_EMPTY_INITIALIZER;
> + struct ds match = DS_EMPTY_INITIALIZER;
> +
> + for (size_t i = 0; i < lb->n_vips; i++) {
> + struct ovn_lb_vip *lb_vip = &lb->vips[i];
> +
> + ds_clear(&action);
> + ds_clear(&match);
> +
> + if (build_empty_lb_event_flow(lb_vip, lb->nlb, meter_groups,
> + &match, &action)) {
> + for (int j = 0; j < lb->n_nb_ls; j++) {
> + ovn_lflow_add_with_hint(lflows, lb->nb_ls[j],
> + S_SWITCH_IN_PRE_LB, 130,
> + ds_cstr(&match), ds_cstr(&action),
> + &lb->nlb->header_);
> + }
> + /* Ignore L4 port information in the key because fragmented packets
> + * may not have L4 information. The pre-stateful table will send
> + * the packet through ct() action to de-fragment. In stateful
> + * table, we will eventually look at L4 information. */
> + }
To avoid extra indentation I would:
if (!build_empty_lb_event_flow(...)) {
continue;
}
for (...) {
ovn_lflow_add_with_hint(...)
}
> + }
> +
> + ds_destroy(&action);
> + ds_destroy(&match);
> +
> +}
> +
> static void
> build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
> struct shash *meter_groups)
> @@ -12190,6 +12203,8 @@ build_lflows_thread(void *arg)
> &lsi->actions);
> build_lrouter_flows_for_lb(lb, lsi->lflows,
> lsi->meter_groups);
> + build_lswitch_flows_for_lb(lb, lsi->lflows,
> + lsi->meter_groups);
We can pass the &lsi->match and &lsi->actions "scratchpads" here.
> }
> }
> for (bnum = control->id;
> @@ -12354,6 +12369,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
> &lsi.actions,
> &lsi.match);
> build_lrouter_flows_for_lb(lb, lsi.lflows, lsi.meter_groups);
> + build_lswitch_flows_for_lb(lb, lsi.lflows, lsi.meter_groups);
Same here.
> }
> HMAP_FOR_EACH (igmp_group, hmap_node, igmp_groups) {
> build_lswitch_ip_mcast_igmp_mld(igmp_group,
>
More information about the dev
mailing list