[ovs-dev] [PATCH v4 ovn 7/8] northd: move build_empty_lb_event_flow in build_lswitch_flows_for_lb
Lorenzo Bianconi
lorenzo.bianconi at redhat.com
Fri Jul 2 17:16:04 UTC 2021
Introduce the build_lswitch_flows_for_lb routine in order to visit
each load_balancer first, and then the related datapaths (logical
switches), during lb flow installation.
This patch reduces the memory footprint and CPU utilization of
ovn-northd.
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi at redhat.com>
---
northd/ovn-northd.c | 131 ++++++++++++++++++++++++--------------------
1 file changed, 71 insertions(+), 60 deletions(-)
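Note: the change inverts the traversal order. Previously every logical
switch walked its own load_balancer list, so a load balancer shared by
N switches rebuilt the same per-VIP match/action strings N times; now
each load balancer is visited once and the resulting flow is installed
on every switch recorded in lb->nb_ls[]. Below is a minimal sketch of
the new order, using a hypothetical stand-in type rather than the real
ovn_northd_lb/ovn_datapath structures from the diff:

#include <stdio.h>

/* Hypothetical stand-in, just enough to show the traversal order;
 * the real code uses ovn_northd_lb and ovn_datapath. */
struct sketch_lb {
    const char *name;
    size_t n_vips;
    size_t n_ls;        /* like lb->n_nb_ls */
    int ls_ids[3];      /* like lb->nb_ls[] */
};

int main(void)
{
    /* One load balancer with two VIPs, referenced by three switches. */
    struct sketch_lb lb = { "lb0", 2, 3, { 1, 2, 3 } };

    for (size_t v = 0; v < lb.n_vips; v++) {
        /* Build the per-VIP match/action once (the patch does this via
         * build_empty_lb_event_flow())... */
        printf("build match/action for %s vip%zu\n", lb.name, v);

        /* ...then install it on each referencing datapath, instead of
         * rebuilding it per (datapath, VIP) pair as before. */
        for (size_t s = 0; s < lb.n_ls; s++) {
            printf("  install on ls%d\n", lb.ls_ids[s]);
        }
    }
    return 0;
}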
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index f344ef7c8..2133048b1 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -5200,7 +5200,7 @@ ls_has_lb_vip(struct ovn_datapath *od)
static void
build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
- struct shash *meter_groups, struct hmap *lbs)
+ struct hmap *lbs)
{
/* Do not send ND packets to conntrack */
ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110,
@@ -5231,71 +5231,49 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
110, lflows);
}
- bool vip_configured = false;
for (int i = 0; i < od->nbs->n_load_balancer; i++) {
struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
struct ovn_northd_lb *lb =
ovn_northd_lb_find(lbs, &nb_lb->header_.uuid);
ovs_assert(lb);
- struct ds action = DS_EMPTY_INITIALIZER;
- struct ds match = DS_EMPTY_INITIALIZER;
-
- for (size_t j = 0; j < lb->n_vips; j++) {
- struct ovn_lb_vip *lb_vip = &lb->vips[j];
-
- if (build_empty_lb_event_flow(lb_vip, nb_lb, meter_groups,
- &match, &action)) {
- ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_LB, 130,
- ds_cstr(&match), ds_cstr(&action),
- &nb_lb->header_);
- }
-
- /* Ignore L4 port information in the key because fragmented packets
- * may not have L4 information. The pre-stateful table will send
- * the packet through ct() action to de-fragment. In stateful
- * table, we will eventually look at L4 information. */
+ /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send
+ * packet to conntrack for defragmentation and possibly for unNATting.
+ *
+ * Send all the packets to conntrack in the ingress pipeline if the
+ * logical switch has a load balancer with VIP configured. Earlier
+ * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress
+ * pipeline if the IP destination matches the VIP. But this causes
+ * a few issues when a logical switch has no ACLs configured with
+ * allow-related.
+ * To understand the issue, let's take a TCP load balancer -
+ * 10.0.0.10:80=10.0.0.3:80.
+ * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection
+ * with the VIP - 10.0.0.10, then the packet in the ingress pipeline
+ * of 'p1' is sent to the p1's conntrack zone id and the packet is
+ * load balanced to the backend - 10.0.0.3. For the reply packet from
+ * the backend lport, it is not sent to the conntrack of backend
+ * lport's zone id. This is fine as long as the packet is valid.
+ * Suppose the backend lport sends an invalid TCP packet (like
+ * incorrect sequence number), the packet gets delivered to the
+ * lport 'p1' without unDNATing the packet to the VIP - 10.0.0.10.
+ * And this causes the connection to be reset by the lport p1's VIF.
+ *
+ * We can't fix this issue by adding a logical flow to drop ct.inv
+ * packets in the egress pipeline since it will drop all other
+ * connections not destined to the load balancers.
+ *
+ * To fix this issue, we send all the packets to the conntrack in the
+ * ingress pipeline if a load balancer is configured. We can now
+ * add a lflow to drop ct.inv packets.
+ */
+ if (lb->n_vips) {
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
+ 100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
+ 100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
+ break;
}
- ds_destroy(&action);
- ds_destroy(&match);
-
- vip_configured = (vip_configured || lb->n_vips);
- }
-
- /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send
- * packet to conntrack for defragmentation and possibly for unNATting.
- *
- * Send all the packets to conntrack in the ingress pipeline if the
- * logical switch has a load balancer with VIP configured. Earlier
- * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress pipeline
- * if the IP destination matches the VIP. But this causes few issues when
- * a logical switch has no ACLs configured with allow-related.
- * To understand the issue, lets a take a TCP load balancer -
- * 10.0.0.10:80=10.0.0.3:80.
- * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection with
- * the VIP - 10.0.0.10, then the packet in the ingress pipeline of 'p1'
- * is sent to the p1's conntrack zone id and the packet is load balanced
- * to the backend - 10.0.0.3. For the reply packet from the backend lport,
- * it is not sent to the conntrack of backend lport's zone id. This is fine
- * as long as the packet is valid. Suppose the backend lport sends an
- * invalid TCP packet (like incorrect sequence number), the packet gets
- * delivered to the lport 'p1' without unDNATing the packet to the
- * VIP - 10.0.0.10. And this causes the connection to be reset by the
- * lport p1's VIF.
- *
- * We can't fix this issue by adding a logical flow to drop ct.inv packets
- * in the egress pipeline since it will drop all other connections not
- * destined to the load balancers.
- *
- * To fix this issue, we send all the packets to the conntrack in the
- * ingress pipeline if a load balancer is configured. We can now
- * add a lflow to drop ct.inv packets.
- */
- if (vip_configured) {
- ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
- 100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
- ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
- 100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
}
}
@@ -6911,7 +6889,7 @@ build_lswitch_lflows_pre_acl_and_acl(struct ovn_datapath *od,
ls_get_acl_flags(od);
build_pre_acls(od, port_groups, lflows);
- build_pre_lb(od, lflows, meter_groups, lbs);
+ build_pre_lb(od, lflows, lbs);
build_pre_stateful(od, lflows);
build_acl_hints(od, lflows);
build_acls(od, lflows, port_groups, meter_groups);
@@ -8974,6 +8952,34 @@ next:
free(new_match);
}
+static void
+build_lswitch_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
+ struct shash *meter_groups, struct ds *match,
+ struct ds *action)
+{
+ if (!lb->n_nb_ls) {
+ return;
+ }
+
+ for (size_t i = 0; i < lb->n_vips; i++) {
+ struct ovn_lb_vip *lb_vip = &lb->vips[i];
+
+ if (!build_empty_lb_event_flow(lb_vip, lb->nlb, meter_groups,
+ match, action)) {
+ continue;
+ }
+ for (int j = 0; j < lb->n_nb_ls; j++) {
+ ovn_lflow_add_with_hint(lflows, lb->nb_ls[j],
+ S_SWITCH_IN_PRE_LB, 130, ds_cstr(match),
+ ds_cstr(action), &lb->nlb->header_);
+ }
+ /* Ignore L4 port information in the key because fragmented packets
+ * may not have L4 information. The pre-stateful table will send
+ * the packet through ct() action to de-fragment. In stateful
+ * table, we will eventually look at L4 information. */
+ }
+}
+
static void
build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
struct shash *meter_groups,
@@ -12167,6 +12173,9 @@ build_lflows_thread(void *arg)
lsi->meter_groups,
lsi->nat_entries,
&lsi->match, &lsi->actions);
+ build_lswitch_flows_for_lb(lb, lsi->lflows,
+ lsi->meter_groups,
+ &lsi->match, &lsi->actions);
}
}
for (bnum = control->id;
@@ -12336,6 +12345,8 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
build_lrouter_flows_for_lb(lb, lsi.lflows, lsi.meter_groups,
lsi.nat_entries, &lsi.match,
&lsi.actions);
+ build_lswitch_flows_for_lb(lb, lsi.lflows, lsi.meter_groups,
+ &lsi.match, &lsi.actions);
}
HMAP_FOR_EACH (igmp_group, hmap_node, igmp_groups) {
build_lswitch_ip_mcast_igmp_mld(igmp_group,
--
2.31.1
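A side note on the build_pre_lb() hunk above: with the per-VIP event
flows moved out, the loop only has to answer "does any load balancer
on this switch have a VIP?", so the old vip_configured accumulator is
replaced by adding the two priority-100 conntrack flows at the first
match and breaking out of the loop. A minimal sketch of that
short-circuit pattern, with hypothetical VIP counts standing in for
the real per-LB data:

#include <stdio.h>

int main(void)
{
    /* Hypothetical per-LB VIP counts on one logical switch. */
    size_t n_vips[] = { 0, 0, 3, 1 };
    size_t n_lbs = sizeof n_vips / sizeof n_vips[0];

    /* Old shape: scan every LB and accumulate a vip_configured flag,
     * then add the flows after the loop.  New shape (as in the patch):
     * add the flows at the first LB with VIPs and stop scanning. */
    for (size_t i = 0; i < n_lbs; i++) {
        if (n_vips[i]) {
            printf("add priority-100 ip flows; stop at lb %zu\n", i);
            break;
        }
    }
    return 0;
}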