[ovs-dev] [PATCH ovn] northd: Add lflows to send all pkts to conntrack if LB is configured on a lswitch.
Mark Michelson
mmichels at redhat.com
Thu Sep 10 17:09:06 UTC 2020
On 9/7/20 8:43 AM, numans at ovn.org wrote:
> From: Numan Siddique <numans at ovn.org>
>
> Prior to this patch, if a load balancer is configured on a logical switch but with no
> ACLs with allow-related configured, then in the ingress pipeline only the packets
> with ip.dst = VIP will be sent to conntrack using the zone id of the source logical port.
>
> If the backend of the load balancer, sends an invalid packet (for example invalid tcp
> sequence number), then such packets will be delivered to the source logical port VIF
> without unDNATting. This causes the source to reset the connection.
>
> This patch fixes this issue by sending all the packets to conntrack if a load balancer
> is configured on the logical switch. Because of this, any invalid (ct.inv) packets will
> be dropped in the ingress pipeline itself.
>
> Unfortunately this impacts the performance as now there will be extra recirculations
> because of ct and ct_commit (for new connections) actions.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1870359
> Reported-by: Tim Rozet (trozet at redhat.com)
> Signed-off-by: Numan Siddique <numans at ovn.org>
> ---
> northd/ovn-northd.8.xml | 14 +++---
> northd/ovn-northd.c | 99 +++++++++++++++++++++++++++--------------
> 2 files changed, 72 insertions(+), 41 deletions(-)
>
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index 989e3643b8..89711c5a6c 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -340,15 +340,12 @@
> it contains a priority-110 flow to move IPv6 Neighbor Discovery traffic
> to the next table. If load balancing rules with virtual IP addresses
> (and ports) are configured in <code>OVN_Northbound</code> database for a
> - logical switch datapath, a priority-100 flow is added for each configured
> - virtual IP address <var>VIP</var>. For IPv4 <var>VIPs</var>, the match is
> - <code>ip && ip4.dst == <var>VIP</var></code>. For IPv6
> - <var>VIPs</var>, the match is <code>ip &&
> - ip6.dst == <var>VIP</var></code>. The flow sets an action
> + logical switch datapath, a priority-100 flow is added with the match
> + <code>ip</code> to match on IP packets and sets the action
> <code>reg0[0] = 1; next;</code> to act as a hint for table
> <code>Pre-stateful</code> to send IP packets to the connection tracker
> - for packet de-fragmentation before eventually advancing to ingress table
> - <code>LB</code>.
> + for packet de-fragmentation before eventually advancing to ingress
> + table <code>LB</code>.
> If controller_event has been enabled and load balancing rules with
> empty backends have been added in <code>OVN_Northbound</code>, a 130 flow
> is added to trigger ovn-controller events whenever the chassis receives a
> @@ -430,7 +427,8 @@
> <p>
> This table also contains a priority 0 flow with action
> <code>next;</code>, so that ACLs allow packets by default. If the
> - logical datapath has a statetful ACL, the following flows will
> + logical datapath has a statetful ACL or a load balancer with VIP
> + configured, the following flows will
> also be added:
> </p>
>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 3de71612b8..c322558051 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -5031,6 +5031,23 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows,
> free(action);
> }
>
> +static bool
> +has_lb_vip(struct ovn_datapath *od, struct hmap *lbs)
> +{
> + for (int i = 0; i < od->nbs->n_load_balancer; i++) {
> + struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
> + struct ovn_lb *lb =
> + ovn_lb_find(lbs, &nb_lb->header_.uuid);
> + ovs_assert(lb);
> +
> + if (lb->n_vips) {
> + return true;
> + }
I'm not sure why you need to look up the ovn_lb here. Why not just
return true if smap_count(&nb_lb->vips) > 0 ?
lb is guaranteed to be found in the lbs hmap, and lb->n_vips is always
set to smap_count(&nb_lb->vips) (see ovn_lb_create).
> + }
> +
> + return false;
> +}
> +
> static void
> build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
> struct shash *meter_groups, struct hmap *lbs)
> @@ -5069,8 +5086,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
> 110, lflows);
> }
>
> - struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
> - struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
> bool vip_configured = false;
> for (int i = 0; i < od->nbs->n_load_balancer; i++) {
> struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
> @@ -5080,12 +5095,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
>
> for (size_t j = 0; j < lb->n_vips; j++) {
> struct lb_vip *lb_vip = &lb->vips[j];
> - if (lb_vip->addr_family == AF_INET) {
> - sset_add(&all_ips_v4, lb_vip->vip);
> - } else {
> - sset_add(&all_ips_v6, lb_vip->vip);
> - }
> -
> build_empty_lb_event_flow(od, lflows, lb_vip, nb_lb,
> S_SWITCH_IN_PRE_LB, meter_groups);
>
> @@ -5099,26 +5108,37 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
> }
>
> /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
> - * packet to conntrack for defragmentation. */
> - const char *ip_address;
> - SSET_FOR_EACH (ip_address, &all_ips_v4) {
> - char *match = xasprintf("ip && ip4.dst == %s", ip_address);
> - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
> - 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
> - free(match);
> - }
> -
> - SSET_FOR_EACH (ip_address, &all_ips_v6) {
> - char *match = xasprintf("ip && ip6.dst == %s", ip_address);
> - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
> - 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
> - free(match);
> - }
> -
> - sset_destroy(&all_ips_v4);
> - sset_destroy(&all_ips_v6);
> -
> + * packet to conntrack for defragmentation.
> + *
> + * Send all the packets to conntrack in the ingress pipeline if the
> + * logical switch has a load balancer with VIP configured. Earlier
> + * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress pipeline
> + * if the IP destination matches the VIP. But this causes few issues when
> + * a logical switch has no ACLs configured with allow-related.
> + * To understand the issue, lets a take a TCP load balancer -
> + * 10.0.0.10:80=10.0.0.3:80.
> + * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection with
> + * the VIP - 10.0.0.10, then the packet in the ingress pipeline of 'p1'
> + * is sent to the p1's conntrack zone id and the packet is load balanced
> + * to the backend - 10.0.0.3. For the reply packet from the backend lport,
> + * it is not sent to the conntrack of backend lport's zone id. This is fine
> + * as long as the packet is valid. Suppose the backend lport sends an
> + * invalid TCP packet (like incorrect sequence number), the packet gets
> + * delivered to the lport 'p1' without unDNATing the packet to the
> + * VIP - 10.0.0.10. And this causes the connection to be reset by the
> + * lport p1's VIF.
> + *
> + * We can't fix this issue by adding a logical flow to drop ct.inv packets
> + * in the egress pipeline since it will drop all other connections not
> + * destined to the load balancers.
> + *
> + * To fix this issue, we send all the packets to the conntrack in the
> + * ingress pipeline if a load balancer is configured. We can now
> + * add a lflow to drop ct.inv packets.
> + */
> if (vip_configured) {
> + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
> + 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
> ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
> 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
> }
> @@ -5477,9 +5497,9 @@ build_port_group_lswitches(struct northd_context *ctx, struct hmap *pgs,
>
> static void
> build_acls(struct ovn_datapath *od, struct hmap *lflows,
> - struct hmap *port_groups)
> + struct hmap *port_groups, struct hmap *lbs)
> {
> - bool has_stateful = has_stateful_acl(od);
> + bool has_stateful = has_stateful_acl(od) || has_lb_vip(od, lbs);
>
> /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
> * default. A related rule at priority 1 is added below if there
> @@ -5736,19 +5756,32 @@ build_qos(struct ovn_datapath *od, struct hmap *lflows) {
> }
>
> static void
> -build_lb(struct ovn_datapath *od, struct hmap *lflows)
> +build_lb(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
> {
> /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
> * default. */
> ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
> ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
>
> - if (od->nbs->load_balancer) {
> + if (od->nbs->n_load_balancer) {
> for (size_t i = 0; i < od->n_router_ports; i++) {
> skip_port_from_conntrack(od, od->router_ports[i],
> S_SWITCH_IN_LB, S_SWITCH_OUT_LB,
> UINT16_MAX, lflows);
> }
> + }
> +
> + bool vip_configured = false;
> + for (int i = 0; i < od->nbs->n_load_balancer; i++) {
> + struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
> + struct ovn_lb *lb =
> + ovn_lb_find(lbs, &nb_lb->header_.uuid);
> + ovs_assert(lb);
> +
> + vip_configured = (vip_configured || lb->n_vips);
As Dumitru stated, you can use the new has_lb_vip() function here, but
it's another instance where ovn_lb_find() seems unnecessary.
> + }
> +
> + if (vip_configured) {
> /* Ingress and Egress LB Table (Priority 65534).
> *
> * Send established traffic through conntrack for just NAT. */
> @@ -6622,9 +6655,9 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
> build_pre_acls(od, lflows);
> build_pre_lb(od, lflows, meter_groups, lbs);
> build_pre_stateful(od, lflows);
> - build_acls(od, lflows, port_groups);
> + build_acls(od, lflows, port_groups, lbs);
> build_qos(od, lflows);
> - build_lb(od, lflows);
> + build_lb(od, lflows, lbs);
> build_stateful(od, lflows, lbs);
> }
>
>
More information about the dev
mailing list