[ovs-dev] [PATCH ovn] northd: Add lflows to send all pkts to conntrack if LB is configured on a lswitch.

numans at ovn.org numans at ovn.org
Mon Sep 7 12:43:20 UTC 2020


From: Numan Siddique <numans at ovn.org>

Prior to this patch, if a load balancer is configured on a logical switch but with no
ACLs with allow-related configured, then in the ingress pipeline only the packets
with ip.dst = VIP will be sent to conntrack using the zone id of the source logical port.

If the backend of the load balancer sends an invalid packet (for example an invalid tcp
sequence number), then such packets will be delivered to the source logical port VIF
without unDNATting. This causes the source to reset the connection.

This patch fixes this issue by sending all the packets to conntrack if a load balancer
is configured on the logical switch. Because of this, any invalid (ct.inv) packets will
be dropped in the ingress pipeline itself.

Unfortunately this impacts the performance as now there will be extra recirculations
because of ct and ct_commit (for new connections) actions.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1870359
Reported-by: Tim Rozet (trozet at redhat.com)
Signed-off-by: Numan Siddique <numans at ovn.org>
---
 northd/ovn-northd.8.xml | 14 +++---
 northd/ovn-northd.c     | 99 +++++++++++++++++++++++++++--------------
 2 files changed, 72 insertions(+), 41 deletions(-)

diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 989e3643b8..89711c5a6c 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -340,15 +340,12 @@
       it contains a priority-110 flow to move IPv6 Neighbor Discovery traffic
       to the next table. If load balancing rules with virtual IP addresses
       (and ports) are configured in <code>OVN_Northbound</code> database for a
-      logical switch datapath, a priority-100 flow is added for each configured
-      virtual IP address <var>VIP</var>. For IPv4 <var>VIPs</var>, the match is
-      <code>ip && ip4.dst == <var>VIP</var></code>. For IPv6
-      <var>VIPs</var>, the match is <code>ip &&
-      ip6.dst == <var>VIP</var></code>. The flow sets an action
+      logical switch datapath, a priority-100 flow is added with the match
+      <code>ip</code> to match on IP packets and sets the action
       <code>reg0[0] = 1; next;</code> to act as a hint for table
       <code>Pre-stateful</code> to send IP packets to the connection tracker
-      for packet de-fragmentation before eventually advancing to ingress table
-      <code>LB</code>.
+      for packet de-fragmentation before eventually advancing to ingress
+      table <code>LB</code>.
       If controller_event has been enabled and load balancing rules with
       empty backends have been added in <code>OVN_Northbound</code>, a 130 flow
       is added to trigger ovn-controller events whenever the chassis receives a
@@ -430,7 +427,8 @@
     <p>
       This table also contains a priority 0 flow with action
       <code>next;</code>, so that ACLs allow packets by default.  If the
-      logical datapath has a statetful ACL, the following flows will
+      logical datapath has a stateful ACL or a load balancer with VIP
+      configured, the following flows will
       also be added:
     </p>
 
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 3de71612b8..c322558051 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -5031,6 +5031,23 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows,
     free(action);
 }
 
+static bool
+has_lb_vip(struct ovn_datapath *od, struct hmap *lbs)
+{
+    for (int i = 0; i < od->nbs->n_load_balancer; i++) {
+        struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
+        struct ovn_lb *lb =
+            ovn_lb_find(lbs, &nb_lb->header_.uuid);
+        ovs_assert(lb);
+
+        if (lb->n_vips) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 static void
 build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
              struct shash *meter_groups, struct hmap *lbs)
@@ -5069,8 +5086,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
                                  110, lflows);
     }
 
-    struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
-    struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
     bool vip_configured = false;
     for (int i = 0; i < od->nbs->n_load_balancer; i++) {
         struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
@@ -5080,12 +5095,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
 
         for (size_t j = 0; j < lb->n_vips; j++) {
             struct lb_vip *lb_vip = &lb->vips[j];
-            if (lb_vip->addr_family == AF_INET) {
-                sset_add(&all_ips_v4, lb_vip->vip);
-            } else {
-                sset_add(&all_ips_v6, lb_vip->vip);
-            }
-
             build_empty_lb_event_flow(od, lflows, lb_vip, nb_lb,
                                       S_SWITCH_IN_PRE_LB, meter_groups);
 
@@ -5099,26 +5108,37 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
     }
 
     /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
-     * packet to conntrack for defragmentation. */
-    const char *ip_address;
-    SSET_FOR_EACH (ip_address, &all_ips_v4) {
-        char *match = xasprintf("ip && ip4.dst == %s", ip_address);
-        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
-                      100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
-        free(match);
-    }
-
-    SSET_FOR_EACH (ip_address, &all_ips_v6) {
-        char *match = xasprintf("ip && ip6.dst == %s", ip_address);
-        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
-                      100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
-        free(match);
-    }
-
-    sset_destroy(&all_ips_v4);
-    sset_destroy(&all_ips_v6);
-
+     * packet to conntrack for defragmentation.
+     *
+     * Send all the packets to conntrack in the ingress pipeline if the
+     * logical switch has a load balancer with VIP configured. Earlier
+     * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress pipeline
+     * if the IP destination matches the VIP. But this causes few issues when
+     * a logical switch has no ACLs configured with allow-related.
+     * To understand the issue, let's take a TCP load balancer -
+     * 10.0.0.10:80=10.0.0.3:80.
+     * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection with
+     * the VIP - 10.0.0.10, then the packet in the ingress pipeline of 'p1'
+     * is sent to the p1's conntrack zone id and the packet is load balanced
+     * to the backend - 10.0.0.3. For the reply packet from the backend lport,
+     * it is not sent to the conntrack of backend lport's zone id. This is fine
+     * as long as the packet is valid. Suppose the backend lport sends an
+     * invalid TCP packet (like incorrect sequence number), the packet gets
+     * delivered to the lport 'p1' without unDNATing the packet to the
+     * VIP - 10.0.0.10. And this causes the connection to be reset by the
+     * lport p1's VIF.
+     *
+     * We can't fix this issue by adding a logical flow to drop ct.inv packets
+     * in the egress pipeline since it will drop all other connections not
+     * destined to the load balancers.
+     *
+     * To fix this issue, we send all the packets to the conntrack in the
+     * ingress pipeline if a load balancer is configured. We can now
+     * add a lflow to drop ct.inv packets.
+     */
     if (vip_configured) {
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
+                      100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
         ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
                       100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
     }
@@ -5477,9 +5497,9 @@ build_port_group_lswitches(struct northd_context *ctx, struct hmap *pgs,
 
 static void
 build_acls(struct ovn_datapath *od, struct hmap *lflows,
-           struct hmap *port_groups)
+           struct hmap *port_groups, struct hmap *lbs)
 {
-    bool has_stateful = has_stateful_acl(od);
+    bool has_stateful = has_stateful_acl(od) || has_lb_vip(od, lbs);
 
     /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
      * default.  A related rule at priority 1 is added below if there
@@ -5736,19 +5756,32 @@ build_qos(struct ovn_datapath *od, struct hmap *lflows) {
 }
 
 static void
-build_lb(struct ovn_datapath *od, struct hmap *lflows)
+build_lb(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
 {
     /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
      * default.  */
     ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
     ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
 
-    if (od->nbs->load_balancer) {
+    if (od->nbs->n_load_balancer) {
         for (size_t i = 0; i < od->n_router_ports; i++) {
             skip_port_from_conntrack(od, od->router_ports[i],
                                      S_SWITCH_IN_LB, S_SWITCH_OUT_LB,
                                      UINT16_MAX, lflows);
         }
+    }
+
+    bool vip_configured = false;
+    for (int i = 0; i < od->nbs->n_load_balancer; i++) {
+        struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
+        struct ovn_lb *lb =
+            ovn_lb_find(lbs, &nb_lb->header_.uuid);
+        ovs_assert(lb);
+
+        vip_configured = (vip_configured || lb->n_vips);
+    }
+
+    if (vip_configured) {
         /* Ingress and Egress LB Table (Priority 65534).
          *
          * Send established traffic through conntrack for just NAT. */
@@ -6622,9 +6655,9 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
         build_pre_acls(od, lflows);
         build_pre_lb(od, lflows, meter_groups, lbs);
         build_pre_stateful(od, lflows);
-        build_acls(od, lflows, port_groups);
+        build_acls(od, lflows, port_groups, lbs);
         build_qos(od, lflows);
-        build_lb(od, lflows);
+        build_lb(od, lflows, lbs);
         build_stateful(od, lflows, lbs);
     }
 
-- 
2.26.2



More information about the dev mailing list