[ovs-dev] [RFC 7/8] ovn-northd: Introduce the loadbalancing table.

Gurucharan Shetty guru at ovn.org
Mon Feb 29 06:33:23 UTC 2016


Invalid packets get dropped. Established connections
need to be sent through conntrack with just "nat" so
that the VIP is changed to a loadbalanced IP address (by
setting reg1 = 2). New connections need to be loadbalanced
to one of the destination IP addresses (by setting reg1 = 3).

This introduces 2 OVN lflow actions.
1. ct_nat. This is used to send packets to conntrack to NAT
them for already established connections.
2. ct_lb($ips). This is used to choose one of the $ips as
destination ip address for new connections that need to
be loadbalanced.

Signed-off-by: Gurucharan Shetty <guru at ovn.org>
---
 ovn/northd/ovn-northd.8.xml | 68 ++++++++++++++++++++++++++++++-----
 ovn/northd/ovn-northd.c     | 86 ++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 140 insertions(+), 14 deletions(-)

diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
index 3117b9a..4c476ee 100644
--- a/ovn/northd/ovn-northd.8.xml
+++ b/ovn/northd/ovn-northd.8.xml
@@ -218,15 +218,58 @@
       </li>
     </ul>
 
-    <h3>Ingress Table 5: STATEFUL</h3>
+    <h3>Ingress Table 5: loadbalancer</h3>
 
     <p>
-      It contains a priority-0 flow that simply moves traffic to table 5.
-      A priority-100 flow commits packets to connection tracker based on a hint
-      provided by the previous tables (with a match for reg1 == 1).
+      It contains a priority-0 flow that simply moves traffic to table 6.
     </p>
 
-    <h3>Ingress Table 6: Destination Lookup</h3>
+    <ul>
+      <li>
+        For any established traffic, a priority-65535 flow that sets
+        'reg1 = 2' as a hint for the STATEFUL table to send the packet
+        through the connection tracker to NAT it.
+      </li>
+
+      <li>
+        A priority-65535 flow that drops all traffic marked by the
+        connection tracker as invalid.
+      </li>
+
+      <li>
+        A priority-100 flow that sets 'reg1 = 3' as a hint for the STATEFUL
+        table to choose one of the destination endpoints associated with
+        a VIP when the destination IP address of the packet is that VIP.
+      </li>
+    </ul>
+
+    <h3>Ingress Table 6: STATEFUL</h3>
+
+    <p>
+      It contains a priority-0 flow that simply moves traffic to table 7.
+    </p>
+
+    <ul>
+      <li>
+        A priority-100 flow commits packets to connection tracker based on a
+        hint provided by the previous tables (with a match for reg1 == 1).
+      </li>
+
+      <li>
+        A priority-100 flow sends packets through connection tracker for NAT
+        based on a hint provided by the previous tables (with a match for
+        reg1 == 2).
+      </li>
+
+      <li>
+        A priority-100 flow commits the connection to the connection tracker
+        for NATting (choosing the destination IP address based on choices
+        provided in the loadbalancer table) based on a hint provided by the
+        previous tables (with a match for reg1 == 3).
+      </li>
+    </ul>
+
+    <h3>Ingress Table 7: Destination Lookup</h3>
 
     <p>
       This table implements switching behavior.  It contains these logical
@@ -294,19 +337,26 @@ output;
       This is similar to ingress table 3.
     </p>
 
-    <h3>Egress Table 3: <code>to-lport</code> ACLs</h3>
+    <h3>Egress Table 3:  loadbalancer</h3>
+
+    <p>
+      This is similar to ingress table 5 except that it does not have
+      any rules to loadbalance new connections.
+    </p>
+
+    <h3>Egress Table 4: <code>to-lport</code> ACLs</h3>
 
     <p>
       This is similar to ingress table 4 except for <code>to-lport</code> ACLs.
     </p>
 
-    <h3>Egress Table 4: STATEFUL</h3>
+    <h3>Egress Table 5: STATEFUL</h3>
 
     <p>
-      This is similar to ingress table 5.
+      This is similar to ingress table 6.
     </p>
 
-    <h3>Egress Table 5: Egress Port Security</h3>
+    <h3>Egress Table 6: Egress Port Security</h3>
 
     <p>
       This is similar to the ingress port security logic in ingress table 0,
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 28f5b45..c5c956b 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -90,16 +90,18 @@ enum ovn_stage {
     PIPELINE_STAGE(SWITCH, IN,  PRE_LB,       2, "ls_in_pre_lb")        \
     PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL, 3, "ls_in_pre_stateful")  \
     PIPELINE_STAGE(SWITCH, IN,  ACL,          4, "ls_in_acl")           \
-    PIPELINE_STAGE(SWITCH, IN,  STATEFUL,     5, "ls_in_stateful")      \
-    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,      6, "ls_in_l2_lkup")       \
+    PIPELINE_STAGE(SWITCH, IN,  LB,           5, "ls_in_lb")            \
+    PIPELINE_STAGE(SWITCH, IN,  STATEFUL,     6, "ls_in_stateful")      \
+    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,      7, "ls_in_l2_lkup")       \
                                                                         \
     /* Logical switch egress stages. */                                 \
     PIPELINE_STAGE(SWITCH, OUT, PRE_LB,       0, "ls_out_pre_lb")       \
     PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,      1, "ls_out_pre_acl")      \
     PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
-    PIPELINE_STAGE(SWITCH, OUT, ACL,          3, "ls_out_acl")          \
-    PIPELINE_STAGE(SWITCH, OUT, STATEFUL,     4, "ls_out_stateful")     \
-    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC,     5, "ls_out_port_sec")     \
+    PIPELINE_STAGE(SWITCH, OUT, LB,           3, "ls_out_lb")           \
+    PIPELINE_STAGE(SWITCH, OUT, ACL,          4, "ls_out_acl")          \
+    PIPELINE_STAGE(SWITCH, OUT, STATEFUL,     5, "ls_out_stateful")     \
+    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC,     6, "ls_out_port_sec")     \
                                                                         \
     /* Logical router ingress stages. */                                \
     PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")      \
@@ -1173,6 +1175,51 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows)
 }
 
 static void
+build_lb(struct ovn_datapath *od, struct hmap *lflows)
+{
+    /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
+     * default.  */
+    ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
+    ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
+
+    if (od->nbs->loadbalancer) {
+        struct nbrec_load_balancer *lb = od->nbs->loadbalancer;
+        struct smap *vips = &lb->vips;
+        struct smap_node *node;
+
+        /* Ingress and Egress LB Table (Priority 65535).
+         *
+         * Always drop traffic that's in an invalid state.  This is
+         * enforced at a higher priority than ACLs can be defined. */
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
+                      "ct.inv", "drop;");
+        ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
+                      "ct.inv", "drop;");
+
+        /* Ingress and Egress LB Table (Priority 65535).
+         *
+         * Send established traffic through conntrack for just NAT. */
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
+                      "ct.est && !ct.rel && !ct.new && !ct.inv",
+                      "reg1 = 2; next;");
+        ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
+                      "ct.est && !ct.rel && !ct.new && !ct.inv",
+                      "reg1 = 2; next;");
+
+        SMAP_FOR_EACH (node, vips) {
+            struct ds match = DS_EMPTY_INITIALIZER;
+
+            /* New connections in Ingress table. */
+            ds_put_format(&match, "ip && ip4.dst == %s", node->key);
+            ovn_lflow_add(lflows, od, S_SWITCH_IN_LB,
+                          100, ds_cstr(&match), "reg1 = 3; next;");
+
+            ds_destroy(&match);
+        }
+    }
+}
+
+static void
 build_stateful(struct ovn_datapath *od, struct hmap *lflows)
 {
     /* Ingress and Egress STATEFUL Table (Priority 0): Packets are
@@ -1186,6 +1233,34 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows)
                   "ct_commit; next;");
     ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, "reg1 == 1",
                   "ct_commit; next;");
+
+    /* If reg1 is set as 2, then packets should just be sent through
+     * nat (without committing). */
+    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, "reg1 == 2",
+                  "ct_nat;");
+    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, "reg1 == 2",
+                  "ct_nat;");
+
+    if (od->nbs->loadbalancer) {
+        struct nbrec_load_balancer *lb = od->nbs->loadbalancer;
+        struct smap *vips = &lb->vips;
+        struct smap_node *node;
+
+        SMAP_FOR_EACH (node, vips) {
+            struct ds match = DS_EMPTY_INITIALIZER;
+            struct ds action = DS_EMPTY_INITIALIZER;
+
+            /* If reg1 is set as 3, then packets should be sent through
+             * nat and committed. */
+            ds_put_format(&match, "ip && ip4.dst == %s && reg1 == 3",
+                          node->key);
+            ds_put_format(&action, "ct_lb(\"%s\");", node->value);
+            ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
+                          ds_cstr(&match), ds_cstr(&action));
+            ds_destroy(&match);
+            ds_destroy(&action);
+        }
+    }
 }
 
 static void
@@ -1207,6 +1282,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
         build_pre_lb(od, lflows);
         build_pre_stateful(od, lflows);
         build_acls(od, lflows);
+        build_lb(od, lflows);
         build_stateful(od, lflows);
     }
 
-- 
1.9.1




More information about the dev mailing list