[ovs-dev] [PATCH] ovn: Add stateful ACL support.

Justin Pettit jpettit at nicira.com
Thu Oct 15 17:32:51 UTC 2015


Add support for the "allow-related" ACL action.  This is dependent on
the OVS conntrack functionality, which is not available on all platforms
or kernel versions.

Here is a sample policy that will allow all tenants in logical switch
"ls0" to SSH to each other.  Anyone can make an HTTP request to "lp0".
All other IP traffic is dropped:

  ovn-nbctl acl-add ls0 from-lport 100 ip allow-related
  ovn-nbctl acl-add ls0 to-lport 100 tcp.dst==22 allow-related
  ovn-nbctl acl-add ls0 to-lport 100 "outport == \"lp0\" \
            && tcp.dst==80" allow-related
  ovn-nbctl acl-add ls0 to-lport 1 ip drop

Note: Kernel conntrack support is checked into the mainline Linux
kernel, but hasn't been backported to the main OVS repo yet.
---
I've pushed this patch on a partial backport of conntrack here:

    https://github.com/justinpettit/ovs/tree/ovn-acl
---
 ovn/TODO                            |    8 ++
 ovn/controller/binding.c            |   51 +++++++++-
 ovn/controller/binding.h            |    4 +-
 ovn/controller/lflow.c              |   16 +++-
 ovn/controller/lflow.h              |    8 +-
 ovn/controller/ovn-controller.8.xml |    5 +
 ovn/controller/ovn-controller.c     |   33 ++++++-
 ovn/controller/ovn-controller.h     |    4 +
 ovn/controller/physical.c           |   16 +++-
 ovn/controller/physical.h           |    3 +-
 ovn/lib/actions.c                   |   48 ++++++++-
 ovn/lib/actions.h                   |   13 ++-
 ovn/northd/ovn-northd.c             |  196 ++++++++++++++++++++++++++---------
 ovn/ovn-architecture.7.xml          |    8 ++
 ovn/ovn-sb.xml                      |   60 ++++++++++--
 tests/ovn.at                        |    2 +
 tests/test-ovn.c                    |    8 +-
 17 files changed, 400 insertions(+), 83 deletions(-)

diff --git a/ovn/TODO b/ovn/TODO
index a48251f..8d22c2c 100644
--- a/ovn/TODO
+++ b/ovn/TODO
@@ -137,3 +137,11 @@
    Both ovn-controller and ovn-contorller-vtep should use BFD to
    monitor the tunnel liveness.  Both ovs-vswitchd schema and
    VTEP schema supports BFD.
+
+* ACL
+
+** Support FTP ALGs.
+
+** Support reject action.
+
+** Support log option.
diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
index fca2430..9759312 100644
--- a/ovn/controller/binding.c
+++ b/ovn/controller/binding.c
@@ -16,6 +16,7 @@
 #include <config.h>
 #include "binding.h"
 
+#include "lib/bitmap.h"
 #include "lib/sset.h"
 #include "lib/util.h"
 #include "lib/vswitch-idl.h"
@@ -71,9 +72,55 @@ get_local_iface_ids(const struct ovsrec_bridge *br_int, struct sset *lports)
     }
 }
 
+static void
+update_ct_zones(struct sset *lports, struct simap *ct_zones,
+                unsigned long *ct_zone_bitmap)
+{
+    struct simap_node *ct_zone, *ct_zone_next;
+    const char *iface_id;
+    int scan_start = 1;
+
+    /* xxx This is wasteful to assign a zone to each port--even if no
+     * xxx security policy is applied. */
+
+    /* Delete any zones that are associated with removed ports. */
+    SIMAP_FOR_EACH_SAFE(ct_zone, ct_zone_next, ct_zones) {
+        if (!sset_contains(lports, ct_zone->name)) {
+            bitmap_set0(ct_zone_bitmap, ct_zone->data);
+            simap_delete(ct_zones, ct_zone);
+        }
+    }
+
+    /* Assign a unique zone id for each logical port. */
+    SSET_FOR_EACH(iface_id, lports) {
+        size_t zone;
+
+        if (simap_contains(ct_zones, iface_id)) {
+            continue;
+        }
+
+        /* We assume that there are 64K zones and that we own them all. */
+        zone = bitmap_scan(ct_zone_bitmap, 0, scan_start, MAX_CT_ZONES + 1);
+        if (zone == MAX_CT_ZONES + 1) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+            VLOG_WARN_RL(&rl, "exhausted all ct zones");
+            return;
+        }
+        scan_start = zone + 1;
+
+        bitmap_set1(ct_zone_bitmap, zone);
+        simap_put(ct_zones, iface_id, zone);
+
+        /* xxx We should erase any old entries for this
+         * xxx zone, but we need a generic interface to the conntrack
+         * xxx table. */
+    }
+}
+
 void
 binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int,
-            const char *chassis_id)
+            const char *chassis_id, struct simap *ct_zones,
+            unsigned long *ct_zone_bitmap)
 {
     const struct sbrec_chassis *chassis_rec;
     const struct sbrec_port_binding *binding_rec;
@@ -97,6 +144,7 @@ binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int,
         /* We have no integration bridge, therefore no local logical ports.
          * We'll remove our chassis from all port binding records below. */
     }
+    update_ct_zones(&lports, ct_zones, ct_zone_bitmap);
     sset_clone(&all_lports, &lports);
 
     ovsdb_idl_txn_add_comment(
@@ -141,6 +189,7 @@ binding_cleanup(struct controller_ctx *ctx, const char *chassis_id)
     if (!chassis_id) {
         return true;
     }
+
     const struct sbrec_chassis *chassis_rec
         = get_chassis_by_name(ctx->ovnsb_idl, chassis_id);
     if (!chassis_rec) {
diff --git a/ovn/controller/binding.h b/ovn/controller/binding.h
index 708a6e6..3d4a91e 100644
--- a/ovn/controller/binding.h
+++ b/ovn/controller/binding.h
@@ -22,10 +22,12 @@
 struct controller_ctx;
 struct ovsdb_idl;
 struct ovsrec_bridge;
+struct simap;
 
 void binding_register_ovs_idl(struct ovsdb_idl *);
 void binding_run(struct controller_ctx *, const struct ovsrec_bridge *br_int,
-                 const char *chassis_id);
+                 const char *chassis_id, struct simap *ct_zones,
+                 unsigned long *ct_zone_bitmap);
 bool binding_cleanup(struct controller_ctx *, const char *chassis_id);
 
 #endif /* ovn/binding.h */
diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index 9246e61..8e25857 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -58,6 +58,15 @@ symtab_init(void)
     MFF_LOG_REGS;
 #undef MFF_LOG_REG
 
+    /* Connection tracking state. */
+    expr_symtab_add_field(&symtab, "ct_state", MFF_CT_STATE, NULL, false);
+    expr_symtab_add_predicate(&symtab, "ct.trk", "ct_state[7]");
+    expr_symtab_add_subfield(&symtab, "ct.new", "ct.trk", "ct_state[0]");
+    expr_symtab_add_subfield(&symtab, "ct.est", "ct.trk", "ct_state[1]");
+    expr_symtab_add_subfield(&symtab, "ct.rel", "ct.trk", "ct_state[2]");
+    expr_symtab_add_subfield(&symtab, "ct.inv", "ct.trk", "ct_state[5]");
+    expr_symtab_add_subfield(&symtab, "ct.rpl", "ct.trk", "ct_state[6]");
+
     /* Data fields. */
     expr_symtab_add_field(&symtab, "eth.src", MFF_ETH_SRC, NULL, false);
     expr_symtab_add_field(&symtab, "eth.dst", MFF_ETH_DST, NULL, false);
@@ -245,7 +254,8 @@ lflow_init(void)
 /* Translates logical flows in the Logical_Flow table in the OVN_SB database
  * into OpenFlow flows.  See ovn-architecture(7) for more information. */
 void
-lflow_run(struct controller_ctx *ctx, struct hmap *flow_table)
+lflow_run(struct controller_ctx *ctx, struct hmap *flow_table,
+          const struct simap *ct_zones)
 {
     struct hmap flows = HMAP_INITIALIZER(&flows);
     uint32_t conj_id_ofs = 1;
@@ -284,8 +294,8 @@ lflow_run(struct controller_ctx *ctx, struct hmap *flow_table)
 
         ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
         error = actions_parse_string(lflow->actions, &symtab, &ldp->ports,
-                                     next_phys_table, output_phys_table,
-                                     &ofpacts, &prereqs);
+                                     ct_zones, next_phys_table,
+                                     output_phys_table, &ofpacts, &prereqs);
         if (error) {
             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
             VLOG_WARN_RL(&rl, "error parsing actions \"%s\": %s",
diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h
index 5cac76c..c3a92f6 100644
--- a/ovn/controller/lflow.h
+++ b/ovn/controller/lflow.h
@@ -36,6 +36,7 @@
 
 struct controller_ctx;
 struct hmap;
+struct simap;
 struct uuid;
 
 /* OpenFlow table numbers.
@@ -58,6 +59,7 @@ struct uuid;
  * These values are documented in ovn-architecture(7), please update the
  * documentation if you change any of them. */
 #define MFF_LOG_DATAPATH MFF_METADATA /* Logical datapath (64 bits). */
+#define MFF_LOG_CT_ZONE  MFF_REG5     /* Logical conntrack zone (32 bits). */
 #define MFF_LOG_INPORT   MFF_REG6     /* Logical input port (32 bits). */
 #define MFF_LOG_OUTPORT  MFF_REG7     /* Logical output port (32 bits). */
 
@@ -69,11 +71,11 @@ struct uuid;
     MFF_LOG_REG(MFF_REG1) \
     MFF_LOG_REG(MFF_REG2) \
     MFF_LOG_REG(MFF_REG3) \
-    MFF_LOG_REG(MFF_REG4) \
-    MFF_LOG_REG(MFF_REG5)
+    MFF_LOG_REG(MFF_REG4)
 
 void lflow_init(void);
-void lflow_run(struct controller_ctx *, struct hmap *flow_table);
+void lflow_run(struct controller_ctx *, struct hmap *flow_table,
+               const struct simap *ct_zones);
 void lflow_destroy(void);
 
 #endif /* ovn/lflow.h */
diff --git a/ovn/controller/ovn-controller.8.xml b/ovn/controller/ovn-controller.8.xml
index aadec15..98aa460 100644
--- a/ovn/controller/ovn-controller.8.xml
+++ b/ovn/controller/ovn-controller.8.xml
@@ -173,6 +173,11 @@
       <dd>
         Causes <code>ovn-controller</code> to gracefully terminate.
       </dd>
+
+      <dt><code>ct-zone-list</code></dt>
+      <dd>
+        Lists each local logical port and its connection tracking zone.
+      </dd>
       </dl>
     </p>
 
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index f1bc524..3a31828 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -27,6 +27,7 @@
 #include "compiler.h"
 #include "daemon.h"
 #include "dirs.h"
+#include "dynamic-string.h"
 #include "openvswitch/vconn.h"
 #include "openvswitch/vlog.h"
 #include "ovn/lib/ovn-sb-idl.h"
@@ -49,6 +50,7 @@
 VLOG_DEFINE_THIS_MODULE(main);
 
 static unixctl_cb_func ovn_controller_exit;
+static unixctl_cb_func ct_zone_list;
 
 #define DEFAULT_BRIDGE_NAME "br-int"
 
@@ -435,6 +437,13 @@ main(int argc, char *argv[])
         ovsdb_idl_create(ovnsb_remote, &sbrec_idl_class, true, true));
     ovsdb_idl_get_initial_snapshot(ovnsb_idl_loop.idl);
 
+    /* Initialize connection tracking zones. */
+    struct simap ct_zones = SIMAP_INITIALIZER(&ct_zones);
+    unsigned long ct_zone_bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
+    bitmap_set1(ct_zone_bitmap, 0); /* Zone 0 is reserved. */
+    unixctl_command_register("ct-zone-list", "", 0, 0,
+                             ct_zone_list, &ct_zones);
+
     /* Main loop. */
     exiting = false;
     while (!exiting) {
@@ -456,17 +465,17 @@ main(int argc, char *argv[])
         if (chassis_id) {
             chassis_run(&ctx, chassis_id);
             encaps_run(&ctx, br_int, chassis_id);
-            binding_run(&ctx, br_int, chassis_id);
+            binding_run(&ctx, br_int, chassis_id, &ct_zones, ct_zone_bitmap);
         }
 
         if (br_int) {
             enum mf_field_id mff_ovn_geneve = ofctrl_run(br_int);
 
             struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
-            lflow_run(&ctx, &flow_table);
+            lflow_run(&ctx, &flow_table, &ct_zones);
             if (chassis_id) {
                 physical_run(&ctx, mff_ovn_geneve,
-                             br_int, chassis_id, &flow_table);
+                             br_int, chassis_id, &ct_zones, &flow_table);
             }
             ofctrl_put(&flow_table);
             hmap_destroy(&flow_table);
@@ -522,6 +531,8 @@ main(int argc, char *argv[])
     lflow_destroy();
     ofctrl_destroy();
 
+    simap_destroy(&ct_zones);
+
     ovsdb_idl_loop_destroy(&ovs_idl_loop);
     ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
 
@@ -629,3 +640,19 @@ ovn_controller_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
 
     unixctl_command_reply(conn, NULL);
 }
+
+static void
+ct_zone_list(struct unixctl_conn *conn, int argc OVS_UNUSED,
+             const char *argv[] OVS_UNUSED, void *ct_zones_)
+{
+    struct simap *ct_zones = ct_zones_;
+    struct ds ds = DS_EMPTY_INITIALIZER;
+    struct simap_node *zone;
+
+    SIMAP_FOR_EACH(zone, ct_zones) {
+        ds_put_format(&ds, "%s %d\n", zone->name, zone->data);
+    }
+
+    unixctl_command_reply(conn, ds_cstr(&ds));
+    ds_destroy(&ds);
+}
diff --git a/ovn/controller/ovn-controller.h b/ovn/controller/ovn-controller.h
index be89b5f..c47212c 100644
--- a/ovn/controller/ovn-controller.h
+++ b/ovn/controller/ovn-controller.h
@@ -17,8 +17,12 @@
 #ifndef OVN_CONTROLLER_H
 #define OVN_CONTROLLER_H 1
 
+#include "simap.h"
 #include "ovn/lib/ovn-sb-idl.h"
 
+/* Linux supports a maximum of 64K zones, which seems like a fine default. */
+#define MAX_CT_ZONES 65535
+
 struct controller_ctx {
     struct ovsdb_idl *ovnsb_idl;
     struct ovsdb_idl_txn *ovnsb_idl_txn;
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index 0c239df..630d420 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -136,7 +136,7 @@ put_stack(enum mf_field_id field, struct ofpact_stack *stack)
 void
 physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
              const struct ovsrec_bridge *br_int, const char *this_chassis_id,
-             struct hmap *flow_table)
+             const struct simap *ct_zones, struct hmap *flow_table)
 {
     struct simap localvif_to_ofport = SIMAP_INITIALIZER(&localvif_to_ofport);
     struct hmap tunnels = HMAP_INITIALIZER(&tunnels);
@@ -293,6 +293,7 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
 
         struct match match;
         if (!tun) {
+            int zone_id = simap_get(ct_zones, binding->logical_port);
             /* Packets that arrive from a vif can belong to a VM or
              * to a container located inside that VM. Packets that
              * arrive from containers have a tag (vlan) associated with them.
@@ -360,6 +361,10 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
                     match_set_dl_vlan(&match, htons(tag));
                 }
 
+                if (zone_id) {
+                    put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts);
+                }
+
                 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
                 put_load(binding->datapath->tunnel_key, MFF_LOG_DATAPATH, 0, 64,
                          &ofpacts);
@@ -393,6 +398,10 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
             match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
                           binding->tunnel_key);
 
+            if (zone_id) {
+                put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts);
+            }
+
             /* Resubmit to table 34. */
             put_resubmit(OFTABLE_DROP_LOOPBACK, &ofpacts);
             ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, &match,
@@ -500,6 +509,11 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
                 continue;
             }
 
+            int zone_id = simap_get(ct_zones, port->logical_port);
+            if (zone_id) {
+                put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts);
+            }
+
             if (simap_contains(&localvif_to_ofport,
                                port->parent_port
                                ? port->parent_port : port->logical_port)) {
diff --git a/ovn/controller/physical.h b/ovn/controller/physical.h
index c79e97b..a06f759 100644
--- a/ovn/controller/physical.h
+++ b/ovn/controller/physical.h
@@ -31,6 +31,7 @@ struct controller_ctx;
 struct hmap;
 struct ovsdb_idl;
 struct ovsrec_bridge;
+struct simap;
 
 /* OVN Geneve option information.
  *
@@ -44,6 +45,6 @@ struct ovsrec_bridge;
 void physical_register_ovs_idl(struct ovsdb_idl *);
 void physical_run(struct controller_ctx *, enum mf_field_id mff_ovn_geneve,
                   const struct ovsrec_bridge *br_int, const char *chassis_id,
-                  struct hmap *flow_table);
+                  const struct simap *ct_zones, struct hmap *flow_table);
 
 #endif /* ovn/physical.h */
diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
index 4170fc5..09a7a63 100644
--- a/ovn/lib/actions.c
+++ b/ovn/lib/actions.c
@@ -24,6 +24,7 @@
 #include "lex.h"
 #include "ofp-actions.h"
 #include "ofpbuf.h"
+#include "simap.h"
 
 /* Context maintained during actions_parse(). */
 struct action_context {
@@ -33,6 +34,7 @@ struct action_context {
     uint8_t next_table_id;      /* OpenFlow table for 'next' to resubmit. */
     uint8_t output_table_id;    /* OpenFlow table for 'output' to resubmit. */
     const struct simap *ports;  /* Map from port name to number. */
+    const struct simap *ct_zones; /* Map from port name to conntrack zone. */
 
     /* State. */
     char *error;                /* Error, if any, otherwise NULL. */
@@ -132,6 +134,32 @@ emit_resubmit(struct action_context *ctx, uint8_t table_id)
 }
 
 static void
+emit_ct(struct action_context *ctx, bool recirc_next, bool commit)
+{
+    struct ofpact_conntrack *ct = ofpact_put_CT(ctx->ofpacts);
+    ct->flags |= commit ? NX_CT_F_COMMIT : 0;
+
+    /* If "recirc" is set, we automatically go to the next table. */
+    ct->recirc_table = recirc_next ? ctx->next_table_id: NX_CT_RECIRC_NONE;
+
+    /* xxx Should remove hard-coding reg5 if we refactor library. */
+    ct->zone_src.field = mf_from_id(MFF_REG5);
+    ct->zone_src.ofs = 0;
+    ct->zone_src.n_bits = 16;
+
+    /* We do not support ALGs yet. */
+    ct->alg = htons(0);
+
+    /* CT only works with IP, so set up a prerequisite. */
+    struct expr *expr;
+    char *error;
+
+    expr = expr_parse_string("ip", ctx->symtab, &error);
+    ovs_assert(!error);
+    ctx->prereqs = expr_combine(EXPR_T_AND, ctx->prereqs, expr);
+}
+
+static void
 parse_actions(struct action_context *ctx)
 {
     /* "drop;" by itself is a valid (empty) set of actions, but it can't be
@@ -165,6 +193,10 @@ parse_actions(struct action_context *ctx)
             }
         } else if (lexer_match_id(ctx->lexer, "output")) {
             emit_resubmit(ctx, ctx->output_table_id);
+        } else if (lexer_match_id(ctx->lexer, "ct_next")) {
+            emit_ct(ctx, true, false);
+        } else if (lexer_match_id(ctx->lexer, "ct_commit")) {
+            emit_ct(ctx, false, true);
         } else {
             action_syntax_error(ctx, "expecting action");
         }
@@ -188,6 +220,8 @@ parse_actions(struct action_context *ctx)
  * (as one would provide to expr_to_matches()).  Strings used in the actions
  * that are not in 'ports' are translated to zero.
  *
+ * 'ct_zones' provides a map from a port name to its connection tracking zone.
+ *
  * 'next_table_id' should be the OpenFlow table to which the "next" action will
  * resubmit, or 0 to disable "next".
  *
@@ -206,8 +240,9 @@ parse_actions(struct action_context *ctx)
   */
 char * OVS_WARN_UNUSED_RESULT
 actions_parse(struct lexer *lexer, const struct shash *symtab,
-              const struct simap *ports, uint8_t next_table_id,
-              uint8_t output_table_id, struct ofpbuf *ofpacts,
+              const struct simap *ports, const struct simap *ct_zones,
+              uint8_t next_table_id, uint8_t output_table_id,
+              struct ofpbuf *ofpacts,
               struct expr **prereqsp)
 {
     size_t ofpacts_start = ofpacts->size;
@@ -216,6 +251,7 @@ actions_parse(struct lexer *lexer, const struct shash *symtab,
     ctx.lexer = lexer;
     ctx.symtab = symtab;
     ctx.ports = ports;
+    ctx.ct_zones = ct_zones;
     ctx.next_table_id = next_table_id;
     ctx.output_table_id = output_table_id;
     ctx.error = NULL;
@@ -238,16 +274,16 @@ actions_parse(struct lexer *lexer, const struct shash *symtab,
 /* Like actions_parse(), but the actions are taken from 's'. */
 char * OVS_WARN_UNUSED_RESULT
 actions_parse_string(const char *s, const struct shash *symtab,
-                     const struct simap *ports, uint8_t next_table_id,
-                     uint8_t output_table_id, struct ofpbuf *ofpacts,
-                     struct expr **prereqsp)
+                     const struct simap *ports, const struct simap *ct_zones,
+                     uint8_t next_table_id, uint8_t output_table_id,
+                     struct ofpbuf *ofpacts, struct expr **prereqsp)
 {
     struct lexer lexer;
     char *error;
 
     lexer_init(&lexer, s);
     lexer_get(&lexer);
-    error = actions_parse(&lexer, symtab, ports, next_table_id,
+    error = actions_parse(&lexer, symtab, ports, ct_zones, next_table_id,
                           output_table_id, ofpacts, prereqsp);
     lexer_destroy(&lexer);
 
diff --git a/ovn/lib/actions.h b/ovn/lib/actions.h
index 74cd185..377b273 100644
--- a/ovn/lib/actions.h
+++ b/ovn/lib/actions.h
@@ -27,14 +27,15 @@ struct shash;
 struct simap;
 
 char *actions_parse(struct lexer *, const struct shash *symtab,
-                    const struct simap *ports, uint8_t next_table_id,
-                    uint8_t output_table_id, struct ofpbuf *ofpacts,
-                    struct expr **prereqsp)
+                    const struct simap *ports, const struct simap *ct_zones,
+                    uint8_t next_table_id, uint8_t output_table_id,
+                    struct ofpbuf *ofpacts, struct expr **prereqsp)
     OVS_WARN_UNUSED_RESULT;
 char *actions_parse_string(const char *s, const struct shash *symtab,
-                           const struct simap *ports, uint8_t next_table_id,
-                           uint8_t output_table_id, struct ofpbuf *ofpacts,
-                           struct expr **prereqsp)
+                           const struct simap *ports,
+                           const struct simap *ct_zones,
+                           uint8_t next_table_id, uint8_t output_table_id,
+                           struct ofpbuf *ofpacts, struct expr **prereqsp)
     OVS_WARN_UNUSED_RESULT;
 
 #endif /* ovn/actions.h */
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index e698907..ca2eca6 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -60,6 +60,7 @@ static const char *default_db(void);
  * These must be listed in the order that the stages will be executed. */
 #define INGRESS_STAGES                         \
     INGRESS_STAGE(PORT_SEC, port_sec)          \
+    INGRESS_STAGE(PRE_ACL, pre_acl)            \
     INGRESS_STAGE(ACL, acl)                    \
     INGRESS_STAGE(L2_LKUP, l2_lkup)
 
@@ -73,8 +74,9 @@ enum ingress_stage {
 /* Egress pipeline stages.
  *
  * These must be listed in the order that the stages will be executed. */
-#define EGRESS_STAGES                         \
-    EGRESS_STAGE(ACL, acl)                    \
+#define EGRESS_STAGES                          \
+    EGRESS_STAGE(PRE_ACL, pre_acl)             \
+    EGRESS_STAGE(ACL, acl)                     \
     EGRESS_STAGE(PORT_SEC, port_sec)
 
 enum egress_stage {
@@ -722,6 +724,140 @@ lport_is_enabled(const struct nbrec_logical_port *lport)
     return !lport->enabled || *lport->enabled;
 }
 
+static bool
+has_stateful_acl(struct ovn_datapath *od)
+{
+    for (size_t i = 0; i < od->nb->n_acls; i++) {
+        struct nbrec_acl *acl = od->nb->acls[i];
+        if (!strcmp(acl->action, "allow-related")) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static void
+build_acls(struct ovn_datapath *od, struct hmap *lflows)
+{
+    bool has_stateful = has_stateful_acl(od);
+
+    /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
+     * allowed by default. */
+    ovn_lflow_add(lflows, od, P_IN, S_IN_PRE_ACL, 0, "1", "next;");
+    ovn_lflow_add(lflows, od, P_OUT, S_OUT_PRE_ACL, 0, "1", "next;");
+
+    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
+     * default.  A related rule at priority 1 is added below if there
+     * are any stateful ACLs in this datapath. */
+    ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, 0, "1", "next;");
+    ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, 0, "1", "next;");
+
+    /* If there are any stateful ACL rules in this dapapath, we must
+     * send all IP packets through the conntrack action, which handles
+     * defragmentation, in order to match L4 headers. */
+    if (has_stateful) {
+        /* Ingress and Egress Pre-ACL Table (Priority 100).
+         *
+         * Regardless of whether the ACL is "from-lport" or "to-lport",
+         * we need rules in both the ingress and egress table, because
+         * the return traffic needs to be followed. */
+        ovn_lflow_add(lflows, od, P_IN, S_IN_PRE_ACL, 100,
+                      "ip", "ct_next;");
+        ovn_lflow_add(lflows, od, P_OUT, S_OUT_PRE_ACL, 100,
+                      "ip", "ct_next;");
+
+        /* Ingress and Egress ACL Table (Priority 1).
+         *
+         * By default, traffic is allowed.  This is partially handled by
+         * the Priority 0 ACL flows added earlier, but we also need to
+         * commit IP flows.  This is because, while the initiater's
+         * direction may not have any stateful rules, the server's may
+         * and then its return traffic would not have an associated
+         * conntrack entry and would return "+invalid". */
+        ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, 1, "ip",
+                      "ct_commit; next;");
+        ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, 1, "ip",
+                      "ct_commit; next;");
+
+        /* Ingress and Egress ACL Table (Priority 65535).
+         *
+         * Always drop traffic that's in an invalid state.  This is
+         * enforced at a higher priority than ACLs can be defined. */
+        ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, UINT16_MAX,
+                      "ct.inv", "drop;");
+        ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, UINT16_MAX,
+                      "ct.inv", "drop;");
+
+        /* Ingress and Egress ACL Table (Priority 65535).
+         *
+         * Always allow traffic that is established to a committed
+         * conntrack entry.  This is enforced at a higher priority than
+         * ACLs can be defined. */
+        ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, UINT16_MAX,
+                      "ct.est && !ct.rel && !ct.new && !ct.inv",
+                      "next;");
+        ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, UINT16_MAX,
+                      "ct.est && !ct.rel && !ct.new && !ct.inv",
+                      "next;");
+
+        /* Ingress and Egress ACL Table (Priority 65535).
+         *
+         * Always allow traffic that is related to an existing conntrack
+         * entry.  This is enforced at a higher priority than ACLs can
+         * be defined.
+         *
+         * NOTE: This does not support related data sessions (eg,
+         * a dynamically negotiated FTP data channel), but will allow
+         * related traffic such as an ICMP Port Unreachable through
+         * that's generated from a non-listening UDP port.  */
+        ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, UINT16_MAX,
+                      "!ct.est && ct.rel && !ct.new && !ct.inv",
+                      "next;");
+        ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, UINT16_MAX,
+                      "!ct.est && ct.rel && !ct.new && !ct.inv",
+                      "next;");
+    }
+
+    /* Ingress or Egress ACL Table (Various priorities). */
+    for (size_t i = 0; i < od->nb->n_acls; i++) {
+        struct nbrec_acl *acl = od->nb->acls[i];
+        bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
+        enum ovn_pipeline pipeline = ingress ? P_IN : P_OUT;
+        uint8_t stage = ingress ? S_IN_ACL : S_OUT_ACL;
+
+        if (!strcmp(acl->action, "allow")) {
+            /* If there are any stateful flows, we must even commit "allow"
+             * actions.  This is because, while the initiater's
+             * direction may not have any stateful rules, the server's
+             * may and then its return traffic would not have an
+             * associated conntrack entry and would return "+invalid". */
+            const char *actions = has_stateful ? "ct_commit; next;" : "next;";
+            ovn_lflow_add(lflows, od, pipeline, stage, acl->priority,
+                          acl->match, actions);
+        } else if (!strcmp(acl->action, "allow-related")) {
+            struct ds match = DS_EMPTY_INITIALIZER;
+
+            /* Commit the connection tracking entry, which allows all
+             * other traffic related to this entry to flow due to the
+             * 65535 priority flow defined earlier. */
+            ds_put_format(&match, "ct.new && (%s)", acl->match);
+            ovn_lflow_add(lflows, od, pipeline, stage, acl->priority,
+                          ds_cstr(&match), "ct_commit; next;");
+
+            ds_destroy(&match);
+        } else if (!strcmp(acl->action, "drop")) {
+            ovn_lflow_add(lflows, od, pipeline, stage, acl->priority,
+                          acl->match, "drop;");
+        } else if (!strcmp(acl->action, "reject")) {
+            /* xxx Need to support "reject". */
+            VLOG_INFO("reject is not a supported action");
+            ovn_lflow_add(lflows, od, pipeline, stage, acl->priority,
+                          acl->match, "drop;");
+        }
+    }
+}
+
 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
  * constructing their contents based on the OVN_NB database. */
 static void
@@ -769,27 +905,6 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
         ds_destroy(&match);
     }
 
-    /* Ingress table 1: ACLs (any priority). */
-    HMAP_FOR_EACH (od, key_node, datapaths) {
-        for (size_t i = 0; i < od->nb->n_acls; i++) {
-            const struct nbrec_acl *acl = od->nb->acls[i];
-            const char *action;
-
-            if (strcmp(acl->direction, "from-lport")) {
-                continue;
-            }
-
-            action = (!strcmp(acl->action, "allow") ||
-                      !strcmp(acl->action, "allow-related"))
-                ? "next;" : "drop;";
-            ovn_lflow_add(&lflows, od, P_IN, S_IN_ACL, acl->priority,
-                          acl->match, action);
-        }
-    }
-    HMAP_FOR_EACH (od, key_node, datapaths) {
-        ovn_lflow_add(&lflows, od, P_IN, S_IN_ACL, 0, "1", "next;");
-    }
-
     /* Ingress table 2: Destination lookup, broadcast and multicast handling
      * (priority 100). */
     HMAP_FOR_EACH (op, key_node, ports) {
@@ -802,7 +917,7 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
                       "outport = \""MC_FLOOD"\"; output;");
     }
 
-    /* Ingress table 2: Destination lookup, unicast handling (priority 50), */
+    /* Ingress table 3: Destination lookup, unicast handling (priority 50), */
     HMAP_FOR_EACH (op, key_node, ports) {
         for (size_t i = 0; i < op->nb->n_macs; i++) {
             struct eth_addr mac;
@@ -835,7 +950,7 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
         }
     }
 
-    /* Ingress table 2: Destination lookup for unknown MACs (priority 0). */
+    /* Ingress table 3: Destination lookup for unknown MACs (priority 0). */
     HMAP_FOR_EACH (od, key_node, datapaths) {
         if (od->has_unknown) {
             ovn_lflow_add(&lflows, od, P_IN, S_IN_L2_LKUP, 0, "1",
@@ -843,35 +958,14 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
         }
     }
 
-    /* Egress table 0: ACLs (any priority). */
-    HMAP_FOR_EACH (od, key_node, datapaths) {
-        for (size_t i = 0; i < od->nb->n_acls; i++) {
-            const struct nbrec_acl *acl = od->nb->acls[i];
-            const char *action;
-
-            if (strcmp(acl->direction, "to-lport")) {
-                continue;
-            }
-
-            action = (!strcmp(acl->action, "allow") ||
-                      !strcmp(acl->action, "allow-related"))
-                ? "next;" : "drop;";
-            ovn_lflow_add(&lflows, od, P_OUT, S_OUT_ACL, acl->priority,
-                          acl->match, action);
-        }
-    }
-    HMAP_FOR_EACH (od, key_node, datapaths) {
-        ovn_lflow_add(&lflows, od, P_OUT, S_OUT_ACL, 0, "1", "next;");
-    }
-
-    /* Egress table 1: Egress port security multicast/broadcast (priority
+    /* Egress table 2: Egress port security multicast/broadcast (priority
      * 100). */
     HMAP_FOR_EACH (od, key_node, datapaths) {
         ovn_lflow_add(&lflows, od, P_OUT, S_OUT_PORT_SEC, 100, "eth.dst[40]",
                       "output;");
     }
 
-    /* Egress table 1: Egress port security (priorities 50 and 150).
+    /* Egress table 2: Egress port security (priorities 50 and 150).
      *
      * Priority 50 rules implement port security for enabled logical port.
      *
@@ -897,6 +991,12 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
         ds_destroy(&match);
     }
 
+    /* Build pre-ACL and ACL tables for both ingress and egress.
+     * Ingress tables 1 and 2.  Egress tables 0 and 1. */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        build_acls(od, &lflows);
+    }
+
     /* Push changes to the Logical_Flow table to database. */
     const struct sbrec_logical_flow *sbflow, *next_sbflow;
     SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
diff --git a/ovn/ovn-architecture.7.xml b/ovn/ovn-architecture.7.xml
index 47dfc2a..f5b8440 100644
--- a/ovn/ovn-architecture.7.xml
+++ b/ovn/ovn-architecture.7.xml
@@ -654,6 +654,14 @@
       tunnels as part of the tunnel key.)
     </dd>
 
+    <dt>conntrack zone field</dt>
+    <dd>
+      A field that denotes the connection tracking zone.  The value only
+      has local significance and is not meaningful between chassis.
+      This is initialized to 0 at the beginning of the logical ingress
+      pipeline.  OVN stores this in Nicira extension register number 5.
+    </dd>
+
     <dt>VLAN ID</dt>
     <dd>
       The VLAN ID is used as an interface between OVN and containers nested
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 088c4b7..60f06d1 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -297,10 +297,11 @@
       <code>inport</code> to <code>outport</code>; if they are equal, it treats
       the <code>output</code> as a no-op.  In the common case, where they are
       different, the packet enters the egress pipeline.  This transition to the
-      egress pipeline discards register data, e.g. <code>reg0</code>
-      ... <code>reg5</code>, to achieve uniform behavior regardless of whether
-      the egress pipeline is on a different hypervisor (because registers
-      aren't preserve across tunnel encapsulation).
+      egress pipeline discards register data, e.g. <code>reg0</code> ...
+      <code>reg4</code> and connection tracking state, to achieve
+      uniform behavior regardless of whether the egress pipeline is on a
+      different hypervisor (because registers aren't preserve across
+      tunnel encapsulation).
     </p>
 
     <p>
@@ -684,7 +685,7 @@
       </p>
 
       <ul>
-        <li><code>reg0</code>...<code>reg5</code></li>
+        <li><code>reg0</code>...<code>reg4</code></li>
         <li><code>inport</code> <code>outport</code></li>
         <li><code>eth.src</code> <code>eth.dst</code> <code>eth.type</code></li>
         <li><code>vlan.tci</code> <code>vlan.vid</code> <code>vlan.pcp</code> <code>vlan.present</code></li>
@@ -698,6 +699,22 @@
         <li><code>icmp4.type</code> <code>icmp4.code</code></li>
         <li><code>icmp6.type</code> <code>icmp6.code</code></li>
         <li><code>nd.target</code> <code>nd.sll</code> <code>nd.tll</code></li>
+        <li>
+          <p>
+            <code>ct_state</code>, which has the following Boolean subfields:
+          </p>
+          <ul>
+            <li><code>ct.new</code>: True for a new flow</li>
+            <li><code>ct.est</code>: True for an established flow</li>
+            <li><code>ct.rel</code>: True for a related flow</li>
+            <li><code>ct.rpl</code>: True for a reply flow</li>
+            <li><code>ct.inv</code>: True for a connection entry in a bad state</li>
+          </ul>
+          <p>
+            <code>ct_state</code> and its subfields are initialized by the
+            <code>ct_next</code> action, described below.
+          </p>
+        </li>
       </ul>
 
       <p>
@@ -836,6 +853,37 @@
             <var>field1</var> and <var>field2</var> must modifiable.
           </p>
         </dd>
+
+        <dt><code>ct_next;</code></dt>
+        <dd>
+          <p>
+            Apply connection tracking to the flow, initializing
+            <code>ct_state</code> for matching in later tables.
+            Automatically moves on to the next table, as if followed by
+            <code>next</code>.
+          </p>
+
+          <p>
+            As a side effect, IP fragments will be reassembled for matching.
+            If a fragmented packet is output, then it will be sent with any
+            overlapping fragments squashed.  The connection tracking state is
+            scoped by the logical port, so overlapping addresses may be used.
+            To allow traffic related to the matched flow, execute
+            <code>ct_commit</code>.
+          </p>
+
+          <p>
+            It is possible to have actions follow <code>ct_next</code>,
+            but they will not have access to any of its side-effects and
+            is not generally useful.
+          </p>
+        </dd>
+
+        <dt><code>ct_commit;</code></dt>
+        <dd>
+          Commit the flow to the connection tracking entry associated
+          with it by a previous call to <code>ct_next</code>.
+        </dd>
       </dl>
 
       <p>
@@ -846,8 +894,6 @@
       <dl>
         <dt><code>learn</code></dt>
 
-        <dt><code>conntrack</code></dt>
-
         <dt><code>dec_ttl { <var>action</var>, </code>...<code> } { <var>action</var>; </code>...<code>};</code></dt>
         <dd>
           decrement TTL; execute first set of actions if
diff --git a/tests/ovn.at b/tests/ovn.at
index b8b9e36..1c41a30 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -439,6 +439,8 @@ AT_DATA([test-cases.txt], [[
 # Positive tests.
 drop; => actions=drop, prereqs=1
 next; => actions=resubmit(,11), prereqs=1
+ct_next; => actions=ct(table=11,zone=NXM_NX_REG5[0..15]), prereqs=ip
+ct_commit; => actions=ct(commit,zone=NXM_NX_REG5[0..15]), prereqs=ip
 output; => actions=resubmit(,64), prereqs=1
 outport="eth0"; next; outport="LOCAL"; next; => actions=set_field:0x5->reg7,resubmit(,11),set_field:0xfffe->reg7,resubmit(,11), prereqs=1
 tcp.dst=80; => actions=set_field:80->tcp_dst, prereqs=ip.proto == 0x6 && (eth.type == 0x800 || eth.type == 0x86dd)
diff --git a/tests/test-ovn.c b/tests/test-ovn.c
index 774ebdf..440e84a 100644
--- a/tests/test-ovn.c
+++ b/tests/test-ovn.c
@@ -1208,7 +1208,7 @@ static void
 test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED)
 {
     struct shash symtab;
-    struct simap ports;
+    struct simap ports, ct_zones;
     struct ds input;
 
     create_symtab(&symtab);
@@ -1217,6 +1217,7 @@ test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED)
     simap_put(&ports, "eth0", 5);
     simap_put(&ports, "eth1", 6);
     simap_put(&ports, "LOCAL", ofp_to_u16(OFPP_LOCAL));
+    simap_init(&ct_zones);
 
     ds_init(&input);
     while (!ds_get_test_line(&input, stdin)) {
@@ -1225,8 +1226,8 @@ test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED)
         char *error;
 
         ofpbuf_init(&ofpacts, 0);
-        error = actions_parse_string(ds_cstr(&input), &symtab, &ports, 11, 64,
-                                     &ofpacts, &prereqs);
+        error = actions_parse_string(ds_cstr(&input), &symtab, &ports,
+                                     &ct_zones, 11, 64, &ofpacts, &prereqs);
         if (!error) {
             struct ds output;
 
@@ -1252,6 +1253,7 @@ test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED)
     ds_destroy(&input);
 
     simap_destroy(&ports);
+    simap_destroy(&ct_zones);
     expr_symtab_destroy(&symtab);
     shash_destroy(&symtab);
 }
-- 
1.7.5.4




More information about the dev mailing list