[ovs-dev] [PATCH RFC] dpif-netdev: ACL+dpcls for Wildcard matching.

antonio.fischetti at intel.com
Wed Apr 13 09:45:09 UTC 2016


The purpose of this implementation is to improve the performance
of wildcard matching in user-space.
This RFC patch shows the basic functionality; some aspects are not
yet covered.

I would like to get some feedback on whether people think integrating
the DPDK ACL table in this manner is potentially a good solution or not.

DPDK ACL tables show better lookup performance than the Classifier,
but their insertion time for new rules is unacceptably high.
This solution attempts to combine the faster lookups of the ACL
with the lower insertion latency of the Classifier.

One ACL and one Classifier are used, and they are interchangeable.
One table at a time is operating while the other is kept in standby.
Lookups are performed on the operating table only.
Both tables must have the same content.

Whenever we need to insert a new batch of rules, we first insert
them into the Classifier, which becomes the operating table.
In the meantime the ACL is updated by a separate thread.  When it
is ready, the ACL becomes the operating table and all subsequent
lookups are performed on the ACL.
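
In pseudo-code, the per-iteration logic in the PMD loop is roughly
the following (a simplified sketch of the patch below, with names
abbreviated):

    /* Before processing a batch: did the builder thread finish? */
    if (state == S_ACL_BUILDING && build_attempt_finished) {
        state = build_ret ? S_ACL_ERROR : S_ACL_OPERATING;
    }

    /* During processing, lookups hit the operating table only. */
    if (state == S_ACL_OPERATING) {
        any_miss = !dpacl_lookup(keys, rules, cnt);
    } else {
        any_miss = !dpcls_lookup(&pmd->cls, keys, rules, cnt);
    }

    /* After processing: drain the wait queue and, if new rules were
     * added, trigger a rebuild in a separate thread. */
    if (state != S_ACL_BUILDING && added_new_rules) {
        state = S_ACL_BUILDING;
        dpacl_build();   /* Launches the builder thread and returns. */
    }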

The EMC is bypassed for debugging and testing purposes, so each
incoming packet hits the wildcarded table (see the DEBUG_BYPASS_EMC
define).

PLEASE NOTE:
============
Some aspects are not yet implemented, so this RFC patch has the
following limitations.

    - ACL rules are not removed after a flow deletion.
      That is due to a limitation of ACL tables.
      A possible solution for a future implementation: when an ACL
      rule must be deleted, the dpcls becomes the operating table;
      in the background the ACL is repopulated from scratch and
      rebuilt (see the sketch after this list).

    - Flows can be installed with rules on the following fields only:
      Protocol Type, IP Src, IP Dst, Port Src, Port Dst.
      All other fields will be added in a later implementation.

    - Works with 1 PMD thread only; will be extended in a later
      implementation.

    - Builds with DPDK only.

    - The ACL builder thread model will be changed, so that the thread is
      created once at startup and runs continuously.  More details in the
      comments inside the dpacl_build() and acl_builder_thread() functions.
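
The sketch below illustrates the rebuild-from-scratch approach to flow
deletion mentioned in the first point above.  It is not part of this
patch: rte_acl_reset_rules() is the DPDK call that drops all rules from
a context, while re_add_remaining_rules() is a hypothetical helper that
would re-insert every rule still present in the flow table.

    /* Hypothetical flow-deletion handling -- not implemented here. */
    static void
    dpacl_delete_rule(struct dpcls_rule *rule)
    {
        /* Single-rule removal is not supported by the ACL, so make the
         * dpcls the operating table while the ACL is rebuilt. */
        acl_info.state = S_ACL_BUILDING;
        rte_acl_reset_rules(acl_info.ctx);  /* Drop all ACL rules. */
        re_add_remaining_rules(rule);   /* Re-insert all but 'rule'. */
        dpacl_build();             /* Rebuild in the background. */
    }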

Signed-off-by: Antonio Fischetti <antonio.fischetti at intel.com>
---
 lib/dpif-netdev.c | 634 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 633 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2870951..3d12fe8 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -69,6 +69,12 @@
 #include "unixctl.h"
 #include "util.h"
 
+/* FIXME Other compiler guards like the following one
+ * will need to be added. */
+#ifdef DPDK_NETDEV
+#include "rte_acl.h"
+#endif /* DPDK_NETDEV */
+
 #include "openvswitch/vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(dpif_netdev);
@@ -78,6 +84,60 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 #define MAX_RECIRC_DEPTH 5
 DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
+/* The next define is just for debugging/testing purposes, will be removed.
+ * When enabled, it forces the EMC search to be skipped and pretends it
+ * failed, so that any packet is checked against the ACL/Classifier. */
+#define DEBUG_BYPASS_EMC
+
+/* Unique name for the ACL table. */
+#define ACL_NAME "OVS-DPDK"
+/* Number of categories for the ACL. */
+#define ACL_NR_CATEGORIES 1
+/* Max elements in the wait queue. */
+#define ACL_WAIT_QUEUE_MAX_LEN 100
+/* Number of elements in the intermediate table. */
+#define ACL_DPCLS_RULES_MAX 100
+
+/* The following offsets and bit positions are hard-coded.  They were
+ * calculated from the received miniflow values. */
+#define ACL_OFFSET_MASK_MAC_DST 32
+#define ACL_FIELD_IS_PRESENT_BIT_MAC 0x40
+#define ACL_FIELD_IS_PRESENT_BIT_IP 0x04
+
+#define ACL_OFFSET_IP_SRC 40
+#define ACL_OFFSET_IP_DST 44
+#define ACL_OFFSET_PROTO 55
+#define ACL_OFFSET_PORT_SRC 64
+#define ACL_OFFSET_PORT_DST (ACL_OFFSET_PORT_SRC + 2)
+
+/*
+ * Finite State Machine to track and manage the ACL table setup, rule
+ * insertions and the swapping between the ACL and the Classifier.
+ */
+enum acl_state {
+    S_ACL_INIT,     /* Initial state.
+                     * The Classifier is the operating tbl.
+                     * No ACL has been created yet. */
+    S_ACL_STANDBY,  /* ACL contains no entry.
+                     * The Classifier is the operating tbl.
+                     * At startup, after the ACL is created, we enter
+                     * this state.  We can't perform lookups on the ACL
+                     * while it's empty.  Rule insertions are allowed. */
+    S_ACL_BUILDING, /* New rules were inserted and ACL building is in progress.
+                     * The Classifier is the operating tbl.
+                     * ACL insertions are not allowed now, so any new rule
+                     * will be stored in a wait queue. */
+    S_ACL_OPERATING,/* ACL contains rules and was built.
+                     * The ACL is the operating table.
+                     * We stay in this state as long as we don't
+                     * need to insert new rules. */
+    S_ACL_ERROR,    /* Something went wrong, stop using the ACL.
+                     * The Classifier is the operating tbl. */
+                    /* FIXME Fall back to using the dpcls Classifier only.
+                     * Implemented but not tested. */
+    S_ACL_MAX
+};
+
 /* Configuration parameters. */
 enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
 
@@ -163,6 +223,175 @@ struct dpcls_rule {
     /* 'flow' must be the last field, additional space is allocated here. */
 };
 
+/* Structure definition of an ACL rule. */
+RTE_ACL_RULE_DEF(acl_rule, RTE_ACL_MAX_FIELDS);
+
+/* Max nr of ACL rules. */
+#define ACL_RULES_MAX 0x10000
+
+/*
+ * ACL Rule format definitions.
+ */
+
+/* Rule Search Key structure.
+ * FIXME It's just a 5-tuple for now. */
+enum {
+    ACL_KEYFIELD_IP_PROTO,
+    ACL_KEYFIELD_IP_SRC,
+    ACL_KEYFIELD_IP_DST,
+    ACL_KEYFIELD_PORT_SRC,
+    ACL_KEYFIELD_PORT_DST,
+    ACL_KEYFIELD_MAX
+};
+
+struct acl_search_key {
+    uint8_t ip_proto;
+    uint32_t ip_src;
+    uint32_t ip_dst;
+    uint16_t prt_src;
+    uint16_t prt_dst;
+};
+
+/* Per the ACL design, the 1st element of the search key must be 8 bits long.
+ * All other elements are instead 32 bits long.
+ * We will accommodate our variables into the search key in the following way.
+ *
+ *  idx    Variable                 Element length
+ * +----+-------------------------+----------------+
+ * | 0  |  Protocol Type          |    8-bit long  |
+ * +----+-------------------------+----------------+
+ * | 1  |  IP Src                 |   32-bit long  |
+ * +----+-------------------------+----------------+
+ * | 2  |  IP Dst                 |   32-bit long  |
+ * +----+-------------------------+----------------+
+ * | 3  |  Port Src and Port Dst  |   32-bit long  |
+ * +----+-------------------------+----------------+
+ */
+struct rte_acl_field_def rule_defs[ACL_KEYFIELD_MAX] = {
+    {   /* Protocol type. */
+        .type = RTE_ACL_FIELD_TYPE_BITMASK,
+        .size = sizeof(uint8_t),
+        .field_index = ACL_KEYFIELD_IP_PROTO,
+        .input_index = 0,
+        .offset = offsetof(struct acl_search_key, ip_proto),
+    },
+    {   /* IP src. */
+        .type = RTE_ACL_FIELD_TYPE_BITMASK,
+        .size = sizeof(uint32_t),
+        .field_index = ACL_KEYFIELD_IP_SRC,
+        .input_index = 1,
+        .offset = offsetof(struct acl_search_key, ip_src),
+    },
+    {   /* IP dst */
+        .type = RTE_ACL_FIELD_TYPE_BITMASK,
+        .size = sizeof(uint32_t),
+        .field_index = ACL_KEYFIELD_IP_DST,
+        .input_index = 2,
+        .offset = offsetof(struct acl_search_key, ip_dst),
+    },
+    {   /* Port Src */
+        .type = RTE_ACL_FIELD_TYPE_BITMASK,
+        .size = sizeof(uint16_t),
+        .field_index = ACL_KEYFIELD_PORT_SRC,
+        /* Port Src and Port Dst will be accommodated into
+         * the same element of the search key. */
+        .input_index = 3,
+        .offset = offsetof(struct acl_search_key, prt_src),
+    },
+    {   /* Port Dst */
+        .type = RTE_ACL_FIELD_TYPE_BITMASK,
+        .size = sizeof(uint16_t),
+        .field_index = ACL_KEYFIELD_PORT_DST,
+        /* Port Src and Port Dst will be accommodated into
+         * the same element of the search key. */
+        .input_index = 3,
+        .offset = offsetof(struct acl_search_key, prt_dst),
+    },
+};
+
+/* FIXME This RFC patch works with 1 PMD only.
+ * Each PMD should have its own ACL table.  All the content of the struct
+ * acl_info will be moved inside struct dp_netdev_pmd_thread. */
+
+/* Collects info about the ACL, the intermediate table and the Finite
+ * State Machine. */
+struct acl_info {
+    /* Generic ACL info. */
+    struct rte_acl_ctx *ctx;    /* DPDK ACL context. */
+    struct rte_acl_config cfg;  /* DPDK ACL table configuration info. */
+    enum acl_state state;       /* Current ACL state. */
+    bool added_new_rules;
+
+    /* Locking mechanism */
+    /* FIXME At this stage of the implementation:
+     * - I'm not considering the deletion of old unused flows; when
+     *   dpif_netdev_flow_del() is called it has no effect on the ACL.
+     * - I read/change the state only in those functions called by the
+     *   PMD thread.
+     * So for now a locking mechanism to r/w the state is not needed. */
+
+    /* Builder Thread */
+    pthread_t acl_builder_thread_id; /* ID of the thread that builds ACL. */
+    int build_ret;           /* Return code from the build attempt. */
+    bool build_attempt_finished;    /* Insert and Build attempt are done. */
+
+    /* Intermediate table. Stores dpcls_rule pointers. */
+    void *dpcls_rules[ACL_DPCLS_RULES_MAX];
+    uint32_t dpcls_rules_idx;     /* Index to the Intermediate table. */
+
+    /* Wait queue */
+    /* New rules can't be inserted into the ACL while a build is in
+     * progress, so they are stored in a ring buffer used as a wait queue. */
+    struct acl_rule wait_queue[ACL_WAIT_QUEUE_MAX_LEN]; /* Wait queue. */
+    uint32_t wait_q_head;        /* Index to the current entry to be read. */
+    uint32_t wait_q_next_free;   /* Index to the 1st free location. */
+} acl_info = {
+    .ctx = 0,
+    .added_new_rules = false,
+    .state = S_ACL_INIT,
+    .build_attempt_finished = false,
+    .dpcls_rules_idx = 0,
+    .wait_q_head = 0,
+    .wait_q_next_free = 0,
+};
+
+static inline bool
+dpacl_is_build_finished(void)
+{
+    return acl_info.build_attempt_finished;
+}
+
+static inline void
+dpacl_clear_build_finished(void)
+{
+    acl_info.build_attempt_finished = false;
+}
+
+static inline void
+dpacl_set_build_finished(void)
+{
+    acl_info.build_attempt_finished = true;
+}
+
+static bool
+dpacl_rules_are_in_q(void)
+{
+    if (acl_info.state == S_ACL_ERROR) {
+        return false;
+    }
+
+    return acl_info.wait_q_next_free != acl_info.wait_q_head;
+}
+
+static void dpacl_build(void);
+static void dpacl_destroy(void);
+static void dpacl_init(void);
+static void dpacl_insert(struct dpcls *cls, struct dpcls_rule *rule,
+                         const struct netdev_flow_key *mask);
+static void dpacl_insert_from_q(void);
+static bool dpacl_lookup(const struct netdev_flow_key keys[],
+                         struct dpcls_rule **rules, const size_t cnt);
+static void dpacl_process_wait_queue(void);
 static void dpcls_init(struct dpcls *);
 static void dpcls_destroy(struct dpcls *);
 static void dpcls_insert(struct dpcls *, struct dpcls_rule *,
@@ -1450,6 +1679,7 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
     cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
     flow->dead = true;
 
+    /* FIXME The ACL deletion is not implemented yet. */
     dp_netdev_flow_unref(flow);
 }
 
@@ -1750,6 +1980,14 @@ emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
     struct emc_entry *to_be_replaced = NULL;
     struct emc_entry *current_entry;
 
+#ifdef DEBUG_BYPASS_EMC
+    /* Just for debugging/testing purposes.
+     * ------------------------------------
+     * We skip the EMC insertion. */
+
+    return;
+#endif
+
     EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
         if (netdev_flow_key_equal(&current_entry->key, key)) {
             /* We found the entry with the 'mf' miniflow */
@@ -1777,6 +2015,15 @@ emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key)
 {
     struct emc_entry *current_entry;
 
+#ifdef DEBUG_BYPASS_EMC
+    /* Just for debugging/testing purposes.
+     * ------------------------------------
+     * We skip the EMC search and pretend it fails.
+     * This way any packet will be checked against the ACL/Classifier. */
+
+    return NULL;
+#endif
+
     EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
         if (current_entry->key.hash == key->hash
             && emc_entry_alive(current_entry)
@@ -2031,6 +2278,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 
     netdev_flow_key_init_masked(&flow->cr.flow, &match->flow, &mask);
     dpcls_insert(&pmd->cls, &flow->cr, &mask);
+    /* Both tables must have the same content. */
+    dpacl_insert(&pmd->cls, &flow->cr, &mask);
 
     cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow->node),
                 dp_netdev_flow_hash(&flow->ufid));
@@ -2571,9 +2820,20 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
     if (!error) {
         *recirc_depth_get() = 0;
 
+        if ((acl_info.state == S_ACL_BUILDING) && dpacl_is_build_finished()) {
+            dpacl_clear_build_finished();
+            if (!acl_info.build_ret) {
+                acl_info.state = S_ACL_OPERATING;
+            } else {
+                acl_info.state = S_ACL_ERROR; /* FIXME Fallback on dpcls. */
+                VLOG_ERR("%s", rte_strerror(-acl_info.build_ret));
+            }
+        }
         cycles_count_start(pmd);
         dp_netdev_input(pmd, packets, cnt, port->port_no);
         cycles_count_end(pmd, PMD_CYCLES_PROCESSING);
+
+        dpacl_process_wait_queue();
     } else if (error != EAGAIN && error != EOPNOTSUPP) {
         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 
@@ -2852,6 +3112,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     dpcls_init(&pmd->cls);
     cmap_init(&pmd->flow_table);
     ovs_list_init(&pmd->poll_list);
+    dpacl_init();
     /* init the 'flow_cache' since there is no
      * actual thread created for NON_PMD_CORE_ID. */
     if (core_id == NON_PMD_CORE_ID) {
@@ -2867,6 +3128,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
     dp_netdev_pmd_flow_flush(pmd);
     dpcls_destroy(&pmd->cls);
     cmap_destroy(&pmd->flow_table);
+    dpacl_destroy();
     ovs_mutex_destroy(&pmd->flow_mutex);
     latch_destroy(&pmd->exit_latch);
     xpthread_cond_destroy(&pmd->cond);
@@ -3458,6 +3720,41 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets,
     return n_missed;
 }
 
+/* Thread to build the ACL table.
+ * FIXME Should implement a different mechanism, for example:
+ *  - enter a loop and wait for a signal.
+ *  - when signaled, do a build.
+ *  - loop again and wait. */
+static void *
+acl_builder_thread(void *arg)
+{
+    struct rte_acl_ctx *ctx = (struct rte_acl_ctx *)arg;
+    /* Build ACL table.  This function is not multi-thread safe. */
+    acl_info.build_ret = rte_acl_build(ctx, &acl_info.cfg);
+
+    dpacl_set_build_finished();
+
+    return NULL;
+}
+
+/*
+ * Triggers the ACL building by launching a separate thread and returns.
+ */
+static void
+dpacl_build(void)
+{
+    dpacl_clear_build_finished();    /* Reset the synchronization flag. */
+
+    /* FIXME We shouldn't create a new thread every time we need a rebuild.
+     * Instead, the 'ACL Builder' thread should be created only once at
+     * startup.  When it starts it enters a loop and waits.  It wakes up
+     * when it is signaled by the PMD thread and builds the ACL.  After the
+     * build is done it starts a new loop and waits until it gets signaled
+     * again. */
+    acl_info.acl_builder_thread_id = ovs_thread_create("acl_builder",
+        acl_builder_thread, acl_info.ctx);
+}
+
 static inline void
 fast_path_processing(struct dp_netdev_pmd_thread *pmd,
                      struct dp_packet **packets, size_t cnt,
@@ -3481,7 +3778,14 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
         /* Key length is needed in all the cases, hash computed on demand. */
         keys[i].len = netdev_flow_key_size(miniflow_n_values(&keys[i].mf));
     }
-    any_miss = !dpcls_lookup(&pmd->cls, keys, rules, cnt);
+
+    /* Lookup is performed on the operating table only. */
+    if (acl_info.state == S_ACL_OPERATING) {
+        any_miss = !dpacl_lookup(keys, rules, cnt);
+    } else {
+        any_miss = !dpcls_lookup(&pmd->cls, keys, rules, cnt);
+    }
+
     if (OVS_UNLIKELY(any_miss) && !fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
         uint64_t actions_stub[512 / 8], slow_stub[512 / 8];
         struct ofpbuf actions, put_actions;
@@ -3557,6 +3861,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
                     netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
                                                      add_actions->data,
                                                      add_actions->size);
+                    acl_info.added_new_rules = true;
                 }
                 ovs_mutex_unlock(&pmd->flow_mutex);
 
@@ -4047,6 +4352,55 @@ struct dpcls_subtable {
     /* 'mask' must be the last field, additional space is allocated here. */
 };
 
+/* Create and setup the ACL table. */
+static void
+dpacl_init(void)
+{
+    int ret;
+
+    struct rte_acl_param table_info;
+
+    if (acl_info.state != S_ACL_INIT) {
+        return;
+    }
+
+    acl_info.dpcls_rules_idx = 0;
+    memset(&acl_info.cfg, 0, sizeof(acl_info.cfg));
+    acl_info.cfg.num_fields = RTE_DIM(rule_defs);
+    memcpy(&acl_info.cfg.defs, rule_defs, sizeof(rule_defs));
+    acl_info.cfg.num_categories = ACL_NR_CATEGORIES;
+    acl_info.cfg.max_size = SIZE_MAX;
+
+    /* Set up the ACL creation parameters. */
+    table_info.name = ACL_NAME;
+    table_info.socket_id = SOCKET_ID_ANY;
+    table_info.rule_size = RTE_ACL_RULE_SZ(acl_info.cfg.num_fields);
+    table_info.max_rule_num = ACL_RULES_MAX;
+
+    acl_info.ctx = rte_acl_create(&table_info);
+    if (!acl_info.ctx) {
+        acl_info.state = S_ACL_ERROR;
+        /* FIXME should fall back on using the dpcls classifier only. */
+        rte_exit(rte_errno, "Failed to create ACL context\n");
+        return;
+    }
+
+    /* Set the default classify method for this context. */
+    ret = rte_acl_set_ctx_classify(acl_info.ctx, RTE_ACL_CLASSIFY_SSE);
+    if (ret) {
+        acl_info.state = S_ACL_ERROR;
+        /* FIXME fallback on the dpcls classifier instead of rte_exit(). */
+        rte_exit(ret, "failed to setup SSE method for ACL context\n");
+        return;
+    }
+
+    /* We're not yet allowed to perform lookups on the ACL.  We can do that
+     * only after some rules are inserted and the table is built; otherwise
+     * a crash will occur.
+     * The Classifier is now the operating table. */
+    acl_info.state = S_ACL_STANDBY;
+}
+
 /* Initializes 'cls' as a classifier that initially contains no classification
  * rules. */
 static void
@@ -4057,6 +4411,17 @@ dpcls_init(struct dpcls *cls)
 }
 
 static void
+dpacl_destroy(void)
+{
+    if (!acl_info.ctx) {
+        return;
+    }
+
+    rte_acl_free(acl_info.ctx);
+    acl_info.state = S_ACL_INIT;
+}
+
+static void
 dpcls_destroy_subtable(struct dpcls *cls, struct dpcls_subtable *subtable)
 {
     pvector_remove(&cls->subtables, subtable);
@@ -4115,6 +4480,171 @@ dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
     return dpcls_create_subtable(cls, mask);
 }
 
+/* Move the rules waiting in the queue into the ACL table. */
+static void
+dpacl_insert_from_q(void)
+{
+    int rc = 0;
+
+    if (acl_info.state == S_ACL_ERROR) {
+        return;
+    }
+
+    while (dpacl_rules_are_in_q()) {
+        /* This function is not multi-thread safe. */
+        rc = rte_acl_add_rules(acl_info.ctx,
+            (struct rte_acl_rule *)&acl_info.wait_queue[acl_info.wait_q_head],
+             1);
+        if (rc != 0) {
+            VLOG_WARN("%s rte_acl_add_rules ret %d", __func__, rc);
+            /* FIXME fallback on the dpcls classifier. */
+            acl_info.state = S_ACL_ERROR;
+            break;
+        }
+        acl_info.wait_q_head =
+            (acl_info.wait_q_head + 1) % ACL_WAIT_QUEUE_MAX_LEN;
+    }
+}
+
+/* Insert 'rule' into the ACL table, or store it in the wait queue if a
+ * build is in progress. */
+static void
+dpacl_insert(struct dpcls *cls, struct dpcls_rule *rule,
+             const struct netdev_flow_key *mask)
+{
+    int rc = 0;
+    struct acl_rule acl_entry;
+    uint8_t *p_mf;
+    uint8_t *p_val;
+    uint8_t *p_mask;
+
+    if (acl_info.state == S_ACL_ERROR) {
+        return;
+    }
+
+    /* dpcls_rules_idx is pointing to the next free location. */
+    if (acl_info.dpcls_rules_idx >= ACL_DPCLS_RULES_MAX) {
+        VLOG_ERR("ACL intermediate table is full! "
+                 "Reached max nr. of entries: %d.", ACL_DPCLS_RULES_MAX);
+        acl_info.state = S_ACL_ERROR;
+        return;
+    }
+
+    /* We need to fill in each field of the ACL entry with a value/mask pair.
+     * Values and their masks will be retrieved from 'rule' and 'mask'
+     * input parameters. */
+
+    /* FIXME I set up the traffic generator to send Eth-II/IPv4/UDP packets
+     * with no tunneling.
+     * Many offsets are hardcoded and are specific to the traffic I used. */
+    struct dpcls_subtable *subtable = dpcls_find_subtable(cls, mask);
+
+    memset(&acl_entry, 0, sizeof(acl_entry));
+    p_mf = (uint8_t *)&rule->flow.mf;
+    p_val = (uint8_t *)&rule->flow.mf;
+    /* Skip the map section. */
+    p_val += ACL_OFFSET_MASK_MAC_DST;
+    p_mask = (uint8_t *)&subtable->mask.mf;
+    p_mask += ACL_OFFSET_MASK_MAC_DST;
+
+    /* Are MAC fields present? */
+    /* FIXME Hardcoded offsets are specific to the traffic I used. */
+    p_mf += 6;
+    if (*p_mf & ACL_FIELD_IS_PRESENT_BIT_MAC) {
+        /* FIXME read MAC values. */
+        p_val += ETH_ADDR_LEN * 2;
+        p_mask += ETH_ADDR_LEN * 2;
+    } else {
+        p_val += 4;
+        p_mask += 4;
+    }
+
+    /* Skip EthType field '08 00 00 00'. */
+    p_val += 4;
+    p_mask += 4;
+
+    /* Are IP fields present? */
+    p_mf++;
+    if (*p_mf & ACL_FIELD_IS_PRESENT_BIT_IP) {
+        /* FIXME this works with IPv4 only. */
+        /* ACL_KEYFIELD_IP_SRC */
+        memcpy(&acl_entry.field[ACL_KEYFIELD_IP_SRC].value.u32, p_val, 4);
+        memcpy(&acl_entry.field[ACL_KEYFIELD_IP_SRC].mask_range.u32,
+            p_mask, 4);
+        p_val += sizeof(ovs_be32);
+        p_mask += sizeof(ovs_be32);
+
+        /* ACL_KEYFIELD_IP_DST */
+        memcpy(&acl_entry.field[ACL_KEYFIELD_IP_DST].value.u32, p_val, 4);
+        memcpy(&acl_entry.field[ACL_KEYFIELD_IP_DST].mask_range.u32,
+            p_mask, 4);
+        p_val += sizeof(ovs_be32);
+        p_mask += sizeof(ovs_be32);
+    }
+
+    p_mf++;
+
+    /* Read Protocol type. */
+    p_val += 7;
+    p_mask += 7;
+
+    /* ACL_KEYFIELD_IP_PROTO */
+    acl_entry.field[ACL_KEYFIELD_IP_PROTO].value.u8 = *p_val;
+    acl_entry.field[ACL_KEYFIELD_IP_PROTO].mask_range.u8 = *p_mask;
+
+    /* Read TCP/UDP ports. */
+    p_val += 1;
+    p_mask += 1;
+
+    /* ACL_KEYFIELD_PORT_SRC */
+    memcpy(&acl_entry.field[ACL_KEYFIELD_PORT_SRC].value.u16, p_val, 2);
+    memcpy(&acl_entry.field[ACL_KEYFIELD_PORT_SRC].mask_range.u16, p_mask, 2);
+
+    /* ACL_KEYFIELD_PORT_DST */
+    memcpy(&acl_entry.field[ACL_KEYFIELD_PORT_DST].value.u16, p_val, 2);
+    memcpy(&acl_entry.field[ACL_KEYFIELD_PORT_DST].mask_range.u16, p_mask, 2);
+
+    /* FIXME read priority. */
+    acl_entry.data.priority = 3;
+    acl_entry.data.category_mask = RTE_LEN2MASK(RTE_ACL_MAX_CATEGORIES,
+        typeof(acl_entry.data.category_mask));
+    /* The ACL accepts 32-bit values in its userdata field, but we need
+     * to save a pointer to the dpcls_rule, which is 64-bit.  So we use an
+     * intermediate table to store the pointers, while the ACL stores their
+     * indexes.
+     * Furthermore, the ACL can't store 0 as userdata, so we store
+     * the index + 1. */
+
+    acl_info.dpcls_rules[acl_info.dpcls_rules_idx] = rule;
+
+    /* Save index + 1 into ACL rule. */
+    /* FIXME This looks a bit error-prone.  Maybe we could never use the 0-th
+     * element of dpcls_rules[] so as to have a 1:1 index correspondence
+     * between acl_entry.data.userdata and acl_info.dpcls_rules_idx? */
+    acl_info.dpcls_rules_idx++;
+    acl_entry.data.userdata = acl_info.dpcls_rules_idx;
+
+    if (acl_info.state == S_ACL_BUILDING) {
+        /* Add into the wait queue. */
+        memcpy(&acl_info.wait_queue[acl_info.wait_q_next_free],
+            &acl_entry, sizeof(struct acl_rule));
+        acl_info.wait_q_next_free =
+            (acl_info.wait_q_next_free + 1) % ACL_WAIT_QUEUE_MAX_LEN;
+        if (acl_info.wait_q_next_free == acl_info.wait_q_head) {
+            VLOG_WARN("Wait Q FULL!");
+            acl_info.state = S_ACL_ERROR;
+        }
+    } else {
+        /* This function is not multi-thread safe. */
+        rc = rte_acl_add_rules(acl_info.ctx,
+            (struct rte_acl_rule *)&acl_entry, 1);
+        if (rc != 0) {
+            VLOG_WARN("%s rte_acl_add_rules ret %d", __func__, rc);
+            acl_info.state = S_ACL_ERROR;
+        }
+    }
+}
+
 /* Insert 'rule' into 'cls'. */
 static void
 dpcls_insert(struct dpcls *cls, struct dpcls_rule *rule,
@@ -4161,6 +4691,108 @@ dpcls_rule_matches_key(const struct dpcls_rule *rule,
     return true;
 }
 
+static void
+dpacl_process_wait_queue(void)
+{
+    if (acl_info.state != S_ACL_BUILDING && acl_info.state != S_ACL_ERROR) {
+        /* No ACL build is in progress.  If some rules are waiting in
+         * the queue, we can insert them into the ACL now. */
+        if (dpacl_rules_are_in_q()) {
+            dpacl_insert_from_q();
+            acl_info.added_new_rules = true;
+        }
+        /* If we inserted new rules into the ACL - either by dpacl_insert()
+         * or by dpacl_insert_from_q() - we need to start a new build. */
+        if (acl_info.added_new_rules) {
+            acl_info.added_new_rules = false;
+            acl_info.state = S_ACL_BUILDING;
+            dpacl_build(); /* Triggers a thread and returns. */
+        }
+    }
+}
+
+static bool
+dpacl_lookup(const struct netdev_flow_key keys[],
+             struct dpcls_rule **rules, const size_t cnt)
+{
+    const uint8_t *data[cnt]; /* Pointers to the 'cnt' search keys. */
+    struct acl_search_key acl_key[cnt];
+    uint32_t results[cnt];
+    int i;
+    uint8_t *p_src;
+    uint8_t *p;
+    int ret;
+    bool all_found = true;
+    int rules_idx;
+
+    if (acl_info.state == S_ACL_ERROR) {
+        return false;
+    }
+
+    memset(rules, 0, cnt * sizeof(*rules));
+
+    for (i = 0; i < cnt; i++) {
+        /* Fill in the search key. */
+        p_src = (uint8_t *)&keys[i].mf;
+        acl_key[i].ip_proto = *(p_src + ACL_OFFSET_PROTO);
+
+        /* Reverse the byte order before lookup. */
+        p = (uint8_t *)&acl_key[i].ip_src;
+        *(p + 0) = *(p_src + ACL_OFFSET_IP_SRC + 3);
+        *(p + 1) = *(p_src + ACL_OFFSET_IP_SRC + 2);
+        *(p + 2) = *(p_src + ACL_OFFSET_IP_SRC + 1);
+        *(p + 3) = *(p_src + ACL_OFFSET_IP_SRC + 0);
+
+        p = (uint8_t *)&acl_key[i].ip_dst;
+        *(p + 0) = *(p_src + ACL_OFFSET_IP_DST + 3);
+        *(p + 1) = *(p_src + ACL_OFFSET_IP_DST + 2);
+        *(p + 2) = *(p_src + ACL_OFFSET_IP_DST + 1);
+        *(p + 3) = *(p_src + ACL_OFFSET_IP_DST + 0);
+
+        p = (uint8_t *)&acl_key[i].prt_src;
+        *(p + 0) = *(p_src + ACL_OFFSET_PORT_SRC + 1);
+        *(p + 1) = *(p_src + ACL_OFFSET_PORT_SRC + 0);
+
+        p = (uint8_t *)&acl_key[i].prt_dst;
+        *(p + 0) = *(p_src + ACL_OFFSET_PORT_DST + 1);
+        *(p + 1) = *(p_src + ACL_OFFSET_PORT_DST + 0);
+
+        data[i] = (const uint8_t *)&acl_key[i];
+    }
+
+    ret = rte_acl_classify(acl_info.ctx, data, results, cnt,
+        ACL_NR_CATEGORIES);
+
+    if (ret != 0) {
+        VLOG_WARN("Classify call returns %d", ret);
+        acl_info.state = S_ACL_ERROR;
+        /* FIXME fallback on the dpcls classifier. */
+        all_found = false;
+    } else {
+        all_found = true;
+
+        for (int i_result = 0; i_result < cnt; i_result++) {
+            if (!results[i_result]) {  /* Not found. */
+                all_found = false;
+                continue;
+            }
+
+            rules_idx = results[i_result] - 1;
+
+            if (rules_idx < 0 || rules_idx >= ACL_DPCLS_RULES_MAX) {
+                all_found = false;
+                VLOG_WARN("|  Got idx %d for Pkt %d", rules_idx, i_result);
+                continue;
+            }
+            rules[i_result] = acl_info.dpcls_rules[rules_idx];
+        }
+    }
+
+    return all_found;
+}
+
 /* For each miniflow in 'flows' performs a classifier lookup writing the result
  * into the corresponding slot in 'rules'.  If a particular entry in 'flows' is
  * NULL it is skipped.
-- 
1.9.3



