[ovs-dev] [v6 01/11] dpif-netdev: Add command line and function pointer for miniflow extract

Cian Ferriter cian.ferriter at intel.com
Tue Jul 6 13:11:40 UTC 2021


From: Kumar Amber <kumar.amber at intel.com>

This patch introduces the mfex function pointers which allows
the user to switch between different miniflow extract implementations
which are provided by the OVS based on optimized ISA CPU.

The user can query for the available minflow extract variants available
for that CPU by following commands:

$ovs-appctl dpif-netdev/miniflow-parser-get

Similarly an user can set the miniflow implementation by the following
command :

$ ovs-appctl dpif-netdev/miniflow-parser-set name

This allows for more performance and flexibility to the user to choose
the miniflow implementation according to the needs.

Signed-off-by: Kumar Amber <kumar.amber at intel.com>
Co-authored-by: Harry van Haaren <harry.van.haaren at intel.com>
Signed-off-by: Harry van Haaren <harry.van.haaren at intel.com>

---

v5:
- fix review comments(Ian, Flavio, Eelco)
- add enum to hold mfex indexes
- add new get and set implemenatations
- add Atomic set and get
---
---
 NEWS                              |   1 +
 lib/automake.mk                   |   2 +
 lib/dpif-netdev-avx512.c          |  32 +++++-
 lib/dpif-netdev-private-extract.c | 159 ++++++++++++++++++++++++++++++
 lib/dpif-netdev-private-extract.h | 105 ++++++++++++++++++++
 lib/dpif-netdev-private-thread.h  |   8 ++
 lib/dpif-netdev.c                 | 127 +++++++++++++++++++++++-
 7 files changed, 429 insertions(+), 5 deletions(-)
 create mode 100644 lib/dpif-netdev-private-extract.c
 create mode 100644 lib/dpif-netdev-private-extract.h

diff --git a/NEWS b/NEWS
index be96fc57f..60db823c4 100644
--- a/NEWS
+++ b/NEWS
@@ -22,6 +22,7 @@ Post-v2.15.0
      * Enable the AVX512 DPCLS implementation to use VPOPCNT instruction if the
        CPU supports it. This enhances performance by using the native vpopcount
        instructions, instead of the emulated version of vpopcount.
+     * Add command line option to switch between mfex function pointers.
    - ovs-ctl:
      * New option '--no-record-hostname' to disable hostname configuration
        in ovsdb on startup.
diff --git a/lib/automake.mk b/lib/automake.mk
index 49f42c2a3..6657b9ae5 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -118,6 +118,8 @@ lib_libopenvswitch_la_SOURCES = \
 	lib/dpif-netdev-private-dpcls.h \
 	lib/dpif-netdev-private-dpif.c \
 	lib/dpif-netdev-private-dpif.h \
+	lib/dpif-netdev-private-extract.c \
+	lib/dpif-netdev-private-extract.h \
 	lib/dpif-netdev-private-flow.h \
 	lib/dpif-netdev-private-hwol.h \
 	lib/dpif-netdev-private-thread.h \
diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 9a5189145..91fad92db 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -149,6 +149,16 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
      *     // do all processing (HWOL->MFEX->EMC->SMC)
      * }
      */
+
+    /* Do a batch minfilow extract into keys. */
+    uint32_t mf_mask = 0;
+    miniflow_extract_func mfex_func;
+    atomic_read_relaxed(&pmd->miniflow_extract_opt, &mfex_func);
+    if (mfex_func) {
+        mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd);
+    }
+
+    /* Perform first packet interation. */
     uint32_t lookup_pkts_bitmask = (1ULL << batch_size) - 1;
     uint32_t iter = lookup_pkts_bitmask;
     while (iter) {
@@ -167,6 +177,13 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
         pkt_metadata_init(&packet->md, in_port);
 
         struct dp_netdev_flow *f = NULL;
+        struct netdev_flow_key *key = &keys[i];
+
+        /* Check the minfiflow mask to see if the packet was correctly
+         * classifed by vector mfex else do a scalar miniflow extract
+         * for that packet.
+         */
+        uint32_t mfex_hit = (mf_mask & (1 << i));
 
         /* Check for a partial hardware offload match. */
         if (hwol_enabled) {
@@ -177,7 +194,13 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
             }
             if (f) {
                 rules[i] = &f->cr;
-                pkt_meta[i].tcp_flags = parse_tcp_flags(packet);
+                /* If AVX512 MFEX already classified the packet, use it. */
+                if (mfex_hit) {
+                    pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(&key->mf);
+                } else {
+                    pkt_meta[i].tcp_flags = parse_tcp_flags(packet);
+                }
+
                 pkt_meta[i].bytes = dp_packet_size(packet);
                 phwol_hits++;
                 hwol_emc_smc_hitmask |= (1 << i);
@@ -185,9 +208,10 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
             }
         }
 
-        /* Do miniflow extract into keys. */
-        struct netdev_flow_key *key = &keys[i];
-        miniflow_extract(packet, &key->mf);
+        if (!mfex_hit) {
+            /* Do a scalar miniflow extract into keys. */
+            miniflow_extract(packet, &key->mf);
+        }
 
         /* Cache TCP and byte values for all packets. */
         pkt_meta[i].bytes = dp_packet_size(packet);
diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
new file mode 100644
index 000000000..f7ad2d5b5
--- /dev/null
+++ b/lib/dpif-netdev-private-extract.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2021 Intel.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "dp-packet.h"
+#include "dpif-netdev-private-dpcls.h"
+#include "dpif-netdev-private-extract.h"
+#include "dpif-netdev-private-thread.h"
+#include "flow.h"
+#include "openvswitch/vlog.h"
+#include "ovs-thread.h"
+#include "util.h"
+
+VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
+
+/* Variable to hold the default mfex implementation. */
+static miniflow_extract_func default_mfex_func = NULL;
+
+/* Implementations of available extract options and
+ * the implementations are always in order of preference.
+ */
+static struct dpif_miniflow_extract_impl mfex_impls[] = {
+
+    [MFEX_IMPL_SCALAR] = {
+        .probe = NULL,
+        .extract_func = NULL,
+        .name = "scalar", },
+};
+
+BUILD_ASSERT_DECL(MFEX_IMPL_MAX >= ARRAY_SIZE(mfex_impls));
+
+void
+dpif_miniflow_extract_init(void)
+{
+    /* Call probe on each impl, and cache the result. */
+    uint32_t i;
+    for (i = 0; i < ARRAY_SIZE(mfex_impls); i++) {
+        bool avail = true;
+        if (mfex_impls[i].probe) {
+            /* Return zero is success, non-zero means error. */
+            avail = (mfex_impls[i].probe() == 0);
+        }
+        VLOG_INFO("Miniflow Extract implementation %s (available: %s)\n",
+                  mfex_impls[i].name, avail ? "True" : "False");
+        mfex_impls[i].available = avail;
+    }
+}
+
+miniflow_extract_func
+dp_mfex_impl_get_default(void)
+{
+    /* For the first call, this will be NULL. Compute the compile time default.
+     */
+    if (!default_mfex_func) {
+
+        VLOG_INFO("Default MFEX implementation is %s.\n",
+                  mfex_impls[MFEX_IMPL_SCALAR].name);
+        default_mfex_func = mfex_impls[MFEX_IMPL_SCALAR].extract_func;
+    }
+
+    return default_mfex_func;
+}
+
+int32_t
+dp_mfex_impl_set_default_by_name(const char *name)
+{
+    miniflow_extract_func new_default;
+
+
+    int32_t err = dp_mfex_impl_get_by_name(name, &new_default);
+
+    if (!err) {
+        default_mfex_func = new_default;
+    }
+
+    return err;
+
+}
+
+uint32_t
+dp_mfex_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list,
+                 size_t n)
+{
+    /* Add all mfex functions to reply string. */
+    ds_put_cstr(reply, "Available MFEX implementations:\n");
+
+    for (uint32_t i = 0; i < ARRAY_SIZE(mfex_impls); i++) {
+
+        ds_put_format(reply, "  %s (available: %s)(pmds: ",
+                      mfex_impls[i].name, mfex_impls[i].available ?
+                      "True" : "False");
+
+        for (size_t j = 0; j < n; j++) {
+            struct dp_netdev_pmd_thread *pmd = pmd_list[j];
+            if (pmd->core_id == NON_PMD_CORE_ID) {
+                continue;
+            }
+
+            if (pmd->miniflow_extract_opt == mfex_impls[i].extract_func) {
+                ds_put_format(reply, "%u,", pmd->core_id);
+            }
+        }
+
+        ds_chomp(reply, ',');
+
+        if (ds_last(reply) == ' ') {
+            ds_put_cstr(reply, "none");
+        }
+
+        ds_put_cstr(reply, ")\n");
+    }
+
+    return ARRAY_SIZE(mfex_impls);
+}
+
+/* This function checks all available MFEX implementations, and selects the
+ * returns the function pointer to the one requested by "name".
+ */
+int32_t
+dp_mfex_impl_get_by_name(const char *name, miniflow_extract_func *out_func)
+{
+    if ((name == NULL) || (out_func == NULL)) {
+        return -EINVAL;
+    }
+
+    uint32_t i;
+
+    for (i = 0; i < ARRAY_SIZE(mfex_impls); i++) {
+        if (strcmp(mfex_impls[i].name, name) == 0) {
+            /* Probe function is optional - so check it is set before exec. */
+            if (!mfex_impls[i].available) {
+                *out_func = NULL;
+                return -EINVAL;
+            }
+
+            *out_func = mfex_impls[i].extract_func;
+            return 0;
+        }
+    }
+
+    return -EINVAL;
+}
diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
new file mode 100644
index 000000000..074b3ee16
--- /dev/null
+++ b/lib/dpif-netdev-private-extract.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2021 Intel.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MFEX_AVX512_EXTRACT
+#define MFEX_AVX512_EXTRACT 1
+
+#include <sys/types.h>
+
+/* Forward declarations. */
+struct dp_packet;
+struct miniflow;
+struct dp_netdev_pmd_thread;
+struct dp_packet_batch;
+struct netdev_flow_key;
+
+/* Function pointer prototype to be implemented in the optimized miniflow
+ * extract code.
+ * returns the hitmask of the processed packets on success.
+ * returns zero on failure.
+ */
+typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch *batch,
+                                          struct netdev_flow_key *keys,
+                                          uint32_t keys_size,
+                                          odp_port_t in_port,
+                                          struct dp_netdev_pmd_thread
+                                          *pmd_handle);
+
+/* Probe function is used to detect if this CPU has the ISA required
+ * to run the optimized miniflow implementation.
+ * returns one on successful probe.
+ * returns zero on failure.
+ */
+typedef int32_t (*miniflow_extract_probe)(void);
+
+/* Structure representing the attributes of an optimized implementation. */
+struct dpif_miniflow_extract_impl {
+    /* When non-zero, this impl has passed the probe() checks. */
+    bool available;
+
+    /* Probe function is used to detect if this CPU has the ISA required
+     * to run the optimized miniflow implementation.
+     */
+    miniflow_extract_probe probe;
+
+    /* Optional function to call to extract miniflows for a burst of packets.
+     */
+    miniflow_extract_func extract_func;
+
+    /* Name of the optimized implementation. */
+    char *name;
+};
+
+
+/* Enum to hold implementation indexes. The list is traversed
+ * linearly as from the ISA perspective, the VBMI version
+ * should always come before the generic AVX512-F version.
+ */
+enum dpif_miniflow_extract_impl_idx {
+    MFEX_IMPL_SCALAR,
+    MFEX_IMPL_MAX
+};
+
+/* This function returns all available implementations to the caller. The
+ * quantity of implementations is returned by the int return value.
+ */
+uint32_t
+dp_mfex_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list,
+                 size_t n);
+
+/* This function checks all available MFEX implementations, and selects the
+ * returns the function pointer to the one requested by "name".
+ */
+int32_t
+dp_mfex_impl_get_by_name(const char *name, miniflow_extract_func *out_func);
+
+/* Returns the default MFEX which is first ./configure selected, but can be
+ * overridden at runtime. */
+miniflow_extract_func dp_mfex_impl_get_default(void);
+
+/* Overrides the default MFEX with the user set MFEX. */
+int32_t dp_mfex_impl_set_default_by_name(const char *name);
+
+
+/* Initializes the available miniflow extract implementations by probing for
+ * the CPU ISA requirements. As the runtime available CPU ISA does not change
+ * and the required ISA of the implementation also does not change, it is safe
+ * to cache the probe() results, and not call probe() at runtime.
+ */
+void
+dpif_miniflow_extract_init(void);
+
+#endif /* MFEX_AVX512_EXTRACT */
diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h
index ba79c4a0a..a4c092b69 100644
--- a/lib/dpif-netdev-private-thread.h
+++ b/lib/dpif-netdev-private-thread.h
@@ -27,6 +27,11 @@
 #include <stdint.h>
 
 #include "cmap.h"
+
+#include "dpif-netdev-private-dfc.h"
+#include "dpif-netdev-private-dpif.h"
+#include "dpif-netdev-perf.h"
+#include "dpif-netdev-private-extract.h"
 #include "openvswitch/thread.h"
 
 #ifdef  __cplusplus
@@ -110,6 +115,9 @@ struct dp_netdev_pmd_thread {
     /* Pointer for per-DPIF implementation scratch space. */
     void *netdev_input_func_userdata;
 
+    /* Function pointer to call for miniflow_extract() functionality. */
+    ATOMIC(miniflow_extract_func) miniflow_extract_opt;
+
     struct seq *reload_seq;
     uint64_t last_reload_seq;
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 6203cf656..2043c9ba2 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -46,6 +46,7 @@
 #include "dpif.h"
 #include "dpif-netdev-lookup.h"
 #include "dpif-netdev-perf.h"
+#include "dpif-netdev-private-extract.h"
 #include "dpif-provider.h"
 #include "dummy.h"
 #include "fat-rwlock.h"
@@ -1069,6 +1070,97 @@ dpif_netdev_impl_set(struct unixctl_conn *conn, int argc OVS_UNUSED,
     ds_destroy(&reply);
 }
 
+static void
+dpif_miniflow_extract_impl_get(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                               const char *argv[] OVS_UNUSED,
+                               void *aux OVS_UNUSED)
+{
+    struct ds reply = DS_EMPTY_INITIALIZER;
+    struct shash_node *node;
+    uint32_t count = 0;
+
+    SHASH_FOR_EACH (node, &dp_netdevs) {
+        struct dp_netdev *dp = node->data;
+
+        /* Get PMD threads list. */
+        size_t n;
+        struct dp_netdev_pmd_thread **pmd_list;
+        sorted_poll_thread_list(dp, &pmd_list, &n);
+        count = dp_mfex_impl_get(&reply, pmd_list, n);
+    }
+
+    if (count == 0) {
+        unixctl_command_reply_error(conn, "Error getting Mfex names.");
+    } else {
+        unixctl_command_reply(conn, ds_cstr(&reply));
+    }
+
+    ds_destroy(&reply);
+}
+
+static void
+dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc,
+                     const char *argv[], void *aux OVS_UNUSED)
+{
+    /* This function requires just one parameter, the miniflow name.
+     */
+    const char *mfex_name = argv[1];
+    struct shash_node *node;
+
+    static const char *error_description[2] = {
+        "Unknown miniflow implementation",
+        "implementation doesn't exist",
+    };
+
+    ovs_mutex_lock(&dp_netdev_mutex);
+    int32_t err = dp_mfex_impl_set_default_by_name(mfex_name);
+
+    if (err) {
+        struct ds reply = DS_EMPTY_INITIALIZER;
+        ds_put_format(&reply,
+                      "Miniflow implementation not available: %s %s.\n",
+                      error_description[ (err == EINVAL) ], mfex_name);
+        const char *reply_str = ds_cstr(&reply);
+        unixctl_command_reply_error(conn, reply_str);
+        VLOG_INFO("%s", reply_str);
+        ds_destroy(&reply);
+        ovs_mutex_unlock(&dp_netdev_mutex);
+        return;
+    }
+
+    SHASH_FOR_EACH (node, &dp_netdevs) {
+        struct dp_netdev *dp = node->data;
+
+        /* Get PMD threads list. */
+        size_t n;
+        struct dp_netdev_pmd_thread **pmd_list;
+        sorted_poll_thread_list(dp, &pmd_list, &n);
+
+        for (size_t i = 0; i < n; i++) {
+            struct dp_netdev_pmd_thread *pmd = pmd_list[i];
+            if (pmd->core_id == NON_PMD_CORE_ID) {
+                continue;
+            }
+
+            /* Initialize MFEX function pointer to the newly configured
+             * default. */
+            miniflow_extract_func default_func = dp_mfex_impl_get_default();
+            atomic_uintptr_t *pmd_func = (void *) &pmd->miniflow_extract_opt;
+            atomic_store_relaxed(pmd_func, (uintptr_t) default_func);
+        };
+    }
+
+    ovs_mutex_unlock(&dp_netdev_mutex);
+
+    /* Reply with success to command. */
+    struct ds reply = DS_EMPTY_INITIALIZER;
+    ds_put_format(&reply, "Miniflow implementation set to %s.\n", mfex_name);
+    const char *reply_str = ds_cstr(&reply);
+    unixctl_command_reply(conn, reply_str);
+    VLOG_INFO("%s", reply_str);
+    ds_destroy(&reply);
+}
+
 static void
 dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc,
                           const char *argv[], void *aux OVS_UNUSED)
@@ -1298,6 +1390,13 @@ dpif_netdev_init(void)
     unixctl_command_register("dpif-netdev/dpif-impl-get", "",
                              0, 0, dpif_netdev_impl_get,
                              NULL);
+    unixctl_command_register("dpif-netdev/miniflow-parser-set",
+                             "miniflow implementation name",
+                             1, 1, dpif_miniflow_extract_impl_set,
+                             NULL);
+    unixctl_command_register("dpif-netdev/miniflow-parser-get", "",
+                             0, 0, dpif_miniflow_extract_impl_get,
+                             NULL);
     return 0;
 }
 
@@ -1499,6 +1598,8 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
 
     dp->conntrack = conntrack_init();
 
+    dpif_miniflow_extract_init();
+
     atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
     atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL);
 
@@ -6206,6 +6307,11 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     atomic_uintptr_t *pmd_func = (void *) &pmd->netdev_input_func;
     atomic_init(pmd_func, (uintptr_t) default_func);
 
+    /* Init default miniflow_extract function */
+    miniflow_extract_func mfex_func = dp_mfex_impl_get_default();
+    atomic_uintptr_t *pmd_func_mfex = (void *)&pmd->miniflow_extract_opt;
+    atomic_store_relaxed(pmd_func_mfex, (uintptr_t) mfex_func);
+
     /* init the 'flow_cache' since there is no
      * actual thread created for NON_PMD_CORE_ID. */
     if (core_id == NON_PMD_CORE_ID) {
@@ -6795,6 +6901,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
     size_t n_missed = 0, n_emc_hit = 0, n_phwol_hit = 0;
     struct dfc_cache *cache = &pmd->flow_cache;
     struct dp_packet *packet;
+    struct dp_packet_batch single_packet;
     const size_t cnt = dp_packet_batch_size(packets_);
     uint32_t cur_min = pmd->ctx.emc_insert_min;
     int i;
@@ -6803,6 +6910,11 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
     size_t map_cnt = 0;
     bool batch_enable = true;
 
+    single_packet.count = 1;
+
+    miniflow_extract_func mfex_func;
+    atomic_read_relaxed(&pmd->miniflow_extract_opt, &mfex_func);
+
     atomic_read_relaxed(&pmd->dp->smc_enable_db, &smc_enable_db);
     pmd_perf_update_counter(&pmd->perf_stats,
                             md_is_valid ? PMD_STAT_RECIRC : PMD_STAT_RECV,
@@ -6853,7 +6965,20 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
             }
         }
 
-        miniflow_extract(packet, &key->mf);
+        /* Set the count and packet for miniflow_opt with batch_size 1. */
+        if ((mfex_func) && (!md_is_valid)) {
+            single_packet.packets[0] = packet;
+            int mf_ret;
+
+            mf_ret = mfex_func(&single_packet, key, 1, port_no, pmd);
+            /* Fallback to original miniflow_extract if there is a miss. */
+            if (!mf_ret) {
+                miniflow_extract(packet, &key->mf);
+            }
+        } else {
+            miniflow_extract(packet, &key->mf);
+        }
+
         key->len = 0; /* Not computed yet. */
         key->hash =
                 (md_is_valid == false)
-- 
2.32.0



More information about the dev mailing list