[ovs-dev] [PATCH v12 2/6] netdev-dpdk: Convert initialization from cmdline to db

Aaron Conole aconole at redhat.com
Fri Apr 29 13:36:36 UTC 2016


Daniele Di Proietto <diproiettod at vmware.com> writes:

> Hi Aaron,
>
> thanks (again!) for this patch. Few comments:
>
> * As I mentioned on my previous round of review, I don't think it's necessary to pass the whole Open_vSwitch table to dpdk_init(). I.e., instead of doing
>
>   dpdk_init(&cfg);
>
>   I'd prefer
>
>   if (cfg) {
>       dpdk_init(&cfg->other_config);
>   }
>
>  This way we don't have to include "vswitch-idl.h".
>
>
> * I suggested 'dpdk-mem-channels', because I think people who want to use that will be comfortable passing '-n' in dpdk-extra.  What do you think? Is there any reason why you think it's worth keeping?

D'oh! I had done both of these changes, but they were dropped during the
rebase. I'll cook a fix asap.

> * Sorry for not noticing this before: it seems that netdev_dpdk_register() now always registers the netdev classes, even though they cannot be created.  The registered classes end up in the database in the iface_type column of the Open_vSwitch table, so controllers might think that they're available.  I think we should register the classes only when DPDK is initialized.

I had an issue doing this, back when the I had the lazy
initialization. I don't remember the details, though. I'll try it again,
and see what happens.

> Two minor nits inline,
>
> Thanks

Thanks so much for the review, Daniele!

-Aaron

> On 26/04/2016 12:42, "Aaron Conole" <aconole at redhat.com> wrote:
>
>>Existing DPDK integration is provided by use of command line options which
>>must be split out and passed to librte in a special manner. However, this
>>forces any configuration to be passed by way of a special DPDK flag, and
>>interferes with ovs+dpdk packaging solutions.
>>
>>This commit delays dpdk initialization until after the OVS database
>>connection is established, at which point ovs initializes librte. It
>>pulls all of the config data from the OVS database, and assembles a
>>new argv/argc pair to be passed along.
>>
>>Signed-off-by: Aaron Conole <aconole at redhat.com>
>>Acked-by: Kevin Traynor <kevin.traynor at intel.com>
>>---
>>Previous: http://openvswitch.org/pipermail/dev/2016-April/069028.html
>>
>>v12:
>>* Squashed NEWS change into this commit
>>
>> FAQ.md                     |   6 +-
>> INSTALL.DPDK.md            |  82 +++++++++---
>> NEWS                       |   5 +
>> lib/automake.mk            |   4 +
>> lib/netdev-dpdk.c          | 304 +++++++++++++++++++++++++++++++++------------
>> lib/netdev-dpdk.h          |  14 +--
>> lib/netdev-nodpdk.c        |  21 ++++
>> tests/ofproto-macros.at    |   3 +-
>> utilities/ovs-dev.py       |  13 +-
>> vswitchd/bridge.c          |   3 +
>> vswitchd/ovs-vswitchd.8.in |   6 +-
>> vswitchd/ovs-vswitchd.c    |  27 +---
>> vswitchd/vswitch.xml       | 120 ++++++++++++++++++
>> 13 files changed, 463 insertions(+), 145 deletions(-)
>> create mode 100644 lib/netdev-nodpdk.c
>>
>
> [...]
>
>>
>>@@ -2732,22 +2734,20 @@ static const struct dpdk_qos_ops egress_policer_ops = {
>> 
>> static int
>> process_vhost_flags(char *flag, char *default_val, int size,
>>-                    char **argv, char **new_val)
>>+                    const struct ovsrec_open_vswitch *ovs_cfg,
>>+                    char **new_val)
>> {
>>+    const char *val;
>>     int changed = 0;
>> 
>>+    val = smap_get(&ovs_cfg->other_config, flag);
>>+
>>     /* Depending on which version of vhost is in use, process the vhost-specific
>>-     * flag if it is provided on the vswitchd command line, otherwise resort to
>>-     * a default value.
>>-     *
>>-     * For vhost-user: Process "-vhost_sock_dir" to set the custom location of
>>-     * the vhost-user socket(s).
>>-     * For vhost-cuse: Process "-cuse_dev_name" to set the custom name of the
>>-     * vhost-cuse character device.
>>+     * flag if it is provided, otherwise resort to default value.
>>      */
>>-    if (!strcmp(argv[1], flag) && (strlen(argv[2]) <= size)) {
>>+    if (val && (strlen(val) <= size)) {
>>         changed = 1;
>>-        *new_val = xstrdup(argv[2]);
>>+        *new_val = xstrdup(val);
>>         VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
>>     } else {
>>         VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
>>@@ -2757,68 +2757,186 @@ process_vhost_flags(char *flag, char *default_val, int size,
>>     return changed;
>> }
>> 
>>-int
>>-dpdk_init(int argc, char **argv)
>>+static char **
>>+grow_argv(char ***argv, size_t cur_siz, size_t grow_by)
>> {
>>-    int result;
>>-    int base = 0;
>>-    char *pragram_name = argv[0];
>>-    int err;
>>-    int isset;
>>-    cpu_set_t cpuset;
>>+    return xrealloc(*argv, sizeof(char *) * (cur_siz + grow_by));
>>+}
>> 
>>-    if (argc < 2 || strcmp(argv[1], "--dpdk"))
>>-        return 0;
>>+static void
>>+dpdk_option_extend(char ***argv, int argc, const char *option,
>>+                   const char *value)
>>+{
>>+    char **newargv = grow_argv(argv, argc, 2);
>>+    *argv = newargv;
>>+    newargv[argc] = xstrdup(option);
>>+    newargv[argc+1] = xstrdup(value);
>>+}
>> 
>>-    /* Remove the --dpdk argument from arg list.*/
>>-    argc--;
>>-    argv++;
>>+static int
>>+construct_dpdk_options(const struct ovsrec_open_vswitch *ovs_cfg,
>>+                       char ***argv, const int initial_size)
>>+{
>>+    struct dpdk_options_map {
>>+        const char *ovs_configuration;
>>+        const char *dpdk_option;
>>+        bool default_enabled;
>>+        const char *default_value;
>>+    } opts[] = {
>>+        {"dpdk-lcore-mask", "-c", false, NULL},
>>+        {"dpdk-mem-channels", "-n", false, NULL},
>>+        {"dpdk-hugepage-dir", "--huge-dir", false, NULL},
>>+    };
>>+
>>+    int i, ret = initial_size;
>>+
>>+    /*First, construct from the flat-options (non-mutex)*/
>>+    for (i = 0; i < ARRAY_SIZE(opts); ++i) {
>>+        const char *lookup = smap_get(&ovs_cfg->other_config,
>>+                                      opts[i].ovs_configuration);
>>+        if (!lookup && opts[i].default_enabled) {
>>+            lookup = opts[i].default_value;
>>+        }
>> 
>>-    /* Reject --user option */
>>-    int i;
>>-    for (i = 0; i < argc; i++) {
>>-        if (!strcmp(argv[i], "--user")) {
>>-            VLOG_ERR("Can not mix --dpdk and --user options, aborting.");
>>+        if (lookup) {
>>+            dpdk_option_extend(argv, ret, opts[i].dpdk_option, lookup);
>>+            ret += 2;
>>         }
>>     }
>> 
>>+    return ret;
>>+}
>>+
>>+#define MAX_DPDK_EXCL_OPTS 10
>>+
>>+static int
>>+construct_dpdk_mutex_options(const struct ovsrec_open_vswitch *ovs_cfg,
>>+                             char ***argv, const int initial_size)
>>+{
>>+    struct dpdk_exclusive_options_map {
>>+        const char *category;
>>+        const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
>>+        const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
>>+        const char *default_value;
>>+        int default_option;
>>+    } excl_opts[] = {
>>+        {"memory type",
>>+         {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
>>+         {"-m",             "--socket-mem",    NULL,},
>>+         "1024,0", 1
>>+        },
>>+    };
>>+
>>+    int i, ret = initial_size;
>>+    for (i = 0; i < ARRAY_SIZE(excl_opts); ++i) {
>>+        int found_opts = 0, scan, found_pos = -1;
>>+        const char *found_value;
>>+        struct dpdk_exclusive_options_map *popt = &excl_opts[i];
>>+
>>+        for (scan = 0; scan < MAX_DPDK_EXCL_OPTS
>>+                 && popt->ovs_dpdk_options[scan]; ++scan) {
>>+            const char *lookup = smap_get(&ovs_cfg->other_config,
>>+                                          popt->ovs_dpdk_options[scan]);
>>+            if (lookup && strlen(lookup)) {
>>+                found_opts++;
>>+                found_pos = scan;
>>+                found_value = lookup;
>>+            }
>>+        }
>>+
>>+        if (!found_opts) {
>>+            if (popt->default_option) {
>>+                found_pos = popt->default_option;
>>+                found_value = popt->default_value;
>>+            } else {
>>+                continue;
>>+            }
>>+        }
>>+
>>+        if (found_opts > 1) {
>>+            VLOG_ERR("Multiple defined options for %s. Please check your"
>>+                     " database settings and reconfigure if necessary.",
>>+                     popt->category);
>>+        }
>>+
>>+        dpdk_option_extend(argv, ret, popt->eal_dpdk_options[found_pos],
>>+                           found_value);
>>+        ret += 2;
>>+    }
>>+
>>+    return ret;
>>+}
>>+
>>+static int
>>+get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv)
>>+{
>>+    int i = construct_dpdk_options(ovs_cfg, argv, 1);
>>+    i = construct_dpdk_mutex_options(ovs_cfg, argv, i);
>>+    return i;
>>+}
>>+
>>+static char **dpdk_argv;
>>+static int dpdk_argc;
>>+
>>+static void
>>+deferred_argv_release(void)
>>+{
>>+    int result;
>>+    for (result = 0; result < dpdk_argc; ++result) {
>>+        free(dpdk_argv[result]);
>>+    }
>>+
>>+    free(dpdk_argv);
>>+}
>>+
>>+static void
>>+dpdk_init__(const struct ovsrec_open_vswitch *ovs_cfg)
>>+{
>>+    char **argv = NULL;
>>+    int result;
>>+    int argc;
>>+    int err;
>>+    cpu_set_t cpuset;
>>+
>>+    if (!smap_get_bool(&ovs_cfg->other_config, "dpdk-init", false)) {
>>+        VLOG_INFO("DPDK Disabled - to change this requires a restart.\n");
>>+        return;
>>+    }
>>+
>>+    VLOG_INFO("DPDK Enabled, initializing");
>>+
>> #ifdef VHOST_CUSE
>>-    if (process_vhost_flags("-cuse_dev_name", xstrdup("vhost-net"),
>>-                            PATH_MAX, argv, &cuse_dev_name)) {
>>+    if (process_vhost_flags("cuse-dev-name", xstrdup("vhost-net"),
>>+                            PATH_MAX, ovs_cfg, &cuse_dev_name)) {
>> #else
>>-    if (process_vhost_flags("-vhost_sock_dir", xstrdup(ovs_rundir()),
>>-                            NAME_MAX, argv, &vhost_sock_dir)) {
>>+    if (process_vhost_flags("vhost-sock-dir", xstrdup(ovs_rundir()),
>>+                            NAME_MAX, ovs_cfg, &vhost_sock_dir)) {
>>         struct stat s;
>>-        int err;
>> 
>>         err = stat(vhost_sock_dir, &s);
>>         if (err) {
>>-            VLOG_ERR("vHostUser socket DIR '%s' does not exist.",
>>-                     vhost_sock_dir);
>>-            return err;
>>+            VLOG_ERR("vhost-user sock directory '%s' does not exist.",
>>+                      vhost_sock_dir);
>>         }
>> #endif
>>-        /* Remove the vhost flag configuration parameters from the argument
>>-         * list, so that the correct elements are passed to the DPDK
>>-         * initialization function
>>-         */
>>-        argc -= 2;
>>-        argv += 2;    /* Increment by two to bypass the vhost flag arguments */
>>-        base = 2;
>>     }
>> 
>>     /* Get the main thread affinity */
>>     CPU_ZERO(&cpuset);
>>-    err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
>>+    err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t),
>>+                                 &cpuset);
>>     if (err) {
>>         VLOG_ERR("Thread getaffinity error %d.", err);
>>-        return err;
>>     }
>> 
>>-    /* Keep the program name argument as this is needed for call to
>>-     * rte_eal_init()
>>-     */
>>-    argv[0] = pragram_name;
>>+    argv = grow_argv(&argv, 0, 1);
>>+    argv[0] = xstrdup(ovs_get_program_name());
>>+    argc = get_dpdk_args(ovs_cfg, &argv);
>>+
>>+    argv = grow_argv(&argv, argc, 1);
>>+    argv[argc] = 0;
>
> sparse complains about this: we should use NULL instead of 0

sorry, I'll fix it.

>>+
>>+    optind = 1;
>> 
>>     /* Make sure things are initialized ... */
>>     result = rte_eal_init(argc, argv);
>>@@ -2827,23 +2945,51 @@ dpdk_init(int argc, char **argv)
>>     }
>> 
>>     /* Set the main thread affinity back to pre rte_eal_init() value */
>>-    err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
>>-    if (err) {
>>-        VLOG_ERR("Thread setaffinity error %d", err);
>>-        return err;
>>+    if (!err) {
>>+        err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t),
>>+                                     &cpuset);
>>+        if (err) {
>>+            VLOG_ERR("Thread setaffinity error %d", err);
>>+        }
>>     }
>> 
>>+    dpdk_argv = argv;
>>+    dpdk_argc = argc;
>>+
>>+    atexit(deferred_argv_release);
>>+
>>     rte_memzone_dump(stdout);
>>     rte_eal_init_ret = 0;
>> 
>>-    if (argc > result) {
>>-        argv[result] = argv[0];
>>-    }
>>-
>>     /* We are called from the main thread here */
>>     RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
>> 
>>-    return result + 1 + base;
>>+    ovs_thread_create("dpdk_watchdog", dpdk_watchdog, NULL);
>>+
>>+#ifdef VHOST_CUSE
>>+    /* Register CUSE device to handle IOCTLs.
>>+     * Unless otherwise specified, cuse_dev_name is set to vhost-net.
>>+     */
>>+    err = rte_vhost_driver_register(cuse_dev_name);
>>+
>>+    if (err != 0) {
>>+        VLOG_ERR("CUSE device setup failure.");
>>+        return;
>>+    }
>>+#endif
>>+
>>+    dpdk_vhost_class_init();
>>+}
>>+
>>+void
>>+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg)
>>+{
>>+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>>+
>>+    if (ovs_cfg && ovsthread_once_start(&once)) {
>>+        dpdk_init__(ovs_cfg);
>>+        ovsthread_once_done(&once);
>>+    }
>> }
>> 
>> static const struct netdev_class dpdk_class =
>>@@ -2907,10 +3053,6 @@ netdev_dpdk_register(void)
>> {
>>     static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>> 
>>-    if (rte_eal_init_ret) {
>>-        return;
>>-    }
>>-
>>     if (ovsthread_once_start(&once)) {
>>         dpdk_common_init();
>>         netdev_register_provider(&dpdk_class);
>>diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
>>index 646d3e2..143a609 100644
>>--- a/lib/netdev-dpdk.h
>>+++ b/lib/netdev-dpdk.h
>>@@ -4,6 +4,7 @@
>> #include <config.h>
>> 
>> struct dp_packet;
>>+struct ovsrec_open_vswitch;
>> 
>> #ifdef DPDK_NETDEV
>> 
>>@@ -22,7 +23,6 @@ struct dp_packet;
>> 
>> #define NON_PMD_CORE_ID LCORE_ID_ANY
>> 
>>-int dpdk_init(int argc, char **argv);
>> void netdev_dpdk_register(void);
>> void free_dpdk_buf(struct dp_packet *);
>> int pmd_thread_setaffinity_cpu(unsigned cpu);
>>@@ -33,15 +33,6 @@ int pmd_thread_setaffinity_cpu(unsigned cpu);
>> 
>> #include "util.h"
>> 
>>-static inline int
>>-dpdk_init(int argc, char **argv)
>>-{
>>-    if (argc >= 2 && !strcmp(argv[1], "--dpdk")) {
>>-        ovs_fatal(0, "DPDK support not built into this copy of Open vSwitch.");
>>-    }
>>-    return 0;
>>-}
>>-
>> static inline void
>> netdev_dpdk_register(void)
>> {
>>@@ -61,4 +52,7 @@ pmd_thread_setaffinity_cpu(unsigned cpu OVS_UNUSED)
>> }
>> 
>> #endif /* DPDK_NETDEV */
>>+
>>+void dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg);
>>+
>> #endif
>>diff --git a/lib/netdev-nodpdk.c b/lib/netdev-nodpdk.c
>>new file mode 100644
>>index 0000000..28e637a
>>--- /dev/null
>>+++ b/lib/netdev-nodpdk.c
>>@@ -0,0 +1,21 @@
>>+#include <config.h>
>>+#include "netdev-dpdk.h"
>>+#include "smap.h"
>>+#include "ovs-thread.h"
>>+#include "openvswitch/vlog.h"
>>+#include "vswitch-idl.h"
>>+
>>+VLOG_DEFINE_THIS_MODULE(dpdk);
>>+
>>+void
>>+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED)
>
> This is used now

Okay, I'll fix.

>>+{
>>+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>>+
>>+    if (ovs_cfg && ovsthread_once_start(&once)) {
>>+        if (smap_get_bool(&ovs_cfg->other_config, "dpdk-init", false)) {
>>+            VLOG_ERR("DPDK not supported in this copy of Open vSwitch.");
>>+        }
>>+        ovsthread_once_done(&once);
>>+    }
>>+}



More information about the dev mailing list