[ovs-dev] [PATCH v12 2/6] netdev-dpdk: Convert initialization from cmdline to db
Aaron Conole
aconole at redhat.com
Fri Apr 29 13:36:36 UTC 2016
Daniele Di Proietto <diproiettod at vmware.com> writes:
> Hi Aaron,
>
> thanks (again!) for this patch. Few comments:
>
> * As I mentioned on my previous round of review, I don't think it's necessary to pass the whole Open_vSwitch table to dpdk_init(). I.e., instead of doing
>
> dpdk_init(&cfg);
>
> I'd prefer
>
> if (cfg) {
> dpdk_init(&cfg->other_config);
> }
>
> This way we don't have to include "vswitch-idl.h".
>
>
> * I suggested 'dpdk-mem-channels', because I think people who want to use that will be comfortable passing '-n' in dpdk-extra. What do you think? Is there any reason why you think it's worth keeping?
D'oh! I had done both of these changes, but they were dropped during the
rebase. I'll cook a fix asap.
> * Sorry for not noticing this before: it seems that netdev_dpdk_register() now always registers the netdev classes, even though they cannot be created. The registered classes end up in the database in the iface_type column of the Open_vSwitch table, so controllers might think that they're available. I think we should register the classes only when DPDK is initialized.
I had an issue doing this, back when the I had the lazy
initialization. I don't remember the details, though. I'll try it again,
and see what happens.
> Two minor nits inline,
>
> Thanks
Thanks so much for the review, Daniele!
-Aaron
> On 26/04/2016 12:42, "Aaron Conole" <aconole at redhat.com> wrote:
>
>>Existing DPDK integration is provided by use of command line options which
>>must be split out and passed to librte in a special manner. However, this
>>forces any configuration to be passed by way of a special DPDK flag, and
>>interferes with ovs+dpdk packaging solutions.
>>
>>This commit delays dpdk initialization until after the OVS database
>>connection is established, at which point ovs initializes librte. It
>>pulls all of the config data from the OVS database, and assembles a
>>new argv/argc pair to be passed along.
>>
>>Signed-off-by: Aaron Conole <aconole at redhat.com>
>>Acked-by: Kevin Traynor <kevin.traynor at intel.com>
>>---
>>Previous: http://openvswitch.org/pipermail/dev/2016-April/069028.html
>>
>>v12:
>>* Squashed NEWS change into this commit
>>
>> FAQ.md | 6 +-
>> INSTALL.DPDK.md | 82 +++++++++---
>> NEWS | 5 +
>> lib/automake.mk | 4 +
>> lib/netdev-dpdk.c | 304 +++++++++++++++++++++++++++++++++------------
>> lib/netdev-dpdk.h | 14 +--
>> lib/netdev-nodpdk.c | 21 ++++
>> tests/ofproto-macros.at | 3 +-
>> utilities/ovs-dev.py | 13 +-
>> vswitchd/bridge.c | 3 +
>> vswitchd/ovs-vswitchd.8.in | 6 +-
>> vswitchd/ovs-vswitchd.c | 27 +---
>> vswitchd/vswitch.xml | 120 ++++++++++++++++++
>> 13 files changed, 463 insertions(+), 145 deletions(-)
>> create mode 100644 lib/netdev-nodpdk.c
>>
>
> [...]
>
>>
>>@@ -2732,22 +2734,20 @@ static const struct dpdk_qos_ops egress_policer_ops = {
>>
>> static int
>> process_vhost_flags(char *flag, char *default_val, int size,
>>- char **argv, char **new_val)
>>+ const struct ovsrec_open_vswitch *ovs_cfg,
>>+ char **new_val)
>> {
>>+ const char *val;
>> int changed = 0;
>>
>>+ val = smap_get(&ovs_cfg->other_config, flag);
>>+
>> /* Depending on which version of vhost is in use, process the vhost-specific
>>- * flag if it is provided on the vswitchd command line, otherwise resort to
>>- * a default value.
>>- *
>>- * For vhost-user: Process "-vhost_sock_dir" to set the custom location of
>>- * the vhost-user socket(s).
>>- * For vhost-cuse: Process "-cuse_dev_name" to set the custom name of the
>>- * vhost-cuse character device.
>>+ * flag if it is provided, otherwise resort to default value.
>> */
>>- if (!strcmp(argv[1], flag) && (strlen(argv[2]) <= size)) {
>>+ if (val && (strlen(val) <= size)) {
>> changed = 1;
>>- *new_val = xstrdup(argv[2]);
>>+ *new_val = xstrdup(val);
>> VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
>> } else {
>> VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
>>@@ -2757,68 +2757,186 @@ process_vhost_flags(char *flag, char *default_val, int size,
>> return changed;
>> }
>>
>>-int
>>-dpdk_init(int argc, char **argv)
>>+static char **
>>+grow_argv(char ***argv, size_t cur_siz, size_t grow_by)
>> {
>>- int result;
>>- int base = 0;
>>- char *pragram_name = argv[0];
>>- int err;
>>- int isset;
>>- cpu_set_t cpuset;
>>+ return xrealloc(*argv, sizeof(char *) * (cur_siz + grow_by));
>>+}
>>
>>- if (argc < 2 || strcmp(argv[1], "--dpdk"))
>>- return 0;
>>+static void
>>+dpdk_option_extend(char ***argv, int argc, const char *option,
>>+ const char *value)
>>+{
>>+ char **newargv = grow_argv(argv, argc, 2);
>>+ *argv = newargv;
>>+ newargv[argc] = xstrdup(option);
>>+ newargv[argc+1] = xstrdup(value);
>>+}
>>
>>- /* Remove the --dpdk argument from arg list.*/
>>- argc--;
>>- argv++;
>>+static int
>>+construct_dpdk_options(const struct ovsrec_open_vswitch *ovs_cfg,
>>+ char ***argv, const int initial_size)
>>+{
>>+ struct dpdk_options_map {
>>+ const char *ovs_configuration;
>>+ const char *dpdk_option;
>>+ bool default_enabled;
>>+ const char *default_value;
>>+ } opts[] = {
>>+ {"dpdk-lcore-mask", "-c", false, NULL},
>>+ {"dpdk-mem-channels", "-n", false, NULL},
>>+ {"dpdk-hugepage-dir", "--huge-dir", false, NULL},
>>+ };
>>+
>>+ int i, ret = initial_size;
>>+
>>+ /*First, construct from the flat-options (non-mutex)*/
>>+ for (i = 0; i < ARRAY_SIZE(opts); ++i) {
>>+ const char *lookup = smap_get(&ovs_cfg->other_config,
>>+ opts[i].ovs_configuration);
>>+ if (!lookup && opts[i].default_enabled) {
>>+ lookup = opts[i].default_value;
>>+ }
>>
>>- /* Reject --user option */
>>- int i;
>>- for (i = 0; i < argc; i++) {
>>- if (!strcmp(argv[i], "--user")) {
>>- VLOG_ERR("Can not mix --dpdk and --user options, aborting.");
>>+ if (lookup) {
>>+ dpdk_option_extend(argv, ret, opts[i].dpdk_option, lookup);
>>+ ret += 2;
>> }
>> }
>>
>>+ return ret;
>>+}
>>+
>>+#define MAX_DPDK_EXCL_OPTS 10
>>+
>>+static int
>>+construct_dpdk_mutex_options(const struct ovsrec_open_vswitch *ovs_cfg,
>>+ char ***argv, const int initial_size)
>>+{
>>+ struct dpdk_exclusive_options_map {
>>+ const char *category;
>>+ const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
>>+ const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
>>+ const char *default_value;
>>+ int default_option;
>>+ } excl_opts[] = {
>>+ {"memory type",
>>+ {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
>>+ {"-m", "--socket-mem", NULL,},
>>+ "1024,0", 1
>>+ },
>>+ };
>>+
>>+ int i, ret = initial_size;
>>+ for (i = 0; i < ARRAY_SIZE(excl_opts); ++i) {
>>+ int found_opts = 0, scan, found_pos = -1;
>>+ const char *found_value;
>>+ struct dpdk_exclusive_options_map *popt = &excl_opts[i];
>>+
>>+ for (scan = 0; scan < MAX_DPDK_EXCL_OPTS
>>+ && popt->ovs_dpdk_options[scan]; ++scan) {
>>+ const char *lookup = smap_get(&ovs_cfg->other_config,
>>+ popt->ovs_dpdk_options[scan]);
>>+ if (lookup && strlen(lookup)) {
>>+ found_opts++;
>>+ found_pos = scan;
>>+ found_value = lookup;
>>+ }
>>+ }
>>+
>>+ if (!found_opts) {
>>+ if (popt->default_option) {
>>+ found_pos = popt->default_option;
>>+ found_value = popt->default_value;
>>+ } else {
>>+ continue;
>>+ }
>>+ }
>>+
>>+ if (found_opts > 1) {
>>+ VLOG_ERR("Multiple defined options for %s. Please check your"
>>+ " database settings and reconfigure if necessary.",
>>+ popt->category);
>>+ }
>>+
>>+ dpdk_option_extend(argv, ret, popt->eal_dpdk_options[found_pos],
>>+ found_value);
>>+ ret += 2;
>>+ }
>>+
>>+ return ret;
>>+}
>>+
>>+static int
>>+get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv)
>>+{
>>+ int i = construct_dpdk_options(ovs_cfg, argv, 1);
>>+ i = construct_dpdk_mutex_options(ovs_cfg, argv, i);
>>+ return i;
>>+}
>>+
>>+static char **dpdk_argv;
>>+static int dpdk_argc;
>>+
>>+static void
>>+deferred_argv_release(void)
>>+{
>>+ int result;
>>+ for (result = 0; result < dpdk_argc; ++result) {
>>+ free(dpdk_argv[result]);
>>+ }
>>+
>>+ free(dpdk_argv);
>>+}
>>+
>>+static void
>>+dpdk_init__(const struct ovsrec_open_vswitch *ovs_cfg)
>>+{
>>+ char **argv = NULL;
>>+ int result;
>>+ int argc;
>>+ int err;
>>+ cpu_set_t cpuset;
>>+
>>+ if (!smap_get_bool(&ovs_cfg->other_config, "dpdk-init", false)) {
>>+ VLOG_INFO("DPDK Disabled - to change this requires a restart.\n");
>>+ return;
>>+ }
>>+
>>+ VLOG_INFO("DPDK Enabled, initializing");
>>+
>> #ifdef VHOST_CUSE
>>- if (process_vhost_flags("-cuse_dev_name", xstrdup("vhost-net"),
>>- PATH_MAX, argv, &cuse_dev_name)) {
>>+ if (process_vhost_flags("cuse-dev-name", xstrdup("vhost-net"),
>>+ PATH_MAX, ovs_cfg, &cuse_dev_name)) {
>> #else
>>- if (process_vhost_flags("-vhost_sock_dir", xstrdup(ovs_rundir()),
>>- NAME_MAX, argv, &vhost_sock_dir)) {
>>+ if (process_vhost_flags("vhost-sock-dir", xstrdup(ovs_rundir()),
>>+ NAME_MAX, ovs_cfg, &vhost_sock_dir)) {
>> struct stat s;
>>- int err;
>>
>> err = stat(vhost_sock_dir, &s);
>> if (err) {
>>- VLOG_ERR("vHostUser socket DIR '%s' does not exist.",
>>- vhost_sock_dir);
>>- return err;
>>+ VLOG_ERR("vhost-user sock directory '%s' does not exist.",
>>+ vhost_sock_dir);
>> }
>> #endif
>>- /* Remove the vhost flag configuration parameters from the argument
>>- * list, so that the correct elements are passed to the DPDK
>>- * initialization function
>>- */
>>- argc -= 2;
>>- argv += 2; /* Increment by two to bypass the vhost flag arguments */
>>- base = 2;
>> }
>>
>> /* Get the main thread affinity */
>> CPU_ZERO(&cpuset);
>>- err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
>>+ err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t),
>>+ &cpuset);
>> if (err) {
>> VLOG_ERR("Thread getaffinity error %d.", err);
>>- return err;
>> }
>>
>>- /* Keep the program name argument as this is needed for call to
>>- * rte_eal_init()
>>- */
>>- argv[0] = pragram_name;
>>+ argv = grow_argv(&argv, 0, 1);
>>+ argv[0] = xstrdup(ovs_get_program_name());
>>+ argc = get_dpdk_args(ovs_cfg, &argv);
>>+
>>+ argv = grow_argv(&argv, argc, 1);
>>+ argv[argc] = 0;
>
> sparse complains about this: we should use NULL instead of 0
sorry, I'll fix it.
>>+
>>+ optind = 1;
>>
>> /* Make sure things are initialized ... */
>> result = rte_eal_init(argc, argv);
>>@@ -2827,23 +2945,51 @@ dpdk_init(int argc, char **argv)
>> }
>>
>> /* Set the main thread affinity back to pre rte_eal_init() value */
>>- err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
>>- if (err) {
>>- VLOG_ERR("Thread setaffinity error %d", err);
>>- return err;
>>+ if (!err) {
>>+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t),
>>+ &cpuset);
>>+ if (err) {
>>+ VLOG_ERR("Thread setaffinity error %d", err);
>>+ }
>> }
>>
>>+ dpdk_argv = argv;
>>+ dpdk_argc = argc;
>>+
>>+ atexit(deferred_argv_release);
>>+
>> rte_memzone_dump(stdout);
>> rte_eal_init_ret = 0;
>>
>>- if (argc > result) {
>>- argv[result] = argv[0];
>>- }
>>-
>> /* We are called from the main thread here */
>> RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
>>
>>- return result + 1 + base;
>>+ ovs_thread_create("dpdk_watchdog", dpdk_watchdog, NULL);
>>+
>>+#ifdef VHOST_CUSE
>>+ /* Register CUSE device to handle IOCTLs.
>>+ * Unless otherwise specified, cuse_dev_name is set to vhost-net.
>>+ */
>>+ err = rte_vhost_driver_register(cuse_dev_name);
>>+
>>+ if (err != 0) {
>>+ VLOG_ERR("CUSE device setup failure.");
>>+ return;
>>+ }
>>+#endif
>>+
>>+ dpdk_vhost_class_init();
>>+}
>>+
>>+void
>>+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg)
>>+{
>>+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>>+
>>+ if (ovs_cfg && ovsthread_once_start(&once)) {
>>+ dpdk_init__(ovs_cfg);
>>+ ovsthread_once_done(&once);
>>+ }
>> }
>>
>> static const struct netdev_class dpdk_class =
>>@@ -2907,10 +3053,6 @@ netdev_dpdk_register(void)
>> {
>> static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>>
>>- if (rte_eal_init_ret) {
>>- return;
>>- }
>>-
>> if (ovsthread_once_start(&once)) {
>> dpdk_common_init();
>> netdev_register_provider(&dpdk_class);
>>diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
>>index 646d3e2..143a609 100644
>>--- a/lib/netdev-dpdk.h
>>+++ b/lib/netdev-dpdk.h
>>@@ -4,6 +4,7 @@
>> #include <config.h>
>>
>> struct dp_packet;
>>+struct ovsrec_open_vswitch;
>>
>> #ifdef DPDK_NETDEV
>>
>>@@ -22,7 +23,6 @@ struct dp_packet;
>>
>> #define NON_PMD_CORE_ID LCORE_ID_ANY
>>
>>-int dpdk_init(int argc, char **argv);
>> void netdev_dpdk_register(void);
>> void free_dpdk_buf(struct dp_packet *);
>> int pmd_thread_setaffinity_cpu(unsigned cpu);
>>@@ -33,15 +33,6 @@ int pmd_thread_setaffinity_cpu(unsigned cpu);
>>
>> #include "util.h"
>>
>>-static inline int
>>-dpdk_init(int argc, char **argv)
>>-{
>>- if (argc >= 2 && !strcmp(argv[1], "--dpdk")) {
>>- ovs_fatal(0, "DPDK support not built into this copy of Open vSwitch.");
>>- }
>>- return 0;
>>-}
>>-
>> static inline void
>> netdev_dpdk_register(void)
>> {
>>@@ -61,4 +52,7 @@ pmd_thread_setaffinity_cpu(unsigned cpu OVS_UNUSED)
>> }
>>
>> #endif /* DPDK_NETDEV */
>>+
>>+void dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg);
>>+
>> #endif
>>diff --git a/lib/netdev-nodpdk.c b/lib/netdev-nodpdk.c
>>new file mode 100644
>>index 0000000..28e637a
>>--- /dev/null
>>+++ b/lib/netdev-nodpdk.c
>>@@ -0,0 +1,21 @@
>>+#include <config.h>
>>+#include "netdev-dpdk.h"
>>+#include "smap.h"
>>+#include "ovs-thread.h"
>>+#include "openvswitch/vlog.h"
>>+#include "vswitch-idl.h"
>>+
>>+VLOG_DEFINE_THIS_MODULE(dpdk);
>>+
>>+void
>>+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED)
>
> This is used now
Okay, I'll fix.
>>+{
>>+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>>+
>>+ if (ovs_cfg && ovsthread_once_start(&once)) {
>>+ if (smap_get_bool(&ovs_cfg->other_config, "dpdk-init", false)) {
>>+ VLOG_ERR("DPDK not supported in this copy of Open vSwitch.");
>>+ }
>>+ ovsthread_once_done(&once);
>>+ }
>>+}
More information about the dev
mailing list