[ovs-dev] [PATCH 5/6] dpif-netdev: Allow configuring number of PMD threads.
Daniele Di Proietto
diproiettod at vmware.com
Thu Mar 12 18:04:36 UTC 2015
Dealing with CPU masks can be confusing and unnecessary for simple
configurations. This commit introduces the 'other_config:n-pmd-cores'
key to specify the desired number of CPU cores reserved to the PMD
threads. The 'other_config:pmd-cpu-mask' (if specified) overrides this
parameter.
Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
---
lib/dpif-netdev.c | 122 +++++++++++++++++++++++++++++++--------------
lib/dpif-provider.h | 7 ++-
lib/dpif.c | 6 ++-
lib/dpif.h | 2 +-
lib/ovs-numa.c | 15 ++----
lib/ovs-numa.h | 8 +--
ofproto/ofproto-dpif.c | 4 +-
ofproto/ofproto-provider.h | 2 +
ofproto/ofproto.c | 7 +++
ofproto/ofproto.h | 1 +
vswitchd/bridge.c | 2 +
vswitchd/vswitch.xml | 31 +++++++++++-
12 files changed, 146 insertions(+), 61 deletions(-)
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 1657621..3506432 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -209,10 +209,15 @@ struct dp_netdev {
* 'struct dp_netdev_pmd_thread' in 'per_pmd_key'. */
ovsthread_key_t per_pmd_key;
- /* Number of rx queues for each dpdk interface and the cpu mask
- * for pin of pmd threads. */
+ /* Number of rx queues for each dpdk interface */
size_t n_dpdk_rxqs;
+ /* Maximum number of PMD threads. Ignored if 'pmd_cmask' != NULL */
+ int n_pmd_threads;
+ /* CPU cores used for PMD threads. There will be one thread for each core
+ * set in the CPU mask. */
char *pmd_cmask;
+ /* Non pmd threads will be restricted to use the CPU set specified by this
+ * mask */
char *nonpmd_cmask;
uint64_t last_tnl_conf_seq;
};
@@ -436,12 +441,12 @@ static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp,
static struct dp_netdev_pmd_thread *
dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
-static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
-static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
+static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id, int);
static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp);
static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd);
+static int get_n_pmd_threads_on_numa(struct dp_netdev *dp, int numa_id);
static inline bool emc_entry_alive(struct emc_entry *ce);
static void emc_clear_entry(struct emc_entry *ce);
@@ -623,10 +628,11 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
ovs_mutex_init_recursive(&dp->non_pmd_mutex);
ovsthread_key_create(&dp->per_pmd_key, NULL);
- /* Reserves the core NON_PMD_CORE_ID for all non-pmd threads. */
- ovs_numa_try_pin_core_specific(NON_PMD_CORE_ID);
+ /* There can never be a pmd thread of NON_PMD_CORE_ID. */
+ ovs_numa_core_disable_pmd(NON_PMD_CORE_ID);
dp_netdev_set_nonpmd(dp);
dp->n_dpdk_rxqs = NR_QUEUE;
+ dp->n_pmd_threads = NR_PMD_THREADS;
ovs_mutex_lock(&dp->port_mutex);
error = do_add_port(dp, name, "internal", ODPP_LOCAL);
@@ -901,9 +907,20 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
cmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
if (netdev_is_pmd(netdev)) {
- dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev));
- dp_netdev_reload_pmds(dp);
- dp_netdev_set_nonpmd_affinity();
+ int dev_numa_id = netdev_get_numa_id(netdev);
+
+ if (!get_n_pmd_threads_on_numa(dp, dev_numa_id)) {
+ /* There weren't pmd threads on numa domain 'dev_numa_id'.
+ * Reset all the pmd threads to distribute the pmd threads
+ * among numa domains */
+ dp_netdev_destroy_all_pmds(dp);
+ dp_netdev_reset_pmd_threads(dp);
+ } else {
+ /* There are already pmd threads on numa domain 'dev_numa_id'.
+ * Redistribute the queues */
+ dp_netdev_reload_pmds(dp);
+ dp_netdev_set_nonpmd_affinity();
+ }
}
seq_change(dp->port_seq);
@@ -1090,7 +1107,8 @@ do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
/* If there is no netdev on the numa node, deletes the pmd threads
* for that numa. Else, just reloads the queues. */
if (!has_pmd_port_for_numa(dp, numa_id)) {
- dp_netdev_del_pmds_on_numa(dp, numa_id);
+ dp_netdev_destroy_all_pmds(dp);
+ dp_netdev_reset_pmd_threads(dp);
}
dp_netdev_reload_pmds(dp);
}
@@ -2135,17 +2153,25 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
* is changed. */
static bool
pmd_config_changed(const struct dp_netdev *dp, size_t rxqs,
- const char *cmask_pmd)
+ const char *cmask_pmd, int n_pmd_cores)
{
if (dp->n_dpdk_rxqs != rxqs) {
return true;
- } else {
- if (dp->pmd_cmask != NULL && cmask_pmd != NULL) {
- return strcmp(dp->pmd_cmask, cmask_pmd);
- } else {
- return (dp->pmd_cmask != NULL || cmask_pmd != NULL);
- }
}
+
+ if (dp->pmd_cmask != NULL && cmask_pmd != NULL) {
+ return strcmp(dp->pmd_cmask, cmask_pmd);
+ }
+
+ if (dp->pmd_cmask != NULL || cmask_pmd != NULL) {
+ return true;
+ }
+
+ if (dp->pmd_cmask == NULL) {
+ return dp->n_pmd_threads != n_pmd_cores;
+ }
+
+ return false;
}
/* Returns true if the configuration for nonpmd cpu mask is changed */
@@ -2162,11 +2188,12 @@ nonpmd_config_changed(const struct dp_netdev *dp, const char *cmask_nonpmd)
/* Resets pmd threads if the configuration for 'rxq's or cpu mask changes. */
static int
dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs,
- const char *cmask_pmd, const char *cmask_nonpmd)
+ int n_pmd_cores, const char *cmask_pmd,
+ const char *cmask_nonpmd)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
- if (pmd_config_changed(dp, n_rxqs, cmask_pmd)) {
+ if (pmd_config_changed(dp, n_rxqs, cmask_pmd, n_pmd_cores)) {
struct dp_netdev_port *port;
dp_netdev_destroy_all_pmds(dp);
@@ -2200,6 +2227,7 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs,
}
}
dp->n_dpdk_rxqs = n_rxqs;
+ dp->n_pmd_threads = n_pmd_cores;
/* Reconfigures the cpu mask. */
ovs_numa_set_cpu_mask_pmd(cmask_pmd);
@@ -2208,6 +2236,7 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs,
free(dp->nonpmd_cmask);
dp->pmd_cmask = cmask_pmd ? xstrdup(cmask_pmd) : NULL;
dp->nonpmd_cmask = cmask_nonpmd ? xstrdup(cmask_nonpmd) : NULL;
+ ovs_numa_core_disable_pmd(NON_PMD_CORE_ID);
/* Restores the non-pmd. */
dp_netdev_set_nonpmd(dp);
@@ -2630,23 +2659,10 @@ dp_netdev_destroy_all_pmds(struct dp_netdev *dp)
}
}
-/* Deletes all pmd threads on numa node 'numa_id'. */
-static void
-dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id)
-{
- struct dp_netdev_pmd_thread *pmd;
-
- CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
- if (pmd->numa_id == numa_id) {
- dp_netdev_del_pmd(pmd);
- }
- }
-}
-
/* Checks the numa node id of 'netdev' and starts pmd threads for
* the numa node. */
static void
-dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
+dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id, int nr_threads)
{
int n_pmds;
@@ -2672,8 +2688,9 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
}
/* If cpu mask is specified, uses all unpinned cores, otherwise
- * tries creating NR_PMD_THREADS pmd threads. */
- can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, NR_PMD_THREADS);
+ * tries creating 'nr_threads' pmd threads. */
+ can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, nr_threads);
+
for (i = 0; i < can_have; i++) {
struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
int core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
@@ -2694,15 +2711,46 @@ static void
dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
{
struct dp_netdev_port *port;
+ int max_numa = ovs_numa_get_n_numas();
+ unsigned long *numabitmap;
+ int numa_id, nr_numa;
+
+ if (max_numa < 1) {
+ max_numa = 1;
+ }
+
+ numabitmap = bitmap_allocate(max_numa);
CMAP_FOR_EACH (port, node, &dp->ports) {
if (netdev_is_pmd(port->netdev)) {
- int numa_id = netdev_get_numa_id(port->netdev);
+ numa_id = netdev_get_numa_id(port->netdev);
- dp_netdev_set_pmds_on_numa(dp, numa_id);
+ bitmap_set1(numabitmap, numa_id);
+ }
+ }
+
+ nr_numa = bitmap_count1(numabitmap, max_numa);
+ if (nr_numa) {
+ int n_threads_per_numa, n_threads_remainder;
+
+ if (dp->n_pmd_threads == 0) {
+ /* Default: just create one pmd thread per numa node */
+ n_threads_per_numa = 1;
+ n_threads_remainder = 0;
+ } else {
+ n_threads_per_numa = dp->n_pmd_threads / nr_numa;
+ n_threads_remainder = dp->n_pmd_threads % nr_numa;
+ }
+
+ BITMAP_FOR_EACH_1(numa_id, max_numa, numabitmap) {
+ dp_netdev_set_pmds_on_numa(dp, numa_id, n_threads_per_numa
+ + n_threads_remainder);
+ n_threads_remainder = 0;
}
}
dp_netdev_set_nonpmd_affinity();
+
+ bitmap_free(numabitmap);
}
static char *
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index 3612766..d33d7ed 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -310,9 +310,12 @@ struct dpif_class {
* configuration. 'n_rxqs' configures the number of rx_queues, which
* are distributed among threads. 'cmask' configures the cpu mask
* for setting the polling threads' cpu affinity. 'cmask_nonpmd'
- * configures the cpumask of the remaining OVS threads */
+ * configures the cpumask of the remaining OVS threads.
+ * If 'cmask' is NULL, 'n_pmd_cores' cores will be distributed among
+ * the numa domains, otherwise 'n_pmd_cores' will be ignored */
int (*poll_threads_set)(struct dpif *dpif, unsigned int n_rxqs,
- const char *cmask, const char *cmask_nonpmd);
+ int n_pmd_cores, const char *cmask,
+ const char *cmask_nonpmd);
/* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a
* priority value used for setting packet priority. */
diff --git a/lib/dpif.c b/lib/dpif.c
index 32bc005..cbb7399 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1383,12 +1383,14 @@ dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall)
* configuration. */
int
dpif_poll_threads_set(struct dpif *dpif, unsigned int n_rxqs,
- const char *cmask, const char *cmask_nonpmd)
+ int n_pmd_cores, const char *cmask,
+ const char *cmask_nonpmd)
{
int error = 0;
if (dpif->dpif_class->poll_threads_set) {
- error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs, cmask,
+ error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs,
+ n_pmd_cores, cmask,
cmask_nonpmd);
if (error) {
log_operation(dpif, "poll_threads_set", error);
diff --git a/lib/dpif.h b/lib/dpif.h
index 68774bf..4ee1a69 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -819,7 +819,7 @@ void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux);
int dpif_recv_set(struct dpif *, bool enable);
int dpif_handlers_set(struct dpif *, uint32_t n_handlers);
-int dpif_poll_threads_set(struct dpif *, unsigned int n_rxqs,
+int dpif_poll_threads_set(struct dpif *, unsigned int n_rxqs, int n_pmd_cores,
const char *cmask, const char *cmask_nonpmd);
int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *,
struct ofpbuf *);
diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
index 3b432f1..8c6a6e1 100644
--- a/lib/ovs-numa.c
+++ b/lib/ovs-numa.c
@@ -287,22 +287,15 @@ ovs_numa_get_n_unpinned_cores_on_numa(int numa_id)
return OVS_CORE_UNSPEC;
}
-/* Given 'core_id', tries to pin that core. Returns true, if succeeds.
- * False, if the core has already been pinned, or if it is invalid or
- * not available. */
-bool
-ovs_numa_try_pin_core_specific(int core_id)
+/* Removes the core 'core_id' from the pmd cpu mask */
+void
+ovs_numa_core_disable_pmd(int core_id)
{
struct cpu_core *core = get_core_by_core_id(core_id);
if (core) {
- if (core->available_pmd && !core->pinned) {
- core->pinned = true;
- return true;
- }
+ core->available_pmd = false;
}
-
- return false;
}
/* Searches through all cores for an unpinned and available core. Returns
diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h
index 04f598a..ea7a759 100644
--- a/lib/ovs-numa.h
+++ b/lib/ovs-numa.h
@@ -52,7 +52,7 @@ int ovs_numa_get_n_cores(void);
int ovs_numa_get_numa_id(int core_id);
int ovs_numa_get_n_cores_on_numa(int numa_id);
int ovs_numa_get_n_unpinned_cores_on_numa(int numa_id);
-bool ovs_numa_try_pin_core_specific(int core_id);
+void ovs_numa_core_disable_pmd(int core_id);
int ovs_numa_get_unpinned_core_any(void);
int ovs_numa_get_unpinned_core_on_numa(int numa_id);
void ovs_numa_unpin_core(int core_id);
@@ -131,10 +131,10 @@ ovs_numa_get_n_unpinned_cores_on_numa(int numa_id OVS_UNUSED)
return OVS_CORE_UNSPEC;
}
-static inline bool
-ovs_numa_try_pin_core_specific(int core_id OVS_UNUSED)
+static inline void
+ovs_numa_core_disable_pmd(int core_id OVS_UNUSED)
{
- return false;
+ /* Nothing */
}
static inline int
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 0a90f9e..5c86dbb 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -569,8 +569,8 @@ type_run(const char *type)
udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
}
- dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask,
- nonpmd_cpu_mask);
+ dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, n_pmd_cores,
+ pmd_cpu_mask, nonpmd_cpu_mask);
if (backer->need_revalidate) {
struct ofproto_dpif *ofproto;
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 856491d..c953576 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -457,6 +457,8 @@ extern size_t n_handlers, n_revalidators;
/* Number of rx queues to be created for each dpdk interface. */
extern size_t n_dpdk_rxqs;
+/* Maximum number of pmd threads. Ignored if 'pmd_cpu_mask' != NULL */
+extern int n_pmd_cores;
/* Cpu mask for pmd threads. */
extern char *pmd_cpu_mask;
extern char *nonpmd_cpu_mask;
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index ecb7bb8..92e4038 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -305,6 +305,7 @@ unsigned ofproto_max_idle = OFPROTO_MAX_IDLE_DEFAULT;
size_t n_handlers, n_revalidators;
size_t n_dpdk_rxqs;
+int n_pmd_cores;
char *pmd_cpu_mask;
char *nonpmd_cpu_mask;
@@ -742,6 +743,12 @@ ofproto_set_n_dpdk_rxqs(int n_rxqs)
}
void
+ofproto_set_n_pmd_cores(int n_cores)
+{
+ n_pmd_cores = MAX(n_cores, 0);
+}
+
+void
ofproto_set_pmd_cpu_mask(const char *cmask)
{
free(pmd_cpu_mask);
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index bc7359b..3dada59 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -317,6 +317,7 @@ int ofproto_port_set_mcast_snooping(struct ofproto *ofproto, void *aux,
const struct ofproto_mcast_snooping_port_settings *s);
void ofproto_set_threads(int n_handlers, int n_revalidators);
void ofproto_set_n_dpdk_rxqs(int n_rxqs);
+void ofproto_set_n_pmd_cores(int n_cores);
void ofproto_set_pmd_cpu_mask(const char *cmask);
void ofproto_set_nonpmd_cpu_mask(const char *cmask);
void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 1bbd6af..571a036 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -565,6 +565,8 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
OFPROTO_MAX_IDLE_DEFAULT));
ofproto_set_n_dpdk_rxqs(smap_get_int(&ovs_cfg->other_config,
"n-dpdk-rxqs", 0));
+ ofproto_set_n_pmd_cores(smap_get_int(&ovs_cfg->other_config,
+ "n-pmd-cores", 0));
ofproto_set_pmd_cpu_mask(smap_get(&ovs_cfg->other_config,
"pmd-cpu-mask"));
ofproto_set_nonpmd_cpu_mask(smap_get(&ovs_cfg->other_config,
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 5d14487..c36d252 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -161,6 +161,24 @@
</p>
</column>
+ <column name="other_config" key="n-pmd-cores"
+ type='{"type": "integer", "minInteger": 0}'>
+ <p>
+ Specifies the maximum number of cores that the userspace datapath is
+ allowed to use to process packets (i.e. the maximum number of PMD
+ threads that will be created by OVS). The special value 0 means
+ that OVS will use a single core per NUMA domain.
+ </p>
+ <p>
+ If ``other_config:pmd-cpu-mask'' is specified, this value will be
+ ignored and the CPU mask will be honored.
+ </p>
+ <p>
+ The default is 0. It means that OVS will use only one core per
+ NUMA domain to process packets in the userspace datapath.
+ </p>
+ </column>
+
<column name="other_config" key="pmd-cpu-mask">
<p>
Specifies CPU mask for setting the cpu affinity of PMD (Poll
@@ -175,8 +193,13 @@
those uncovered cores are considered not set.
</p>
<p>
- If not specified, one pmd thread will be created for each numa node
- and pinned to any available core on the numa node by default.
+ Please note that core 0 (NON_PMD_CORE_ID in the code) is reserved
+ and will never be used for a PMD thread. If set in the mask, it will
+ be ignored.
+ </p>
+ <p>
+ If this mask is not specified, the ``other_config:n-pmd-cores'' key will
+ be honored. If this mask is specified, ``other_config:n-pmd-cores'' will
+ be ignored.
</p>
</column>
@@ -195,6 +218,10 @@
the cores used for PMD threads operations.
</p>
<p>
+ Please note that core 0 (NON_PMD_CORE_ID in the code) is always used
+ for non PMD threads, even if unset in this mask.
+ </p>
+ <p>
If not specified, the non PMD threads will be bound to every core
not used for pmd operations.
</p>
--
2.1.4
More information about the dev
mailing list