[ovs-dev] [PATCH v2 5/7] dpif-netdev: Allow controlling non PMD threads' affinity.
Daniele Di Proietto
diproiettod at vmware.com
Thu Apr 2 16:01:00 UTC 2015
This commit introduces the 'other_config:nonpmd-cpu-mask' key to control
the CPU affinity of non PMD threads.
Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
---
lib/dpif-netdev.c | 62 +++++++++++++++++++++++++++++-----
lib/dpif-provider.h | 7 ++--
lib/dpif.c | 5 +--
lib/dpif.h | 2 +-
lib/ovs-numa.c | 83 ++++++++++++++++++++++++++++++++++++----------
lib/ovs-numa.h | 21 ++++++++++--
ofproto/ofproto-dpif.c | 3 +-
ofproto/ofproto-provider.h | 1 +
ofproto/ofproto.c | 11 +++++-
ofproto/ofproto.h | 3 +-
vswitchd/bridge.c | 5 ++-
vswitchd/vswitch.xml | 20 +++++++++++
12 files changed, 185 insertions(+), 38 deletions(-)
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 3b781f5..e632614 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -213,6 +213,7 @@ struct dp_netdev {
* for pin of pmd threads. */
size_t n_dpdk_rxqs;
char *pmd_cmask;
+ char *nonpmd_cmask;
uint64_t last_tnl_conf_seq;
};
@@ -422,6 +423,7 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
static void dp_netdev_input(struct dp_netdev_pmd_thread *,
struct dp_packet **, int cnt);
+static void *pmd_thread_main(void *);
static void dp_netdev_disable_upcall(struct dp_netdev *);
void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd,
@@ -708,6 +710,7 @@ dp_netdev_free(struct dp_netdev *dp)
dp_netdev_destroy_upcall_lock(dp);
free(dp->pmd_cmask);
+ free(dp->nonpmd_cmask);
free(CONST_CAST(char *, dp->name));
free(dp);
}
@@ -770,6 +773,25 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
}
static void
+dp_netdev_set_nonpmd_affinity(void)
+{
+ cpu_set_t nonpmdset;
+
+ if (ovs_numa_get_non_pmd_free_set(sizeof nonpmdset, &nonpmdset)) {
+ struct ovsthread *t;
+ CPU_SET(NON_PMD_CORE_ID, &nonpmdset);
+
+ ovs_mutex_lock(&ovsthread_list_mutex);
+ LIST_FOR_EACH(t, list_node, &ovsthread_list) {
+ if (t->start != pmd_thread_main) {
+ pthread_setaffinity_np(t->thread, sizeof nonpmdset, &nonpmdset);
+ }
+ }
+ ovs_mutex_unlock(&ovsthread_list_mutex);
+ }
+}
+
+static void
dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
{
int old_seq;
@@ -885,6 +907,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
if (netdev_is_pmd(netdev)) {
dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev));
dp_netdev_reload_pmds(dp);
+ dp_netdev_set_nonpmd_affinity();
}
seq_change(dp->port_seq);
@@ -2115,26 +2138,39 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
/* Returns true if the configuration for rx queues or cpu mask
* is changed. */
static bool
-pmd_config_changed(const struct dp_netdev *dp, size_t rxqs, const char *cmask)
+pmd_config_changed(const struct dp_netdev *dp, size_t rxqs,
+ const char *cmask_pmd)
{
if (dp->n_dpdk_rxqs != rxqs) {
return true;
} else {
- if (dp->pmd_cmask != NULL && cmask != NULL) {
- return strcmp(dp->pmd_cmask, cmask);
+ if (dp->pmd_cmask != NULL && cmask_pmd != NULL) {
+ return strcmp(dp->pmd_cmask, cmask_pmd);
} else {
- return (dp->pmd_cmask != NULL || cmask != NULL);
+ return (dp->pmd_cmask != NULL || cmask_pmd != NULL);
}
}
}
+/* Returns true if the configuration for the non pmd cpu mask has changed. */
+static bool
+nonpmd_config_changed(const struct dp_netdev *dp, const char *cmask_nonpmd)
+{
+ if (dp->nonpmd_cmask != NULL && cmask_nonpmd != NULL) {
+ return strcmp(dp->nonpmd_cmask, cmask_nonpmd);
+ } else {
+ return (dp->nonpmd_cmask != NULL || cmask_nonpmd != NULL);
+ }
+}
+
/* Resets pmd threads if the configuration for 'rxq's or cpu mask changes. */
static int
-dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask)
+dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs,
+ const char *cmask_pmd, const char *cmask_nonpmd)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
- if (pmd_config_changed(dp, n_rxqs, cmask)) {
+ if (pmd_config_changed(dp, n_rxqs, cmask_pmd)) {
struct dp_netdev_port *port;
dp_netdev_destroy_all_pmds(dp);
@@ -2170,12 +2206,21 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask)
dp->n_dpdk_rxqs = n_rxqs;
/* Reconfigures the cpu mask. */
- ovs_numa_set_cpu_mask(cmask);
+ ovs_numa_set_cpu_mask_pmd(cmask_pmd);
+ ovs_numa_set_cpu_mask_nonpmd(cmask_nonpmd);
free(dp->pmd_cmask);
- dp->pmd_cmask = cmask ? xstrdup(cmask) : NULL;
+ free(dp->nonpmd_cmask);
+ dp->pmd_cmask = cmask_pmd ? xstrdup(cmask_pmd) : NULL;
+ dp->nonpmd_cmask = cmask_nonpmd ? xstrdup(cmask_nonpmd) : NULL;
/* Restores all pmd threads. */
dp_netdev_reset_pmd_threads(dp);
+ } else if (nonpmd_config_changed(dp, cmask_nonpmd)) {
+ free(dp->nonpmd_cmask);
+ dp->nonpmd_cmask = cmask_nonpmd ? xstrdup(cmask_nonpmd) : NULL;
+ ovs_numa_set_cpu_mask_nonpmd(cmask_nonpmd);
+
+ dp_netdev_set_nonpmd_affinity();
}
return 0;
@@ -2652,6 +2697,7 @@ dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
dp_netdev_set_pmds_on_numa(dp, numa_id);
}
}
+ dp_netdev_set_nonpmd_affinity();
}
static char *
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index 7b4878eb..e54817d 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -308,10 +308,11 @@ struct dpif_class {
/* If 'dpif' creates its own I/O polling threads, refreshes poll threads
* configuration. 'n_rxqs' configures the number of rx_queues, which
- * are distributed among threads. 'cmask' configures the cpu mask
- * for setting the polling threads' cpu affinity. */
+ * are distributed among threads. 'cmask_pmd' configures the cpu mask
+ * for setting the polling threads' cpu affinity. 'cmask_nonpmd'
+ * configures the cpu mask of the remaining OVS threads. */
int (*poll_threads_set)(struct dpif *dpif, unsigned int n_rxqs,
- const char *cmask);
+ const char *cmask_pmd, const char *cmask_nonpmd);
/* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a
* priority value used for setting packet priority. */
diff --git a/lib/dpif.c b/lib/dpif.c
index ee71774..32bc005 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1383,12 +1383,13 @@ dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall)
* configuration. */
int
dpif_poll_threads_set(struct dpif *dpif, unsigned int n_rxqs,
- const char *cmask)
+ const char *cmask, const char *cmask_nonpmd)
{
int error = 0;
if (dpif->dpif_class->poll_threads_set) {
- error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs, cmask);
+ error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs, cmask,
+ cmask_nonpmd);
if (error) {
log_operation(dpif, "poll_threads_set", error);
}
diff --git a/lib/dpif.h b/lib/dpif.h
index 06c6525..68774bf 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -820,7 +820,7 @@ void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux);
int dpif_recv_set(struct dpif *, bool enable);
int dpif_handlers_set(struct dpif *, uint32_t n_handlers);
int dpif_poll_threads_set(struct dpif *, unsigned int n_rxqs,
- const char *cmask);
+ const char *cmask, const char *cmask_nonpmd);
int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *,
struct ofpbuf *);
void dpif_recv_purge(struct dpif *);
diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
index 3aa1036..3b432f1 100644
--- a/lib/ovs-numa.c
+++ b/lib/ovs-numa.c
@@ -71,8 +71,9 @@ struct cpu_core {
struct ovs_list list_node; /* In 'numa_node->cores' list. */
struct numa_node *numa; /* numa node containing the core. */
int core_id; /* Core id. */
- bool available; /* If the core can be pinned. */
+ bool available_pmd; /* If the core can be pinned. */
bool pinned; /* If a thread has been pinned to the core. */
+ bool available_nonpmd; /* If the core is available for nonpmd threads. */
};
/* Contains all 'struct numa_node's. */
@@ -126,7 +127,8 @@ discover_numa_and_core(void)
list_insert(&n->cores, &c->list_node);
c->core_id = core_id;
c->numa = n;
- c->available = true;
+ c->available_pmd = true;
+ c->available_nonpmd = true;
n_cpus++;
}
}
@@ -262,8 +264,9 @@ ovs_numa_get_n_cores_on_numa(int numa_id)
return OVS_CORE_UNSPEC;
}
-/* Returns the number of cpu cores that are available and unpinned
- * on numa node. Returns OVS_CORE_UNSPEC if 'numa_id' is invalid. */
+/* Returns the number of cpu cores that are available to be pinned
+ * (and currently unpinned) on numa node.
+ * Returns OVS_CORE_UNSPEC if 'numa_id' is invalid. */
int
ovs_numa_get_n_unpinned_cores_on_numa(int numa_id)
{
@@ -274,7 +277,7 @@ ovs_numa_get_n_unpinned_cores_on_numa(int numa_id)
int count = 0;
LIST_FOR_EACH(core, list_node, &numa->cores) {
- if (core->available && !core->pinned) {
+ if (core->available_pmd && !core->pinned) {
count++;
}
}
@@ -293,7 +296,7 @@ ovs_numa_try_pin_core_specific(int core_id)
struct cpu_core *core = get_core_by_core_id(core_id);
if (core) {
- if (core->available && !core->pinned) {
+ if (core->available_pmd && !core->pinned) {
core->pinned = true;
return true;
}
@@ -311,7 +314,7 @@ ovs_numa_get_unpinned_core_any(void)
struct cpu_core *core;
HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) {
- if (core->available && !core->pinned) {
+ if (core->available_pmd && !core->pinned) {
core->pinned = true;
return core->core_id;
}
@@ -332,7 +335,7 @@ ovs_numa_get_unpinned_core_on_numa(int numa_id)
struct cpu_core *core;
LIST_FOR_EACH(core, list_node, &numa->cores) {
- if (core->available && !core->pinned) {
+ if (core->available_pmd && !core->pinned) {
core->pinned = true;
return core->core_id;
}
@@ -390,11 +393,8 @@ ovs_numa_dump_destroy(struct ovs_numa_dump *dump)
free(dump);
}
-/* Reads the cpu mask configuration from 'cmask' and sets the
- * 'available' of corresponding cores. For unspecified cores,
- * sets 'available' to false. */
-void
-ovs_numa_set_cpu_mask(const char *cmask)
+static void
+ovs_numa_parse_cpu_mask(const char *cmask, void (*cb)(struct cpu_core *, bool))
{
int core_id = 0;
int i;
@@ -403,12 +403,12 @@ ovs_numa_set_cpu_mask(const char *cmask)
return;
}
- /* If no mask specified, resets the 'available' to true for all cores. */
+ /* If no mask specified, defaults to all cores being available. */
if (!cmask) {
struct cpu_core *core;
HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) {
- core->available = true;
+ cb(core, true);
}
return;
@@ -433,7 +433,7 @@ ovs_numa_set_cpu_mask(const char *cmask)
core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores,
hash_int(core_id++, 0)),
struct cpu_core, hmap_node);
- core->available = (bin >> j) & 0x1;
+ cb(core, (bin >> j) & 0x1);
if (core_id >= hmap_count(&all_cpu_cores)) {
return;
@@ -448,8 +448,57 @@ ovs_numa_set_cpu_mask(const char *cmask)
core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores,
hash_int(core_id++, 0)),
struct cpu_core, hmap_node);
- core->available = false;
+ cb(core, false);
+ }
+}
+
+static void
+available_pmd_cb(struct cpu_core *core, bool available)
+{
+ core->available_pmd = available;
+}
+
+/* Reads the cpu mask configuration from 'cmask_pmd' and sets the
+ * 'available_pmd' of corresponding cores. For unspecified cores,
+ * sets 'available_pmd' to false. */
+void
+ovs_numa_set_cpu_mask_pmd(const char *cmask_pmd)
+{
+ ovs_numa_parse_cpu_mask(cmask_pmd, available_pmd_cb);
+}
+
+static void
+available_nonpmd_cb(struct cpu_core *core, bool available)
+{
+ core->available_nonpmd = available;
+}
+
+/* Reads the cpu mask configuration from 'cmask_nonpmd' and sets the
+ * 'available_nonpmd' of corresponding cores. For unspecified cores,
+ * sets 'available_nonpmd' to false. */
+void
+ovs_numa_set_cpu_mask_nonpmd(const char *cmask_nonpmd)
+{
+ ovs_numa_parse_cpu_mask(cmask_nonpmd, available_nonpmd_cb);
+}
+
+/* Writes to 'cpuset' the set of cores that are available to non PMD
+ * threads and not currently pinned by a PMD thread.
+ * Returns true on success, false if core discovery has not run. */
+bool
+ovs_numa_get_non_pmd_free_set(size_t cpusetsize, cpu_set_t *cpuset)
+{
+ struct cpu_core *core;
+
+ ovs_assert(cpusetsize == sizeof *cpuset);
+
+ CPU_ZERO(cpuset);
+ HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) {
+ if (!core->pinned && core->available_nonpmd) {
+ CPU_SET(core->core_id, cpuset);
+ }
}
+ return true;
}
#endif /* __linux__ */
diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h
index 35b351b..04f598a 100644
--- a/lib/ovs-numa.h
+++ b/lib/ovs-numa.h
@@ -22,6 +22,7 @@
#include "compiler.h"
#include "list.h"
+#include "ovs-thread.h"
#define OVS_CORE_UNSPEC INT_MAX
#define OVS_NUMA_UNSPEC INT_MAX
@@ -45,7 +46,8 @@ bool ovs_numa_numa_id_is_valid(int numa_id);
bool ovs_numa_core_id_is_valid(int core_id);
bool ovs_numa_core_is_pinned(int core_id);
int ovs_numa_get_n_numas(void);
-void ovs_numa_set_cpu_mask(const char *cmask);
+void ovs_numa_set_cpu_mask_pmd(const char *cmask);
+void ovs_numa_set_cpu_mask_nonpmd(const char *cmask);
int ovs_numa_get_n_cores(void);
int ovs_numa_get_numa_id(int core_id);
int ovs_numa_get_n_cores_on_numa(int numa_id);
@@ -56,6 +58,7 @@ int ovs_numa_get_unpinned_core_on_numa(int numa_id);
void ovs_numa_unpin_core(int core_id);
struct ovs_numa_dump *ovs_numa_dump_cores_on_numa(int numa_id);
void ovs_numa_dump_destroy(struct ovs_numa_dump *);
+bool ovs_numa_get_non_pmd_free_set(size_t, cpu_set_t *);
#define FOR_EACH_CORE_ON_NUMA(ITER, DUMP) \
LIST_FOR_EACH((ITER), list_node, &(DUMP)->dump)
@@ -87,7 +90,13 @@ ovs_numa_core_is_pinned(int core_id OVS_UNUSED)
}
static inline void
-ovs_numa_set_cpu_mask(const char *cmask OVS_UNUSED)
+ovs_numa_set_cpu_mask_pmd(const char *cmask OVS_UNUSED)
+{
+ /* Nothing */
+}
+
+static inline void
+ovs_numa_set_cpu_mask_nonpmd(const char *cmask OVS_UNUSED)
{
/* Nothing */
}
@@ -158,9 +167,15 @@ ovs_numa_dump_destroy(struct ovs_numa_dump *dump OVS_UNUSED)
/* Nothing */
}
+static inline bool
+ovs_numa_get_non_pmd_free_set(size_t s OVS_UNUSED, cpu_set_t *c OVS_UNUSED)
+{
+ return false;
+}
+
/* No loop. */
#define FOR_EACH_CORE_ON_NUMA(ITER, DUMP) \
for ((ITER) = NULL; (ITER);)
#endif /* __linux__ */
-#endif /* ovs-thead.h */
+#endif /* ovs-numa.h */
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 837ae0b..4b45bb2 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -557,7 +557,8 @@ type_run(const char *type)
udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
}
- dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask);
+ dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask,
+ nonpmd_cpu_mask);
if (backer->need_revalidate) {
struct ofproto_dpif *ofproto;
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 9222fe4..54f358d 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -459,6 +459,7 @@ extern size_t n_dpdk_rxqs;
/* Cpu mask for pmd threads. */
extern char *pmd_cpu_mask;
+extern char *nonpmd_cpu_mask;
static inline struct rule *rule_from_cls_rule(const struct cls_rule *);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index a36a1f8..4ee7e37 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -306,6 +306,7 @@ unsigned ofproto_max_idle = OFPROTO_MAX_IDLE_DEFAULT;
size_t n_handlers, n_revalidators;
size_t n_dpdk_rxqs;
char *pmd_cpu_mask;
+char *nonpmd_cpu_mask;
/* Map from datapath name to struct ofproto, for use by unixctl commands. */
static struct hmap all_ofprotos = HMAP_INITIALIZER(&all_ofprotos);
@@ -741,7 +742,7 @@ ofproto_set_n_dpdk_rxqs(int n_rxqs)
}
void
-ofproto_set_cpu_mask(const char *cmask)
+ofproto_set_pmd_cpu_mask(const char *cmask)
{
free(pmd_cpu_mask);
@@ -749,6 +750,14 @@ ofproto_set_cpu_mask(const char *cmask)
}
void
+ofproto_set_nonpmd_cpu_mask(const char *cmask)
+{
+ free(nonpmd_cpu_mask);
+
+ nonpmd_cpu_mask = cmask ? xstrdup(cmask) : NULL;
+}
+
+void
ofproto_set_threads(int n_handlers_, int n_revalidators_)
{
int threads = MAX(count_cpu_cores(), 2);
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 7dc1874..bc7359b 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -317,7 +317,8 @@ int ofproto_port_set_mcast_snooping(struct ofproto *ofproto, void *aux,
const struct ofproto_mcast_snooping_port_settings *s);
void ofproto_set_threads(int n_handlers, int n_revalidators);
void ofproto_set_n_dpdk_rxqs(int n_rxqs);
-void ofproto_set_cpu_mask(const char *cmask);
+void ofproto_set_pmd_cpu_mask(const char *cmask);
+void ofproto_set_nonpmd_cpu_mask(const char *cmask);
void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc);
int ofproto_set_snoops(struct ofproto *, const struct sset *snoops);
int ofproto_set_netflow(struct ofproto *,
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index afd7c3e..24f780c 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -572,7 +572,10 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
OFPROTO_MAX_IDLE_DEFAULT));
ofproto_set_n_dpdk_rxqs(smap_get_int(&ovs_cfg->other_config,
"n-dpdk-rxqs", 0));
- ofproto_set_cpu_mask(smap_get(&ovs_cfg->other_config, "pmd-cpu-mask"));
+ ofproto_set_pmd_cpu_mask(smap_get(&ovs_cfg->other_config,
+ "pmd-cpu-mask"));
+ ofproto_set_nonpmd_cpu_mask(smap_get(&ovs_cfg->other_config,
+ "nonpmd-cpu-mask"));
ofproto_set_threads(
smap_get_int(&ovs_cfg->other_config, "n-handler-threads", 0),
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index b8fd458..b7388b4 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -180,6 +180,26 @@
</p>
</column>
+ <column name="other_config" key="nonpmd-cpu-mask">
+ <p>
+ Specifies CPU mask for setting the cpu affinity of non PMD threads
+ in OVS. Value should be in the form of hex string, similar to the
+ dpdk EAL '-c COREMASK' option input or the 'taskset' mask input.
+ This key is effective only when PMD threads are used (i.e. when the
+ userspace datapath is used with DPDK devices).
+ </p>
+ <p>
+ The lowest order bit corresponds to the first CPU core. A set bit
+ means the corresponding core is available. All the non PMD threads
+ will be pinned to the set of cores specified by this option, minus
+ the cores used for PMD thread operations.
+ </p>
+ <p>
+ If not specified, the non PMD threads will be bound to every core
+ not used for PMD operations.
+ </p>
+ </column>
+
<column name="other_config" key="n-handler-threads"
type='{"type": "integer", "minInteger": 1}'>
<p>
--
2.1.4
More information about the dev
mailing list