[ovs-dev] [PATCH v2 1/3] dpif-netdev: only poll enabled vhost queues
Ilya Maximets
i.maximets at samsung.com
Tue Apr 16 13:41:24 UTC 2019
Hi.
One comment on the patch names. It's not a rule, but it's better
to make the <summary> part of the subject a complete sentence,
i.e., start with a capital letter and end with a period.
Same rule is applicable to the comments in the code, but this is
documented in coding-style.
More comments inline.
Best regards, Ilya Maximets.
On 16.04.2019 12:45, David Marchand wrote:
> We currently poll all available queues based on the max queue count
> exchanged with the vhost peer and rely on the vhost library in DPDK to
> check the vring status beneath.
> This can lead to some overhead when we have a lot of unused queues.
>
> To enhance the situation, we can skip the disabled queues.
> On rxq notifications, we make use of the netdev's change_seq number so
> that the pmd thread main loop can cache the queue state periodically.
>
> $ ovs-appctl dpif-netdev/pmd-rxq-show
> pmd thread numa_id 0 core_id 1:
> isolated : true
> port: dpdk0 queue-id: 0 pmd usage: 0 %
> pmd thread numa_id 0 core_id 2:
> isolated : true
> port: vhost1 queue-id: 0 pmd usage: 0 %
> port: vhost3 queue-id: 0 pmd usage: 0 %
> pmd thread numa_id 0 core_id 15:
> isolated : true
> port: dpdk1 queue-id: 0 pmd usage: 0 %
> pmd thread numa_id 0 core_id 16:
> isolated : true
> port: vhost0 queue-id: 0 pmd usage: 0 %
> port: vhost2 queue-id: 0 pmd usage: 0 %
>
> $ while true; do
> ovs-appctl dpif-netdev/pmd-rxq-show |awk '
> /port: / {
> tot++;
> if ($NF == "disabled") {
> dis++;
> }
> }
> END {
> print "total: " tot ", enabled: " (tot - dis)
> }'
> sleep 1
> done
>
> total: 6, enabled: 2
> total: 6, enabled: 2
> ...
>
> # Started vm, virtio devices are bound to kernel driver which enables
> # F_MQ + all queue pairs
> total: 6, enabled: 2
> total: 66, enabled: 66
> ...
>
> # Unbound vhost0 and vhost1 from the kernel driver
> total: 66, enabled: 66
> total: 66, enabled: 34
> ...
>
> # Configured kernel bound devices to use only 1 queue pair
> total: 66, enabled: 34
> total: 66, enabled: 19
> total: 66, enabled: 4
> ...
>
> # While rebooting the vm
> total: 66, enabled: 4
> total: 66, enabled: 2
> ...
> total: 66, enabled: 66
> ...
>
> # After shutting down the vm
> total: 66, enabled: 66
> total: 66, enabled: 2
>
> Signed-off-by: David Marchand <david.marchand at redhat.com>
> ---
>
> Changes since v1:
> - only indicate disabled queues in dpif-netdev/pmd-rxq-show output
> - Ilya comments
> - no need for a struct as we only need a boolean per rxq
> - "rx_q" is generic, while we only care for this in vhost case,
> renamed as "vhost_rxq_enabled",
> - add missing rte_free on allocation error,
> - vhost_rxq_enabled is freed in vhost destruct only,
> - rxq0 is enabled at the virtio device activation to accommodate
> legacy implementations which would not report per queue states
> later,
> - do not mix boolean with integer,
> - do not use bit operand on boolean,
>
> ---
> lib/dpif-netdev.c | 27 ++++++++++++++++++++++++
> lib/netdev-dpdk.c | 58 +++++++++++++++++++++++++++++++++++++++------------
> lib/netdev-provider.h | 5 +++++
> lib/netdev.c | 10 +++++++++
> lib/netdev.h | 1 +
> 5 files changed, 88 insertions(+), 13 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 4d6d0c3..5bfa6ad 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -591,6 +591,8 @@ struct polled_queue {
> struct dp_netdev_rxq *rxq;
> odp_port_t port_no;
> bool emc_enabled;
> + bool enabled;
What do you think about renaming to 'rxq_enabled'? It seems more clear.
Having both 'emc_enabled' and just 'enabled' is a bit confusing.
> + uint64_t change_seq;
> };
>
> /* Contained by struct dp_netdev_pmd_thread's 'poll_list' member. */
> @@ -1171,6 +1173,9 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
> } else {
> ds_put_format(reply, "%s", "NOT AVAIL");
> }
> + if (!netdev_rxq_enabled(list[i].rxq->rx)) {
> + ds_put_cstr(reply, " polling: disabled");
> + }
> ds_put_cstr(reply, "\n");
> }
> ovs_mutex_unlock(&pmd->port_mutex);
> @@ -5198,6 +5203,11 @@ dpif_netdev_run(struct dpif *dpif)
> }
>
> for (i = 0; i < port->n_rxq; i++) {
> +
> + if (!netdev_rxq_enabled(port->rxqs[i].rx)) {
> + continue;
> + }
> +
> if (dp_netdev_process_rxq_port(non_pmd,
> &port->rxqs[i],
> port->port_no)) {
> @@ -5371,6 +5381,9 @@ pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
> poll_list[i].rxq = poll->rxq;
> poll_list[i].port_no = poll->rxq->port->port_no;
> poll_list[i].emc_enabled = poll->rxq->port->emc_enabled;
> + poll_list[i].enabled = netdev_rxq_enabled(poll->rxq->rx);
> + poll_list[i].change_seq =
> + netdev_get_change_seq(poll->rxq->port->netdev);
> i++;
> }
>
> @@ -5436,6 +5449,10 @@ reload:
>
> for (i = 0; i < poll_cnt; i++) {
>
> + if (!poll_list[i].enabled) {
> + continue;
> + }
> +
> if (poll_list[i].emc_enabled) {
> atomic_read_relaxed(&pmd->dp->emc_insert_min,
> &pmd->ctx.emc_insert_min);
> @@ -5472,6 +5489,16 @@ reload:
> if (reload) {
> break;
> }
> +
> + for (i = 0; i < poll_cnt; i++) {
> + uint64_t current_seq =
> + netdev_get_change_seq(poll_list[i].rxq->port->netdev);
> + if (poll_list[i].change_seq != current_seq) {
> + poll_list[i].change_seq = current_seq;
> + poll_list[i].enabled =
> + netdev_rxq_enabled(poll_list[i].rxq->rx);
> + }
> + }
> }
> pmd_perf_end_iteration(s, rx_packets, tx_packets,
> pmd_perf_metrics_enabled(pmd));
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 47153dc..9ba8e67 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -424,6 +424,9 @@ struct netdev_dpdk {
> OVSRCU_TYPE(struct ingress_policer *) ingress_policer;
> uint32_t policer_rate;
> uint32_t policer_burst;
> +
> + /* Array of vhost rxq states, see vring_state_changed */
Should end with a period.
> + bool *vhost_rxq_enabled;
> );
>
> PADDED_MEMBERS(CACHE_LINE_SIZE,
> @@ -1235,8 +1238,14 @@ vhost_common_construct(struct netdev *netdev)
> int socket_id = rte_lcore_to_socket_id(rte_get_master_lcore());
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
> + dev->vhost_rxq_enabled = dpdk_rte_mzalloc(OVS_VHOST_MAX_QUEUE_NUM *
> + sizeof *dev->vhost_rxq_enabled);
> + if (!dev->vhost_rxq_enabled) {
> + return ENOMEM;
> + }
> dev->tx_q = netdev_dpdk_alloc_txq(OVS_VHOST_MAX_QUEUE_NUM);
> if (!dev->tx_q) {
> + rte_free(dev->vhost_rxq_enabled);
> return ENOMEM;
> }
>
> @@ -1448,6 +1457,7 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
> dev->vhost_id = NULL;
>
> common_destruct(dev);
> + rte_free(dev->vhost_rxq_enabled);
Logically, 'common_destruct' should go after class-specific things.
>
> ovs_mutex_unlock(&dpdk_mutex);
>
> @@ -2200,6 +2210,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> return 0;
> }
>
> +static bool
> +netdev_dpdk_vhost_rxq_enabled(struct netdev_rxq *rxq)
> +{
> + struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
> +
> + return dev->vhost_rxq_enabled[rxq->queue_id];
> +}
> +
> static int
> netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch,
> int *qfill)
> @@ -3563,6 +3581,8 @@ destroy_device(int vid)
> ovs_mutex_lock(&dev->mutex);
> dev->vhost_reconfigured = false;
> ovsrcu_index_set(&dev->vid, -1);
> + memset(dev->vhost_rxq_enabled, 0,
> + OVS_VHOST_MAX_QUEUE_NUM * sizeof *dev->vhost_rxq_enabled);
We need to clear only first 'dev->up.n_rxq' queues.
> netdev_dpdk_txq_map_clear(dev);
>
> netdev_change_seq_changed(&dev->up);
> @@ -3597,24 +3617,30 @@ vring_state_changed(int vid, uint16_t queue_id, int enable)
> struct netdev_dpdk *dev;
> bool exists = false;
> int qid = queue_id / VIRTIO_QNUM;
> + bool is_rx = (queue_id % VIRTIO_QNUM) == VIRTIO_TXQ;
> char ifname[IF_NAME_SZ];
>
> rte_vhost_get_ifname(vid, ifname, sizeof ifname);
>
> - if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {
> - return 0;
> - }
> -
> ovs_mutex_lock(&dpdk_mutex);
> LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> ovs_mutex_lock(&dev->mutex);
> if (nullable_string_is_equal(ifname, dev->vhost_id)) {
> - if (enable) {
> - dev->tx_q[qid].map = qid;
> + if (is_rx) {
> + bool enabled = dev->vhost_rxq_enabled[qid];
This is also confusing to have 'enable' and 'enabled' in a same scope.
What do you think about renaming 'enabled' --> 'old_state'?
> +
> + dev->vhost_rxq_enabled[qid] = enable != 0;
> + if (enabled != dev->vhost_rxq_enabled[qid]) {
> + netdev_change_seq_changed(&dev->up);
> + }
> } else {
> - dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;
> + if (enable) {
> + dev->tx_q[qid].map = qid;
> + } else {
> + dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;
> + }
> + netdev_dpdk_remap_txqs(dev);
> }
> - netdev_dpdk_remap_txqs(dev);
> exists = true;
> ovs_mutex_unlock(&dev->mutex);
> break;
> @@ -3624,9 +3650,9 @@ vring_state_changed(int vid, uint16_t queue_id, int enable)
> ovs_mutex_unlock(&dpdk_mutex);
>
> if (exists) {
> - VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' "
> - "changed to \'%s\'", queue_id, qid, ifname,
> - (enable == 1) ? "enabled" : "disabled");
> + VLOG_INFO("State of queue %d ( %s_qid %d ) of vhost device '%s' "
> + "changed to \'%s\'", queue_id, is_rx == true ? "rx" : "tx",
> + qid, ifname, (enable == 1) ? "enabled" : "disabled");
> } else {
> VLOG_INFO("vHost Device '%s' not found", ifname);
> return -1;
> @@ -4085,6 +4112,10 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
> dev->up.n_rxq = dev->requested_n_rxq;
> int err;
>
> + /* Always keep RX queue 0 enabled for implementations that won't
> + * report vring states. */
> + dev->vhost_rxq_enabled[0] = true;
> +
> /* Enable TX queue 0 by default if it wasn't disabled. */
> if (dev->tx_q[0].map == OVS_VHOST_QUEUE_MAP_UNKNOWN) {
> dev->tx_q[0].map = 0;
> @@ -4297,7 +4327,8 @@ static const struct netdev_class dpdk_vhost_class = {
> .get_stats = netdev_dpdk_vhost_get_stats,
> .get_status = netdev_dpdk_vhost_user_get_status,
> .reconfigure = netdev_dpdk_vhost_reconfigure,
> - .rxq_recv = netdev_dpdk_vhost_rxq_recv
> + .rxq_recv = netdev_dpdk_vhost_rxq_recv,
> + .rxq_enabled = netdev_dpdk_vhost_rxq_enabled,
> };
>
> static const struct netdev_class dpdk_vhost_client_class = {
> @@ -4311,7 +4342,8 @@ static const struct netdev_class dpdk_vhost_client_class = {
> .get_stats = netdev_dpdk_vhost_get_stats,
> .get_status = netdev_dpdk_vhost_user_get_status,
> .reconfigure = netdev_dpdk_vhost_client_reconfigure,
> - .rxq_recv = netdev_dpdk_vhost_rxq_recv
> + .rxq_recv = netdev_dpdk_vhost_rxq_recv,
> + .rxq_enabled = netdev_dpdk_vhost_rxq_enabled,
> };
>
> void
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index fb0c27e..5faae0d 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -789,6 +789,11 @@ struct netdev_class {
> void (*rxq_destruct)(struct netdev_rxq *);
> void (*rxq_dealloc)(struct netdev_rxq *);
>
> + /* A netdev can report if a queue won't get traffic and should be excluded
> + * from polling (no callback implicitly means that the queue is enabled).
> + */
I'm suggesting the following variant of this comment to be similar to other
function descriptions here:
/* Retrieves the current state of rx queue. 'False' means that queue won't
* get traffic in a short term and could be not polled.
*
* This function may be set to null if it would always return 'True'
* anyhow. */
> + bool (*rxq_enabled)(struct netdev_rxq *);
> +
> /* Attempts to receive a batch of packets from 'rx'. In 'batch', the
> * caller supplies 'packets' as the pointer to the beginning of an array
> * of NETDEV_MAX_BURST pointers to dp_packet. If successful, the
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 7d7ecf6..03a1b24 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -682,6 +682,16 @@ netdev_rxq_close(struct netdev_rxq *rx)
> }
> }
>
> +bool netdev_rxq_enabled(struct netdev_rxq *rx)
> +{
> + bool enabled = true;
> +
> + if (rx->netdev->netdev_class->rxq_enabled) {
> + enabled = rx->netdev->netdev_class->rxq_enabled(rx);
> + }
> + return enabled;
> +}
> +
> /* Attempts to receive a batch of packets from 'rx'. 'batch' should point to
> * the beginning of an array of NETDEV_MAX_BURST pointers to dp_packet. If
> * successful, this function stores pointers to up to NETDEV_MAX_BURST
> diff --git a/lib/netdev.h b/lib/netdev.h
> index d94817f..bfcdf39 100644
> --- a/lib/netdev.h
> +++ b/lib/netdev.h
> @@ -183,6 +183,7 @@ enum netdev_pt_mode netdev_get_pt_mode(const struct netdev *);
> /* Packet reception. */
> int netdev_rxq_open(struct netdev *, struct netdev_rxq **, int id);
> void netdev_rxq_close(struct netdev_rxq *);
> +bool netdev_rxq_enabled(struct netdev_rxq *);
>
> const char *netdev_rxq_get_name(const struct netdev_rxq *);
> int netdev_rxq_get_queue_id(const struct netdev_rxq *);
>
More information about the dev
mailing list