[ovs-dev] [PATCH] netdev-dpdk: Add vHost User PMD

Daniele Di Proietto diproiettod at ovn.org
Sat Apr 30 00:23:03 UTC 2016


Hi Ciara,

thanks for doing this.  I really think this has the potential to clean up
the netdev-dpdk code.

The clang thread safety analyzer reports some warnings:

  CC       lib/netdev-dpdk.lo
../lib/netdev-dpdk.c:882:1: error: mutex 'dpdk_mutex' is not held on every
path through here [-Werror,-Wthread-safety-analysis]
}
^
../lib/netdev-dpdk.c:870:5: note: mutex acquired here
    ovs_mutex_lock(&dpdk_mutex);
    ^
../include/openvswitch/thread.h:60:9: note: expanded from macro
'ovs_mutex_lock'
        ovs_mutex_lock_at(mutex, OVS_SOURCE_LOCATOR)
        ^
1 error generated.

I see that this patch removes the transmission locks for the txqs. I think
those are still needed by physical NICs.

This patch seem to remove a lot of txq remapping functions (like
netdev_dpdk_remap_txqs).  How does it handle the case of a disabled txq in
the guest kernel?

I see that vhost-cuse is still handled separately. Is it possible to use
the vhost pmd also for vhost-cuse? Otherwise we still basically have to
handle differently three cases: NIC PMD, vhost user pmd, vhost cuse.  Maybe
it's time to remove vhost-cuse (I understand this is a separate issue,
though)?

I get an error when I try this:

ovs-vsctl add-port br0 p1 -- set Interface p1 type="dpdkvhostuser"
ovs-vsctl del-port br0 p1
ovs-vsctl add-port br0 p1 -- set Interface p1 type="dpdkvhostuser"

More comments inline

Thanks!

2016-04-21 5:20 GMT-07:00 Ciara Loftus <ciara.loftus at intel.com>:

> DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports
> to be controlled by the librte_ether API, like physical 'dpdk' ports.
> The commit integrates this functionality into OVS, and refactors some
> of the existing vhost code such that it is vhost-cuse specific.
> Similarly, there is now some overlap between dpdk and vhost-user port
> code.
>
> Signed-off-by: Ciara Loftus <ciara.loftus at intel.com>
> ---
>  INSTALL.DPDK.md   |  12 ++
>  NEWS              |   2 +
>  lib/netdev-dpdk.c | 515
> +++++++++++++++++++++++++-----------------------------
>  3 files changed, 254 insertions(+), 275 deletions(-)
>  mode change 100644 => 100755 lib/netdev-dpdk.c
>
> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> index 7f76df8..5006812 100644
> --- a/INSTALL.DPDK.md
> +++ b/INSTALL.DPDK.md
> @@ -945,6 +945,18 @@ Restrictions:
>      increased to the desired number of queues. Both DPDK and OVS must be
>      recompiled for this change to take effect.
>
> +  DPDK 'eth' type ports:
> +  - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the
> context of
> +    DPDK as they are all managed by the rte_ether API. This means that
> they
> +    adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS which
> by
> +    default is set to 32. This means by default the combined total number
> of
> +    dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is 32.
> This
> +    value can be changed if desired by modifying the configuration file in
> +    DPDK, or by overriding the default value on the command line when
> building
> +    DPDK. eg.
> +
> +        `make install CONFIG_RTE_MAX_ETHPORTS=64`
> +
>

This seems a heavy limitation compared to the previous librte_vhost
approach. Are there any plans to increase this in DPDK upstream?


>  Bug Reporting:
>  --------------
>
> diff --git a/NEWS b/NEWS
> index ea7f3a1..4dc0201 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -26,6 +26,8 @@ Post-v2.5.0
>         assignment.
>       * Type of log messages from PMD threads changed from INFO to DBG.
>       * QoS functionality with sample egress-policer implementation.
> +     * vHost PMD integration brings vhost-user ports under control of the
> +       rte_ether DPDK API.
>     - ovs-benchmark: This utility has been removed due to lack of use and
>       bitrot.
>     - ovs-appctl:
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> old mode 100644
> new mode 100755
> index 208c5f5..4fccd63
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -56,6 +56,7 @@
>  #include "rte_mbuf.h"
>  #include "rte_meter.h"
>  #include "rte_virtio_net.h"
> +#include "rte_eth_vhost.h"
>
>  VLOG_DEFINE_THIS_MODULE(dpdk);
>  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> @@ -109,6 +110,8 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF /
> ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF))
>
>  static char *cuse_dev_name = NULL;    /* Character device cuse_dev_name.
> */
>  static char *vhost_sock_dir = NULL;   /* Location of vhost-user sockets */
> +/* Array that tracks the used & unused vHost user driver IDs */
> +static unsigned int vhost_user_drv_ids[RTE_MAX_ETHPORTS];
>
>  /*
>   * Maximum amount of time in micro seconds to try and enqueue to vhost.
> @@ -143,7 +146,8 @@ enum { DRAIN_TSC = 200000ULL };
>
>  enum dpdk_dev_type {
>      DPDK_DEV_ETH = 0,
> -    DPDK_DEV_VHOST = 1,
> +    DPDK_DEV_VHOST_USER = 1,
> +    DPDK_DEV_VHOST_CUSE = 2,
>  };
>
>  static int rte_eal_init_ret = ENODEV;
> @@ -275,8 +279,6 @@ struct dpdk_tx_queue {
>                                      * from concurrent access.  It is used
> only
>                                      * if the queue is shared among
> different
>                                      * pmd threads (see
> 'txq_needs_locking'). */
> -    int map;                       /* Mapping of configured vhost-user
> queues
> -                                    * to enabled by guest. */
>      uint64_t tsc;
>      struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];
>  };
> @@ -329,12 +331,22 @@ struct netdev_dpdk {
>      int real_n_rxq;
>      bool txq_needs_locking;
>
> -    /* virtio-net structure for vhost device */
> +    /* Spinlock for vhost cuse transmission. Other DPDK devices use
> spinlocks
> +     * in dpdk_tx_queue */
> +    rte_spinlock_t vhost_cuse_tx_lock;
> +
> +    /* virtio-net structure for vhost cuse device */
>      OVSRCU_TYPE(struct virtio_net *) virtio_dev;
>
>      /* Identifier used to distinguish vhost devices from each other */
>      char vhost_id[PATH_MAX];
>
> +    /* ID of vhost user port given to the PMD driver */
> +    unsigned int vhost_pmd_id;
> +
> +    /* Number of virtqueue pairs reported by the guest */
> +    uint32_t reported_queues;
> +
>      /* In dpdk_list. */
>      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
>
> @@ -352,13 +364,20 @@ struct netdev_rxq_dpdk {
>  static bool dpdk_thread_is_pmd(void);
>
>  static int netdev_dpdk_construct(struct netdev *);
> +static int netdev_dpdk_vhost_user_construct(struct netdev *);
>
>  struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);
>
> +void vring_state_changed_callback(uint8_t port_id,
> +        enum rte_eth_event_type type OVS_UNUSED, void *param OVS_UNUSED);
> +void device_state_changed_callback(uint8_t port_id,
> +        enum rte_eth_event_type type OVS_UNUSED, void *param OVS_UNUSED);
> +
>  static bool
>  is_dpdk_class(const struct netdev_class *class)
>  {
> -    return class->construct == netdev_dpdk_construct;
> +    return ((class->construct == netdev_dpdk_construct) ||
> +            (class->construct == netdev_dpdk_vhost_user_construct));
>  }
>
>  /* DPDK NIC drivers allocate RX buffers at a particular granularity,
> typically
> @@ -581,7 +600,9 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int
> n_rxq, int n_txq)
>          }
>
>          dev->up.n_rxq = n_rxq;
> -        dev->real_n_txq = n_txq;
> +        if (dev->type == DPDK_DEV_ETH) {
> +            dev->real_n_txq = n_txq;
> +        }
>
>          return 0;
>      }
> @@ -672,11 +693,72 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev,
> unsigned int n_txqs)
>              /* Queues are shared among CPUs. Always flush */
>              dev->tx_q[i].flush_tx = true;
>          }
> +    }
> +}
>
> -        /* Initialize map for vhost devices. */
> -        dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;
> -        rte_spinlock_init(&dev->tx_q[i].tx_lock);
> +void
> +vring_state_changed_callback(uint8_t port_id,
> +                             enum rte_eth_event_type type OVS_UNUSED,
> +                             void *param OVS_UNUSED)
> +{
> +    struct netdev_dpdk *dev;
> +    struct rte_eth_vhost_queue_event event;
> +    int err = 0;
> +
> +    err = rte_eth_vhost_get_queue_event(port_id, &event);
> +    if (err || (event.rx == 1)) {
> +        return;
>      }
> +
> +    ovs_mutex_lock(&dpdk_mutex);
> +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> +        if (port_id == dev->port_id) {
> +            ovs_mutex_lock(&dev->mutex);
> +            if (event.enable) {
> +                dev->reported_queues++;
> +            } else {
> +                dev->reported_queues--;
> +            }
> +            dev->real_n_rxq = dev->reported_queues;
> +            dev->real_n_txq = dev->reported_queues;
> +            dev->txq_needs_locking = true;
> +            netdev_dpdk_alloc_txq(dev, dev->real_n_txq);
> +            ovs_mutex_unlock(&dev->mutex);
> +            break;
> +        }
> +    }
> +    ovs_mutex_unlock(&dpdk_mutex);
> +
> +    return;
> +}
> +
> +void
> +device_state_changed_callback(uint8_t port_id,
> +                              enum rte_eth_event_type type OVS_UNUSED,
> +                              void *param OVS_UNUSED)
> +{
> +    struct netdev_dpdk *dev;
> +
> +    ovs_mutex_lock(&dpdk_mutex);
> +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> +        if (port_id == dev->port_id) {
> +            ovs_mutex_lock(&dev->mutex);
> +            check_link_status(dev);
> +            if (dev->link.link_status == ETH_LINK_UP) {
> +                /* new device */
> +                VLOG_INFO("vHost Device '%s' has been added",
> dev->vhost_id);
> +
> +            } else {
> +                /* destroy device */
> +                VLOG_INFO("vHost Device '%s' has been removed",
> dev->vhost_id);
> +            }
> +            ovs_mutex_unlock(&dev->mutex);
> +            break;
> +        }
> +    }
> +    ovs_mutex_unlock(&dpdk_mutex);
> +
> +    return;
>  }
>
>  static int
> @@ -688,6 +770,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
> port_no,
>      int sid;
>      int err = 0;
>      uint32_t buf_size;
> +    unsigned int n_queues = 0;
>
>      ovs_mutex_init(&dev->mutex);
>      ovs_mutex_lock(&dev->mutex);
> @@ -697,7 +780,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
> port_no,
>      /* If the 'sid' is negative, it means that the kernel fails
>       * to obtain the pci numa info.  In that situation, always
>       * use 'SOCKET0'. */
> -    if (type == DPDK_DEV_ETH) {
> +    if (type != DPDK_DEV_VHOST_CUSE) {
>          sid = rte_eth_dev_socket_id(port_no);
>      } else {
>          sid = rte_lcore_to_socket_id(rte_get_master_lcore());
> @@ -709,6 +792,8 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
> port_no,
>      dev->flags = 0;
>      dev->mtu = ETHER_MTU;
>      dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
> +    dev->vhost_pmd_id = 0;
> +    dev->reported_queues = 0;
>
>      buf_size = dpdk_buf_size(dev->mtu);
>      dev->dpdk_mp = dpdk_mp_get(dev->socket_id,
> FRAME_LEN_TO_MTU(buf_size));
> @@ -726,14 +811,23 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
> port_no,
>      netdev->requested_n_rxq = NR_QUEUE;
>      dev->real_n_txq = NR_QUEUE;
>
> -    if (type == DPDK_DEV_ETH) {
> -        netdev_dpdk_alloc_txq(dev, NR_QUEUE);
> +    n_queues = (type == DPDK_DEV_VHOST_CUSE) ? OVS_VHOST_MAX_QUEUE_NUM :
> +                                               NR_QUEUE;
> +    netdev_dpdk_alloc_txq(dev, n_queues);
> +    if (type != DPDK_DEV_VHOST_CUSE) {
>          err = dpdk_eth_dev_init(dev);
>          if (err) {
>              goto unlock;
>          }
> -    } else {
> -        netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM);
> +    }
> +
> +    if (type == DPDK_DEV_VHOST_USER) {
> +        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_QUEUE_STATE,
> +                                     (void*)vring_state_changed_callback,
> +                                      NULL);
> +        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_INTR_LSC,
> +                                     (void*)device_state_changed_callback,
> +                                      NULL);
>      }
>
>      ovs_list_push_back(&dpdk_list, &dev->list_node);
> @@ -768,26 +862,37 @@ dpdk_dev_parse_name(const char dev_name[], const
> char prefix[],
>  }
>
>  static int
> -vhost_construct_helper(struct netdev *netdev) OVS_REQUIRES(dpdk_mutex)
> +netdev_dpdk_vhost_cuse_construct(struct netdev *netdev)
>  {
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +    int err;
> +
> +    ovs_mutex_lock(&dpdk_mutex);
> +    strncpy(dev->vhost_id, netdev->name, sizeof(dev->vhost_id));
>      if (rte_eal_init_ret) {
>          return rte_eal_init_ret;
>      }
>
> -    return netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);
> +    rte_spinlock_init(&dev->vhost_cuse_tx_lock);
> +
> +    err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST_CUSE);
> +
> +    ovs_mutex_unlock(&dpdk_mutex);
> +    return err;
>  }
>
>  static int
> -netdev_dpdk_vhost_cuse_construct(struct netdev *netdev)
> +get_vhost_user_drv_id(void)
>  {
> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -    int err;
> +    int i = 0;
>
> -    ovs_mutex_lock(&dpdk_mutex);
> -    strncpy(dev->vhost_id, netdev->name, sizeof(dev->vhost_id));
> -    err = vhost_construct_helper(netdev);
> -    ovs_mutex_unlock(&dpdk_mutex);
> -    return err;
> +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> +        if (vhost_user_drv_ids[i] == 0) {
> +            return i;
> +        }
> +    }
> +
> +    return -1;
>  }
>
>  static int
> @@ -796,6 +901,9 @@ netdev_dpdk_vhost_user_construct(struct netdev *netdev)
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      const char *name = netdev->name;
>      int err;
> +    uint8_t port_no = 0;
> +    char devargs[4096];
> +    int driver_id = 0;
>
>      /* 'name' is appended to 'vhost_sock_dir' and used to create a socket
> in
>       * the file system. '/' or '\' would traverse directories, so they're
> not
> @@ -807,22 +915,35 @@ netdev_dpdk_vhost_user_construct(struct netdev
> *netdev)
>          return EINVAL;
>      }
>
> +    if (rte_eal_init_ret) {
> +        return rte_eal_init_ret;
> +    }
> +
>      ovs_mutex_lock(&dpdk_mutex);
>      /* Take the name of the vhost-user port and append it to the location
> where
>       * the socket is to be created, then register the socket.
>       */
>      snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s",
>               vhost_sock_dir, name);
> +    driver_id = get_vhost_user_drv_id();
>

I'm not sure I understand the logic behing this code.  Are we trying to
create a unique name with driver_id? What are the requirements for the
device name?

Also, since the OVS Interface name is part of the devargs string, it seems
to me that we should protect agains commas (",")



> +    if (driver_id == -1) {
> +        VLOG_ERR("Too many vhost-user devices registered with PMD");
> +        err = ENODEV;
> +    } else {
> +        snprintf(devargs, sizeof(devargs),
> "eth_vhost%u,iface=%s,queues=%i",
> +                 driver_id, dev->vhost_id, RTE_MAX_QUEUES_PER_PORT);
> +        err = rte_eth_dev_attach(devargs, &port_no);
> +    }
>
> -    err = rte_vhost_driver_register(dev->vhost_id);
>      if (err) {
> -        VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
> -                 dev->vhost_id);
> +        VLOG_ERR("Failed to attach vhost-user device to DPDK");
>      } else {
>          fatal_signal_add_file_to_unlink(dev->vhost_id);
>          VLOG_INFO("Socket %s created for vhost-user port %s\n",
>                    dev->vhost_id, name);
> -        err = vhost_construct_helper(netdev);
> +        dev->vhost_pmd_id = driver_id;
> +        vhost_user_drv_ids[dev->vhost_pmd_id] = 1;
> +        err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_VHOST_USER);
>      }
>
>      ovs_mutex_unlock(&dpdk_mutex);
> @@ -857,6 +978,12 @@ netdev_dpdk_destruct(struct netdev *netdev)
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
>      ovs_mutex_lock(&dev->mutex);
> +
> +    if (dev->type == DPDK_DEV_VHOST_USER) {
> +        rte_eth_dev_detach(dev->port_id, dev->vhost_id);
> +        vhost_user_drv_ids[dev->vhost_pmd_id] = 0;
> +    }
> +
>      rte_eth_dev_stop(dev->port_id);
>      ovs_mutex_unlock(&dev->mutex);
>
> @@ -868,7 +995,7 @@ netdev_dpdk_destruct(struct netdev *netdev)
>  }
>
>  static void
> -netdev_dpdk_vhost_destruct(struct netdev *netdev)
> +netdev_dpdk_vhost_cuse_destruct(struct netdev *netdev)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
> @@ -876,15 +1003,8 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
>      if (netdev_dpdk_get_virtio(dev) != NULL) {
>          VLOG_ERR("Removing port '%s' while vhost device still attached.",
>                   netdev->name);
> -        VLOG_ERR("To restore connectivity after re-adding of port, VM on
> socket"
> -                 " '%s' must be restarted.",
> -                 dev->vhost_id);
> -    }
> -
> -    if (rte_vhost_driver_unregister(dev->vhost_id)) {
> -        VLOG_ERR("Unable to remove vhost-user socket %s", dev->vhost_id);
> -    } else {
> -        fatal_signal_remove_file_to_unlink(dev->vhost_id);
> +        VLOG_ERR("To restore connectivity after re-adding of port, VM
> with"
> +                 "port '%s' must be restarted.", dev->vhost_id);
>      }
>
>      ovs_mutex_lock(&dpdk_mutex);
> @@ -1000,30 +1120,6 @@ netdev_dpdk_vhost_cuse_set_multiq(struct netdev
> *netdev, unsigned int n_txq,
>      netdev->n_txq = n_txq;
>      dev->real_n_txq = 1;
>      netdev->n_rxq = 1;
> -    dev->txq_needs_locking = dev->real_n_txq != netdev->n_txq;
> -
> -    ovs_mutex_unlock(&dev->mutex);
> -    ovs_mutex_unlock(&dpdk_mutex);
> -
> -    return err;
> -}
> -
> -static int
> -netdev_dpdk_vhost_set_multiq(struct netdev *netdev, unsigned int n_txq,
> -                             unsigned int n_rxq)
> -{
> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -    int err = 0;
> -
> -    if (netdev->n_txq == n_txq && netdev->n_rxq == n_rxq) {
> -        return err;
> -    }
> -
> -    ovs_mutex_lock(&dpdk_mutex);
> -    ovs_mutex_lock(&dev->mutex);
> -
> -    netdev->n_txq = n_txq;
> -    netdev->n_rxq = n_rxq;
>
>      ovs_mutex_unlock(&dev->mutex);
>      ovs_mutex_unlock(&dpdk_mutex);
> @@ -1118,13 +1214,13 @@ dpdk_queue_flush(struct netdev_dpdk *dev, int qid)
>  }
>
>  static bool
> -is_vhost_running(struct virtio_net *virtio_dev)
> +is_vhost_cuse_running(struct virtio_net *virtio_dev)
>  {
>      return (virtio_dev != NULL && (virtio_dev->flags &
> VIRTIO_DEV_RUNNING));
>  }
>
>  static inline void
> -netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
> +netdev_dpdk_vhost_cuse_update_rx_counters(struct netdev_stats *stats,
>                                       struct dp_packet **packets, int
> count)
>  {
>      int i;
> @@ -1153,26 +1249,22 @@ netdev_dpdk_vhost_update_rx_counters(struct
> netdev_stats *stats,
>  }
>
>  /*
> - * The receive path for the vhost port is the TX path out from guest.
> + * The receive path for the vhost cuse port is the TX path out from guest.
>   */
>  static int
> -netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> +netdev_dpdk_vhost_cuse_rxq_recv(struct netdev_rxq *rxq,
>                             struct dp_packet **packets, int *c)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> -    int qid = rxq->queue_id;
> +    int qid = 1;
>      uint16_t nb_rx = 0;
>
> -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) {
> +    if (OVS_UNLIKELY(!is_vhost_cuse_running(virtio_dev))) {
>          return EAGAIN;
>      }
>
> -    if (rxq->queue_id >= dev->real_n_rxq) {
> -        return EOPNOTSUPP;
> -    }
> -
> -    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM +
> VIRTIO_TXQ,
> +    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid,
>                                      dev->dpdk_mp->mp,
>                                      (struct rte_mbuf **)packets,
>                                      NETDEV_MAX_BURST);
> @@ -1181,7 +1273,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>      }
>
>      rte_spinlock_lock(&dev->stats_lock);
> -    netdev_dpdk_vhost_update_rx_counters(&dev->stats, packets, nb_rx);
> +    netdev_dpdk_vhost_cuse_update_rx_counters(&dev->stats, packets,
> nb_rx);
>      rte_spinlock_unlock(&dev->stats_lock);
>
>      *c = (int) nb_rx;
> @@ -1217,6 +1309,18 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> dp_packet **packets,
>      return 0;
>  }
>
> +static int
> +netdev_dpdk_vhost_user_rxq_recv(struct netdev_rxq *rxq,
> +                                struct dp_packet **packets, int *c)
> +{
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
> +
> +    if (rxq->queue_id >= dev->real_n_rxq) {
> +        return EOPNOTSUPP;
> +    }
> +
> +    return netdev_dpdk_rxq_recv(rxq, packets, c);
> +}
>  static inline int
>  netdev_dpdk_qos_run__(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
>                          int cnt)
> @@ -1235,7 +1339,7 @@ netdev_dpdk_qos_run__(struct netdev_dpdk *dev,
> struct rte_mbuf **pkts,
>  }
>
>  static inline void
> -netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,
> +netdev_dpdk_vhost_cuse_update_tx_counters(struct netdev_stats *stats,
>                                       struct dp_packet **packets,
>                                       int attempted,
>                                       int dropped)
> @@ -1252,9 +1356,8 @@ netdev_dpdk_vhost_update_tx_counters(struct
> netdev_stats *stats,
>  }
>
>  static void
> -__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> -                         struct dp_packet **pkts, int cnt,
> -                         bool may_steal)
> +__netdev_dpdk_vhost_cuse_send(struct netdev *netdev, struct dp_packet
> **pkts,
> +                         int cnt, bool may_steal)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> @@ -1263,26 +1366,24 @@ __netdev_dpdk_vhost_send(struct netdev *netdev,
> int qid,
>      unsigned int qos_pkts = cnt;
>      uint64_t start = 0;
>
> -    qid = dev->tx_q[qid % dev->real_n_txq].map;
> -
> -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0)) {
> +    if (OVS_UNLIKELY(!is_vhost_cuse_running(virtio_dev))) {
>          rte_spinlock_lock(&dev->stats_lock);
>          dev->stats.tx_dropped+= cnt;
>          rte_spinlock_unlock(&dev->stats_lock);
>          goto out;
>      }
>
> -    rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> +    /* There is a single vhost-cuse TX queue, So we need to lock it for
> TX. */
> +    rte_spinlock_lock(&dev->vhost_cuse_tx_lock);
>
>      /* Check has QoS has been configured for the netdev */
>      cnt = netdev_dpdk_qos_run__(dev, cur_pkts, cnt);
>      qos_pkts -= cnt;
>
>      do {
> -        int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
>          unsigned int tx_pkts;
>
> -        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,
> +        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, VIRTIO_RXQ,
>                                            cur_pkts, cnt);
>          if (OVS_LIKELY(tx_pkts)) {
>              /* Packets have been sent.*/
> @@ -1301,7 +1402,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
>               * Unable to enqueue packets to vhost interface.
>               * Check available entries before retrying.
>               */
> -            while (!rte_vring_available_entries(virtio_dev, vhost_qid)) {
> +            while (!rte_vring_available_entries(virtio_dev, VIRTIO_RXQ)) {
>                  if (OVS_UNLIKELY((rte_get_timer_cycles() - start) >
> timeout)) {
>                      expired = 1;
>                      break;
> @@ -1313,12 +1414,12 @@ __netdev_dpdk_vhost_send(struct netdev *netdev,
> int qid,
>              }
>          }
>      } while (cnt);
> -
> -    rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> +    rte_spinlock_unlock(&dev->vhost_cuse_tx_lock);
>
>      rte_spinlock_lock(&dev->stats_lock);
>      cnt += qos_pkts;
> -    netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts, total_pkts,
> cnt);
> +    netdev_dpdk_vhost_cuse_update_tx_counters(&dev->stats, pkts,
> total_pkts,
> +                                              cnt);
>      rte_spinlock_unlock(&dev->stats_lock);
>
>  out:
> @@ -1412,8 +1513,9 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet **pkts,
>          newcnt++;
>      }
>
> -    if (dev->type == DPDK_DEV_VHOST) {
> -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
> mbufs, newcnt, true);
> +    if (dev->type == DPDK_DEV_VHOST_CUSE) {
> +        __netdev_dpdk_vhost_cuse_send(netdev, (struct dp_packet **) mbufs,
> +                                      newcnt, true);
>      } else {
>          unsigned int qos_pkts = newcnt;
>
> @@ -1437,8 +1539,8 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet **pkts,
>  }
>
>  static int
> -netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet
> **pkts,
> -                 int cnt, bool may_steal)
> +netdev_dpdk_vhost_cuse_send(struct netdev *netdev, int qid OVS_UNUSED,
> +                            struct dp_packet **pkts, int cnt, bool
> may_steal)
>  {
>      if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
>          int i;
> @@ -1450,7 +1552,7 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid, struct dp_packet **pkts,
>              }
>          }
>      } else {
> -        __netdev_dpdk_vhost_send(netdev, qid, pkts, cnt, may_steal);
> +        __netdev_dpdk_vhost_cuse_send(netdev, pkts, cnt, may_steal);
>      }
>      return 0;
>  }
> @@ -1461,11 +1563,6 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>  {
>      int i;
>
> -    if (OVS_UNLIKELY(dev->txq_needs_locking)) {
> -        qid = qid % dev->real_n_txq;
> -        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> -    }
> -
>      if (OVS_UNLIKELY(!may_steal ||
>                       pkts[0]->source != DPBUF_DPDK)) {
>          struct netdev *netdev = &dev->up;
> @@ -1541,6 +1638,18 @@ netdev_dpdk_eth_send(struct netdev *netdev, int qid,
>  }
>
>  static int
> +netdev_dpdk_vhost_user_send(struct netdev *netdev, int qid,
> +        struct dp_packet **pkts, int cnt, bool may_steal)
> +{
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +
> +    qid = qid % dev->real_n_txq;
> +
> +    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
> +    return 0;
> +}
> +
> +static int
>  netdev_dpdk_set_etheraddr(struct netdev *netdev, const struct eth_addr
> mac)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> @@ -1634,7 +1743,7 @@ static int
>  netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier);
>
>  static int
> -netdev_dpdk_vhost_get_stats(const struct netdev *netdev,
> +netdev_dpdk_vhost_cuse_get_stats(const struct netdev *netdev,
>                              struct netdev_stats *stats)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> @@ -1796,14 +1905,14 @@ netdev_dpdk_get_carrier(const struct netdev
> *netdev, bool *carrier)
>  }
>
>  static int
> -netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool *carrier)
> +netdev_dpdk_vhost_cuse_get_carrier(const struct netdev *netdev, bool
> *carrier)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>
>      ovs_mutex_lock(&dev->mutex);
>
> -    if (is_vhost_running(virtio_dev)) {
> +    if (is_vhost_cuse_running(virtio_dev)) {
>          *carrier = 1;
>      } else {
>          *carrier = 0;
> @@ -1853,7 +1962,7 @@ netdev_dpdk_update_flags__(struct netdev_dpdk *dev,
>          return 0;
>      }
>
> -    if (dev->type == DPDK_DEV_ETH) {
> +    if (dev->type != DPDK_DEV_VHOST_CUSE) {
>          if (dev->flags & NETDEV_UP) {
>              err = rte_eth_dev_start(dev->port_id);
>              if (err)
> @@ -1998,75 +2107,7 @@ set_irq_status(struct virtio_net *virtio_dev)
>  }
>
>  /*
> - * Fixes mapping for vhost-user tx queues. Must be called after each
> - * enabling/disabling of queues and real_n_txq modifications.
> - */
> -static void
> -netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)
> -    OVS_REQUIRES(dev->mutex)
> -{
> -    int *enabled_queues, n_enabled = 0;
> -    int i, k, total_txqs = dev->real_n_txq;
> -
> -    enabled_queues = dpdk_rte_mzalloc(total_txqs * sizeof
> *enabled_queues);
> -
> -    for (i = 0; i < total_txqs; i++) {
> -        /* Enabled queues always mapped to themselves. */
> -        if (dev->tx_q[i].map == i) {
> -            enabled_queues[n_enabled++] = i;
> -        }
> -    }
> -
> -    if (n_enabled == 0 && total_txqs != 0) {
> -        enabled_queues[0] = OVS_VHOST_QUEUE_DISABLED;
> -        n_enabled = 1;
> -    }
> -
> -    k = 0;
> -    for (i = 0; i < total_txqs; i++) {
> -        if (dev->tx_q[i].map != i) {
> -            dev->tx_q[i].map = enabled_queues[k];
> -            k = (k + 1) % n_enabled;
> -        }
> -    }
> -
> -    VLOG_DBG("TX queue mapping for %s\n", dev->vhost_id);
> -    for (i = 0; i < total_txqs; i++) {
> -        VLOG_DBG("%2d --> %2d", i, dev->tx_q[i].map);
> -    }
> -
> -    rte_free(enabled_queues);
> -}
> -
> -static int
> -netdev_dpdk_vhost_set_queues(struct netdev_dpdk *dev, struct virtio_net
> *virtio_dev)
> -    OVS_REQUIRES(dev->mutex)
> -{
> -    uint32_t qp_num;
> -
> -    qp_num = virtio_dev->virt_qp_nb;
> -    if (qp_num > dev->up.n_rxq) {
> -        VLOG_ERR("vHost Device '%s' %"PRIu64" can't be added - "
> -                 "too many queues %d > %d", virtio_dev->ifname,
> virtio_dev->device_fh,
> -                 qp_num, dev->up.n_rxq);
> -        return -1;
> -    }
> -
> -    dev->real_n_rxq = qp_num;
> -    dev->real_n_txq = qp_num;
> -    dev->txq_needs_locking = true;
> -    /* Enable TX queue 0 by default if it wasn't disabled. */
> -    if (dev->tx_q[0].map == OVS_VHOST_QUEUE_MAP_UNKNOWN) {
> -        dev->tx_q[0].map = 0;
> -    }
> -
> -    netdev_dpdk_remap_txqs(dev);
> -
> -    return 0;
> -}
> -
> -/*
> - * A new virtio-net device is added to a vhost port.
> + * A new virtio-net device is added to a vhost cuse port.
>   */
>  static int
>  new_device(struct virtio_net *virtio_dev)
> @@ -2079,11 +2120,6 @@ new_device(struct virtio_net *virtio_dev)
>      LIST_FOR_EACH(dev, list_node, &dpdk_list) {
>          if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
>              ovs_mutex_lock(&dev->mutex);
> -            if (netdev_dpdk_vhost_set_queues(dev, virtio_dev)) {
> -                ovs_mutex_unlock(&dev->mutex);
> -                ovs_mutex_unlock(&dpdk_mutex);
> -                return -1;
> -            }
>              ovsrcu_set(&dev->virtio_dev, virtio_dev);
>              exists = true;
>              virtio_dev->flags |= VIRTIO_DEV_RUNNING;
> @@ -2107,23 +2143,11 @@ new_device(struct virtio_net *virtio_dev)
>      return 0;
>  }
>
> -/* Clears mapping for all available queues of vhost interface. */
> -static void
> -netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)
> -    OVS_REQUIRES(dev->mutex)
> -{
> -    int i;
> -
> -    for (i = 0; i < dev->real_n_txq; i++) {
> -        dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;
> -    }
> -}
> -
>  /*
> - * Remove a virtio-net device from the specific vhost port.  Use
> dev->remove
> - * flag to stop any more packets from being sent or received to/from a VM
> and
> - * ensure all currently queued packets have been sent/received before
> removing
> - *  the device.
> + * Remove a virtio-net device from the specific vhost cuse port. Use
> + * dev->remove flag to stop any more packets from being sent or received
> + * to/from a VM and ensure all currently queued packets have been
> sent/received
> + * before removing the device.
>   */
>  static void
>  destroy_device(volatile struct virtio_net *virtio_dev)
> @@ -2138,7 +2162,6 @@ destroy_device(volatile struct virtio_net
> *virtio_dev)
>              ovs_mutex_lock(&dev->mutex);
>              virtio_dev->flags &= ~VIRTIO_DEV_RUNNING;
>              ovsrcu_set(&dev->virtio_dev, NULL);
> -            netdev_dpdk_txq_map_clear(dev);
>              exists = true;
>              ovs_mutex_unlock(&dev->mutex);
>              break;
> @@ -2166,49 +2189,6 @@ destroy_device(volatile struct virtio_net
> *virtio_dev)
>      }
>  }
>
> -static int
> -vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id,
> -                    int enable)
> -{
> -    struct netdev_dpdk *dev;
> -    bool exists = false;
> -    int qid = queue_id / VIRTIO_QNUM;
> -
> -    if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {
> -        return 0;
> -    }
> -
> -    ovs_mutex_lock(&dpdk_mutex);
> -    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
> -            ovs_mutex_lock(&dev->mutex);
> -            if (enable) {
> -                dev->tx_q[qid].map = qid;
> -            } else {
> -                dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;
> -            }
> -            netdev_dpdk_remap_txqs(dev);
> -            exists = true;
> -            ovs_mutex_unlock(&dev->mutex);
> -            break;
> -        }
> -    }
> -    ovs_mutex_unlock(&dpdk_mutex);
> -
> -    if (exists) {
> -        VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %"
> -                  PRIu64" changed to \'%s\'", queue_id, qid,
> -                  virtio_dev->ifname, virtio_dev->device_fh,
> -                  (enable == 1) ? "enabled" : "disabled");
> -    } else {
> -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found",
> virtio_dev->ifname,
> -                  virtio_dev->device_fh);
> -        return -1;
> -    }
> -
> -    return 0;
> -}
> -
>  struct virtio_net *
>  netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)
>  {
> @@ -2216,18 +2196,17 @@ netdev_dpdk_get_virtio(const struct netdev_dpdk
> *dev)
>  }
>
>  /*
> - * These callbacks allow virtio-net devices to be added to vhost ports
> when
> - * configuration has been fully complete.
> + * These callbacks allow virtio-net devices to be added to vhost cuse
> ports
> + * when configuration has been fully complete.
>   */
>  static const struct virtio_net_device_ops virtio_net_device_ops =
>  {
>      .new_device =  new_device,
>      .destroy_device = destroy_device,
> -    .vring_state_changed = vring_state_changed
>  };
>
>  static void *
> -start_vhost_loop(void *dummy OVS_UNUSED)
> +start_vhost_cuse_loop(void *dummy OVS_UNUSED)
>  {
>       pthread_detach(pthread_self());
>       /* Put the cuse thread into quiescent state. */
> @@ -2237,19 +2216,7 @@ start_vhost_loop(void *dummy OVS_UNUSED)
>  }
>
>  static int
> -dpdk_vhost_class_init(void)
> -{
> -    rte_vhost_driver_callback_register(&virtio_net_device_ops);
> -    rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
> -                            | 1ULL << VIRTIO_NET_F_HOST_TSO6
> -                            | 1ULL << VIRTIO_NET_F_CSUM);
> -
> -    ovs_thread_create("vhost_thread", start_vhost_loop, NULL);
> -    return 0;
> -}
> -
> -static int
> -dpdk_vhost_cuse_class_init(void)
> +netdev_dpdk_vhost_cuse_class_init(void)
>  {
>      int err = -1;
>
> @@ -2265,14 +2232,12 @@ dpdk_vhost_cuse_class_init(void)
>          return -1;
>      }
>
> -    dpdk_vhost_class_init();
> -    return 0;
> -}
> +    rte_vhost_driver_callback_register(&virtio_net_device_ops);
> +    rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
> +                            | 1ULL << VIRTIO_NET_F_HOST_TSO6
> +                            | 1ULL << VIRTIO_NET_F_CSUM);
> +    ovs_thread_create("vhost_cuse_thread", start_vhost_cuse_loop, NULL);
>
> -static int
> -dpdk_vhost_user_class_init(void)
> -{
> -    dpdk_vhost_class_init();
>      return 0;
>  }
>
> @@ -2859,30 +2824,30 @@ static const struct netdev_class dpdk_ring_class =
>  static const struct netdev_class OVS_UNUSED dpdk_vhost_cuse_class =
>      NETDEV_DPDK_CLASS(
>          "dpdkvhostcuse",
> -        dpdk_vhost_cuse_class_init,
> +        netdev_dpdk_vhost_cuse_class_init,
>          netdev_dpdk_vhost_cuse_construct,
> -        netdev_dpdk_vhost_destruct,
> +        netdev_dpdk_vhost_cuse_destruct,
>          netdev_dpdk_vhost_cuse_set_multiq,
> -        netdev_dpdk_vhost_send,
> -        netdev_dpdk_vhost_get_carrier,
> -        netdev_dpdk_vhost_get_stats,
> +        netdev_dpdk_vhost_cuse_send,
> +        netdev_dpdk_vhost_cuse_get_carrier,
> +        netdev_dpdk_vhost_cuse_get_stats,
>          NULL,
>          NULL,
> -        netdev_dpdk_vhost_rxq_recv);
> +        netdev_dpdk_vhost_cuse_rxq_recv);
>
>  static const struct netdev_class OVS_UNUSED dpdk_vhost_user_class =
>      NETDEV_DPDK_CLASS(
>          "dpdkvhostuser",
> -        dpdk_vhost_user_class_init,
> -        netdev_dpdk_vhost_user_construct,
> -        netdev_dpdk_vhost_destruct,
> -        netdev_dpdk_vhost_set_multiq,
> -        netdev_dpdk_vhost_send,
> -        netdev_dpdk_vhost_get_carrier,
> -        netdev_dpdk_vhost_get_stats,
> -        NULL,
>          NULL,
> -        netdev_dpdk_vhost_rxq_recv);
> +        netdev_dpdk_vhost_user_construct,
> +        netdev_dpdk_destruct,
> +        netdev_dpdk_set_multiq,
> +        netdev_dpdk_vhost_user_send,
> +        netdev_dpdk_get_carrier,
> +        netdev_dpdk_get_stats,
> +        netdev_dpdk_get_features,
> +        netdev_dpdk_get_status,
> +        netdev_dpdk_vhost_user_rxq_recv);
>
>  void
>  netdev_dpdk_register(void)
> --
> 2.4.3
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>



More information about the dev mailing list