[ovs-dev] [RFC PATCH v2 1/1] netdev-dpdk: Add support for DPDK 16.07

Daniele Di Proietto diproiettod at ovn.org
Fri Jul 15 23:29:18 UTC 2016


Hi Ciara,

thanks for the patch.

It mostly looks good to me, except a few comments inline

Thanks,

Daniele

2016-07-12 2:11 GMT-07:00 Ciara Loftus <ciara.loftus at intel.com>:

> This commit introduces support for DPDK 16.07 and consequently breaks
> compatibility with DPDK 16.04.
>
> DPDK 16.07 introduces some changes to various APIs. These have been
> updated in OVS, including:
> * xstats API: changes to structure of xstats
> * vhost API:  replace virtio-net references with 'vid'
>
> Signed-off-by: Ciara Loftus <ciara.loftus at intel.com>
>
> ---
>  .travis/linux-build.sh   |   2 +-
>  INSTALL.DPDK-ADVANCED.md |   8 +-
>  INSTALL.DPDK.md          |  20 ++--
>  lib/netdev-dpdk.c        | 243
> +++++++++++++++++++++++------------------------
>  4 files changed, 135 insertions(+), 138 deletions(-)
>
> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
> index 065de39..1b3d43d 100755
> --- a/.travis/linux-build.sh
> +++ b/.travis/linux-build.sh
> @@ -68,7 +68,7 @@ fi
>
>  if [ "$DPDK" ]; then
>      if [ -z "$DPDK_VER" ]; then
> -        DPDK_VER="16.04"
> +        DPDK_VER="16.07"
>

I wanted to test it on travis, but the files are not there yet :)


>      fi
>      install_dpdk $DPDK_VER
>      if [ "$CC" = "clang" ]; then
> diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md
> index 9ae536d..ec1de29 100644
> --- a/INSTALL.DPDK-ADVANCED.md
> +++ b/INSTALL.DPDK-ADVANCED.md
> @@ -43,7 +43,7 @@ for DPDK and OVS.
>      For IVSHMEM case, set `export DPDK_TARGET=x86_64-ivshmem-linuxapp-gcc`
>
>      ```
> -    export DPDK_DIR=/usr/src/dpdk-16.04
> +    export DPDK_DIR=/usr/src/dpdk-16.07
>      export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET
>      make install T=$DPDK_TARGET DESTDIR=install
>      ```
> @@ -339,7 +339,7 @@ For users wanting to do packet forwarding using kernel
> stack below are the steps
>         cd /usr/src/cmdline_generator
>         wget
> https://raw.githubusercontent.com/netgroup-polito/un-orchestrator/master/orchestrator/compute_controller/plugins/kvm-libvirt/cmdline_generator/cmdline_generator.c
>         wget
> https://raw.githubusercontent.com/netgroup-polito/un-orchestrator/master/orchestrator/compute_controller/plugins/kvm-libvirt/cmdline_generator/Makefile
> -       export RTE_SDK=/usr/src/dpdk-16.04
> +       export RTE_SDK=/usr/src/dpdk-16.07
>         export RTE_TARGET=x86_64-ivshmem-linuxapp-gcc
>         make
>         ./build/cmdline_generator -m -p dpdkr0 XXX
> @@ -363,7 +363,7 @@ For users wanting to do packet forwarding using kernel
> stack below are the steps
>         mount -t hugetlbfs nodev /dev/hugepages (if not already mounted)
>
>         # Build the DPDK ring application in the VM
> -       export RTE_SDK=/root/dpdk-16.04
> +       export RTE_SDK=/root/dpdk-16.07
>         export RTE_TARGET=x86_64-ivshmem-linuxapp-gcc
>         make
>
> @@ -374,7 +374,7 @@ For users wanting to do packet forwarding using kernel
> stack below are the steps
>
>  ## <a name="vhost"></a> 6. Vhost Walkthrough
>
> -DPDK 16.04 supports two types of vhost:
> +DPDK 16.07 supports two types of vhost:
>
>  1. vhost-user - enabled default
>
> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> index 5407794..9022ad8 100644
> --- a/INSTALL.DPDK.md
> +++ b/INSTALL.DPDK.md
> @@ -21,7 +21,7 @@ The DPDK support of Open vSwitch is considered
> 'experimental'.
>
>  ### Prerequisites
>
> -* Required: DPDK 16.04, libnuma
> +* Required: DPDK 16.07, libnuma
>  * Hardware: [DPDK Supported NICs] when physical ports in use
>
>  ## <a name="build"></a> 2. Building and Installation
> @@ -42,10 +42,10 @@ advanced install guide [INSTALL.DPDK-ADVANCED.md]
>
>       ```
>       cd /usr/src/
> -     wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.04.zip
> -     unzip dpdk-16.04.zip
> +     wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.07.zip
> +     unzip dpdk-16.07.zip
>
> -     export DPDK_DIR=/usr/src/dpdk-16.04
> +     export DPDK_DIR=/usr/src/dpdk-16.07
>       cd $DPDK_DIR
>       ```
>
> @@ -329,9 +329,9 @@ can be found in [Vhost Walkthrough].
>
>    ```
>    cd /root/dpdk/
> -  wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.04.zip
> -  unzip dpdk-16.04.zip
> -  export DPDK_DIR=/root/dpdk/dpdk-16.04
> +  wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.07.zip
> +  unzip dpdk-16.07.zip
> +  export DPDK_DIR=/root/dpdk/dpdk-16.07
>    export DPDK_TARGET=x86_64-native-linuxapp-gcc
>    export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET
>    cd $DPDK_DIR
> @@ -487,7 +487,7 @@ can be found in [Vhost Walkthrough].
>             </disk>
>             <disk type='dir' device='disk'>
>               <driver name='qemu' type='fat'/>
> -             <source dir='/usr/src/dpdk-16.04'/>
> +             <source dir='/usr/src/dpdk-16.07'/>
>               <target dev='vdb' bus='virtio'/>
>               <readonly/>
>             </disk>
> @@ -557,9 +557,9 @@ can be found in [Vhost Walkthrough].
>      DPDK. It is recommended that users update Network Interface firmware
> to
>      match what has been validated for the DPDK release.
>
> -    For DPDK 16.04, the list of validated firmware versions can be found
> at:
> +    For DPDK 16.07, the list of validated firmware versions can be found
> at:
>
> -    http://dpdk.org/doc/guides/rel_notes/release_16_04.html
> +    http://dpdk.org/doc/guides/rel_notes/release_16.07.html
>
>
>  Bug Reporting:
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 85b18fd..9cf0b0c 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -356,8 +356,8 @@ struct netdev_dpdk {
>       * always true.  */
>      bool txq_needs_locking;
>
> -    /* virtio-net structure for vhost device */
> -    OVSRCU_TYPE(struct virtio_net *) virtio_dev;
> +    /* virtio identifier for vhost devices */
> +    int vid;
>

>      /* Identifier used to distinguish vhost devices from each other */
>      char vhost_id[PATH_MAX];
> @@ -393,8 +393,6 @@ static bool dpdk_thread_is_pmd(void);
>
>  static int netdev_dpdk_construct(struct netdev *);
>
> -struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);
> -
>  struct ingress_policer *
>  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);
>
> @@ -749,6 +747,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
> port_no,
>      dev->flags = 0;
>      dev->mtu = ETHER_MTU;
>      dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
> +    dev->vid = -1;
>
>      buf_size = dpdk_buf_size(dev->mtu);
>      dev->dpdk_mp = dpdk_mp_get(dev->socket_id,
> FRAME_LEN_TO_MTU(buf_size));
> @@ -846,6 +845,7 @@ netdev_dpdk_vhost_user_construct(struct netdev *netdev)
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      const char *name = netdev->name;
>      int err;
> +    uint64_t flags = 0;
>
>      /* 'name' is appended to 'vhost_sock_dir' and used to create a socket
> in
>       * the file system. '/' or '\' would traverse directories, so they're
> not
> @@ -868,7 +868,7 @@ netdev_dpdk_vhost_user_construct(struct netdev *netdev)
>      snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s",
>               vhost_sock_dir, name);
>
> -    err = rte_vhost_driver_register(dev->vhost_id);
> +    err = rte_vhost_driver_register(dev->vhost_id, flags);
>      if (err) {
>          VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
>                   dev->vhost_id);
> @@ -923,13 +923,19 @@ netdev_dpdk_destruct(struct netdev *netdev)
>      ovs_mutex_unlock(&dpdk_mutex);
>  }
>
> +static bool
> +is_vhost_running(int vid)
> +{
> +    return vid >= 0;
> +}
> +
>  static void
>  netdev_dpdk_vhost_destruct(struct netdev *netdev)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
>      /* Guest becomes an orphan if still attached. */
> -    if (netdev_dpdk_get_virtio(dev) != NULL) {
> +    if (is_vhost_running(dev->vid)) {
>          VLOG_ERR("Removing port '%s' while vhost device still attached.",
>                   netdev->name);
>          VLOG_ERR("To restore connectivity after re-adding of port, VM on
> socket"
> @@ -1143,12 +1149,6 @@ ingress_policer_run(struct ingress_policer
> *policer, struct rte_mbuf **pkts,
>      return cnt;
>  }
>
> -static bool
> -is_vhost_running(struct virtio_net *virtio_dev)
> -{
> -    return (virtio_dev != NULL && (virtio_dev->flags &
> VIRTIO_DEV_RUNNING));
> -}
> -
>  static inline void
>  netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats *stats,
>                                            unsigned int packet_size)
> @@ -1218,18 +1218,17 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>                             struct dp_packet **packets, int *c)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
> -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>      int qid = rxq->queue_id;
>      struct ingress_policer *policer =
> netdev_dpdk_get_ingress_policer(dev);
>      uint16_t nb_rx = 0;
>      uint16_t dropped = 0;
>
> -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev)
> +    if (OVS_UNLIKELY(!is_vhost_running(dev->vid)
>                       || !(dev->flags & NETDEV_UP))) {
>          return EAGAIN;
>      }
>
> -    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM +
> VIRTIO_TXQ,
> +    nb_rx = rte_vhost_dequeue_burst(dev->vid, qid * VIRTIO_QNUM +
> VIRTIO_TXQ,
>                                      dev->dpdk_mp->mp,
>                                      (struct rte_mbuf **)packets,
>                                      NETDEV_MAX_BURST);
> @@ -1326,7 +1325,6 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
>                           bool may_steal)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>      struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
>      unsigned int total_pkts = cnt;
>      unsigned int qos_pkts = cnt;
> @@ -1334,7 +1332,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
>
>      qid = dev->tx_q[qid % netdev->n_txq].map;
>
> -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0
> +    if (OVS_UNLIKELY(!is_vhost_running(dev->vid) || qid < 0
>                       || !(dev->flags & NETDEV_UP))) {
>          rte_spinlock_lock(&dev->stats_lock);
>          dev->stats.tx_dropped+= cnt;
> @@ -1352,7 +1350,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
>          int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
>          unsigned int tx_pkts;
>
> -        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,
> +        tx_pkts = rte_vhost_enqueue_burst(dev->vid, vhost_qid,
>                                            cur_pkts, cnt);
>          if (OVS_LIKELY(tx_pkts)) {
>              /* Packets have been sent.*/
> @@ -1707,60 +1705,50 @@ netdev_dpdk_vhost_get_stats(const struct netdev
> *netdev,
>
>  static void
>  netdev_dpdk_convert_xstats(struct netdev_stats *stats,
> -                           const struct rte_eth_xstats *xstats,
> +                           const struct rte_eth_xstat *xstats,
> +                           const struct rte_eth_xstat_name *names,
>                             const unsigned int size)
>  {
> -    /* XXX Current implementation is simple search through an array
> -     * to find hardcoded counter names. In future DPDK release (TBD)
> -     * XSTATS API will change so each counter will be represented by
> -     * unique ID instead of String. */
> -
>      for (unsigned int i = 0; i < size; i++) {
> -        if (strcmp(XSTAT_RX_64_PACKETS, xstats[i].name) == 0) {
> +        if (strcmp(XSTAT_RX_64_PACKETS, names[i].name) == 0) {
>              stats->rx_1_to_64_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_RX_65_TO_127_PACKETS, xstats[i].name) ==
> 0) {
> +        } else if (strcmp(XSTAT_RX_65_TO_127_PACKETS, names[i].name) ==
> 0) {
>              stats->rx_65_to_127_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_RX_128_TO_255_PACKETS, xstats[i].name) ==
> 0) {
> +        } else if (strcmp(XSTAT_RX_128_TO_255_PACKETS, names[i].name) ==
> 0) {
>              stats->rx_128_to_255_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_RX_256_TO_511_PACKETS, xstats[i].name) ==
> 0) {
> +        } else if (strcmp(XSTAT_RX_256_TO_511_PACKETS, names[i].name) ==
> 0) {
>              stats->rx_256_to_511_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_RX_512_TO_1023_PACKETS,
> -                          xstats[i].name) == 0) {
> +        } else if (strcmp(XSTAT_RX_512_TO_1023_PACKETS, names[i].name) ==
> 0) {
>              stats->rx_512_to_1023_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_RX_1024_TO_1522_PACKETS,
> -                          xstats[i].name) == 0) {
> +        } else if (strcmp(XSTAT_RX_1024_TO_1522_PACKETS, names[i].name)
> == 0) {
>              stats->rx_1024_to_1522_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_RX_1523_TO_MAX_PACKETS,
> -                          xstats[i].name) == 0) {
> +        } else if (strcmp(XSTAT_RX_1523_TO_MAX_PACKETS, names[i].name) ==
> 0) {
>              stats->rx_1523_to_max_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_TX_64_PACKETS, xstats[i].name) == 0) {
> +        } else if (strcmp(XSTAT_TX_64_PACKETS, names[i].name) == 0) {
>              stats->tx_1_to_64_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_TX_65_TO_127_PACKETS, xstats[i].name) ==
> 0) {
> +        } else if (strcmp(XSTAT_TX_65_TO_127_PACKETS, names[i].name) ==
> 0) {
>              stats->tx_65_to_127_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_TX_128_TO_255_PACKETS, xstats[i].name) ==
> 0) {
> +        } else if (strcmp(XSTAT_TX_128_TO_255_PACKETS, names[i].name) ==
> 0) {
>              stats->tx_128_to_255_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_TX_256_TO_511_PACKETS, xstats[i].name) ==
> 0) {
> +        } else if (strcmp(XSTAT_TX_256_TO_511_PACKETS, names[i].name) ==
> 0) {
>              stats->tx_256_to_511_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_TX_512_TO_1023_PACKETS,
> -                          xstats[i].name) == 0) {
> +        } else if (strcmp(XSTAT_TX_512_TO_1023_PACKETS, names[i].name) ==
> 0) {
>              stats->tx_512_to_1023_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_TX_1024_TO_1522_PACKETS,
> -                          xstats[i].name) == 0) {
> +        } else if (strcmp(XSTAT_TX_1024_TO_1522_PACKETS, names[i].name)
> == 0) {
>              stats->tx_1024_to_1522_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_TX_1523_TO_MAX_PACKETS,
> -                          xstats[i].name) == 0) {
> +        } else if (strcmp(XSTAT_TX_1523_TO_MAX_PACKETS, names[i].name) ==
> 0) {
>              stats->tx_1523_to_max_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_TX_MULTICAST_PACKETS, xstats[i].name) ==
> 0) {
> +        } else if (strcmp(XSTAT_TX_MULTICAST_PACKETS, names[i].name) ==
> 0) {
>              stats->tx_multicast_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_RX_BROADCAST_PACKETS, xstats[i].name) ==
> 0) {
> +        } else if (strcmp(XSTAT_RX_BROADCAST_PACKETS, names[i].name) ==
> 0) {
>              stats->rx_broadcast_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_TX_BROADCAST_PACKETS, xstats[i].name) ==
> 0) {
> +        } else if (strcmp(XSTAT_TX_BROADCAST_PACKETS, names[i].name) ==
> 0) {
>              stats->tx_broadcast_packets = xstats[i].value;
> -        } else if (strcmp(XSTAT_RX_UNDERSIZED_ERRORS, xstats[i].name) ==
> 0) {
> +        } else if (strcmp(XSTAT_RX_UNDERSIZED_ERRORS, names[i].name) ==
> 0) {
>              stats->rx_undersized_errors = xstats[i].value;
> -        } else if (strcmp(XSTAT_RX_FRAGMENTED_ERRORS, xstats[i].name) ==
> 0) {
> +        } else if (strcmp(XSTAT_RX_FRAGMENTED_ERRORS, names[i].name) ==
> 0) {
>              stats->rx_fragmented_errors = xstats[i].value;
> -        } else if (strcmp(XSTAT_RX_JABBER_ERRORS, xstats[i].name) == 0) {
> +        } else if (strcmp(XSTAT_RX_JABBER_ERRORS, names[i].name) == 0) {
>              stats->rx_jabber_errors = xstats[i].value;
>          }
>      }
> @@ -1776,7 +1764,8 @@ netdev_dpdk_get_stats(const struct netdev *netdev,
> struct netdev_stats *stats)
>      netdev_dpdk_get_carrier(netdev, &gg);
>      ovs_mutex_lock(&dev->mutex);
>
> -    struct rte_eth_xstats *rte_xstats;
> +    struct rte_eth_xstat *rte_xstats;
> +    struct rte_eth_xstat_name *rte_xstats_names;
>      int rte_xstats_len, rte_xstats_ret;
>
>      if (rte_eth_stats_get(dev->port_id, &rte_stats)) {
> @@ -1785,20 +1774,51 @@ netdev_dpdk_get_stats(const struct netdev *netdev,
> struct netdev_stats *stats)
>          return EPROTO;
>      }
>
> -    rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);
> -    if (rte_xstats_len > 0) {
> -        rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) *
> rte_xstats_len);
> -        memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
> -        rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
> -                                            rte_xstats_len);
> -        if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
> -            netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);
> -        }
> +    /* Get length of statistics */
> +    rte_xstats_len = rte_eth_xstats_get_names(dev->port_id, NULL, 0);
> +    if (rte_xstats_len < 0) {
> +        VLOG_WARN("Cannot get XSTATS values for port: %i", dev->port_id);
> +        goto out;
> +    }
> +    /* Reserve memory for xstats names */
> +    rte_xstats_names = dpdk_rte_mzalloc(sizeof(*rte_xstats_names)
> +                                        * rte_xstats_len);
> +    if (rte_xstats_names == NULL) {
>

Minor: how about !rte_xstats_names?


> +        VLOG_WARN("Cannot allocate memory for XSTATS names for port %i",
> +                  dev->port_id);
> +        rte_free(rte_xstats_names);
>

This rte_free seems unnecessary


> +        goto out;
> +    }
> +    /* Reserve memory for xstats values */
> +    rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len);
> +    if (rte_xstats == NULL) {
>

Minor: how about !rte_xstats?


> +        VLOG_WARN("Cannot allocate memory for XSTATS values for port %i",
> +                  dev->port_id);
>          rte_free(rte_xstats);
>

This rte_free seems unnecessary.


> +        goto out;
>

I think here we would leak rte_xstats_names.


> +    }
> +    /* Retreive xstats names */
> +    if (rte_xstats_len != rte_eth_xstats_get_names(dev->port_id,
> +                                       rte_xstats_names, rte_xstats_len))
> {
> +        VLOG_WARN("Cannot get XSTATS names for port: %i.", dev->port_id);
> +        rte_free(rte_xstats);
> +        rte_free(rte_xstats_names);
> +        goto out;
> +    }
> +    /* Retreive xstats values */
> +    memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
> +    rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
> +                                        rte_xstats_len);
> +    if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
> +        netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_names,
> +                                   rte_xstats_len);
>

Who frees rte_xstats_names and rte_xstats?


>      } else {
> -        VLOG_WARN("Can't get XSTATS counters for port: %i.",
> dev->port_id);
> +        VLOG_WARN("Cannot get XSTATS values for port: %i.", dev->port_id);
> +        rte_free(rte_xstats);
> +        rte_free(rte_xstats_names);

     }
>
> +out:
>

Perhaps it's easier to always free rte_xstats and rte_xstats_names here.

Also, is there a reason not to use xcalloc() and free(), instead?
This is not the fastpath, as far as I understand there's no reason for it
to be in hugepages.

     stats->rx_packets = rte_stats.ipackets;
>      stats->tx_packets = rte_stats.opackets;
>      stats->rx_bytes = rte_stats.ibytes;
> @@ -1806,7 +1826,6 @@ netdev_dpdk_get_stats(const struct netdev *netdev,
> struct netdev_stats *stats)
>      /* DPDK counts imissed as errors, but count them here as dropped
> instead */
>      stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;
>      stats->tx_errors = rte_stats.oerrors;
> -    stats->multicast = rte_stats.imcasts;
>
>      rte_spinlock_lock(&dev->stats_lock);
>      stats->tx_dropped = dev->stats.tx_dropped;
> @@ -1973,11 +1992,10 @@ static int
>  netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool *carrier)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>
>      ovs_mutex_lock(&dev->mutex);
>
> -    if (is_vhost_running(virtio_dev)) {
> +    if (is_vhost_running(dev->vid)) {
>          *carrier = 1;
>      } else {
>          *carrier = 0;
> @@ -2045,10 +2063,9 @@ netdev_dpdk_update_flags__(struct netdev_dpdk *dev,
>          /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and
> vhost is
>           * running then change netdev's change_seq to trigger link state
>           * update. */
> -        struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>
>          if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))
> -            && is_vhost_running(virtio_dev)) {
> +            && is_vhost_running(dev->vid)){
>

Minor: space here.


>              netdev_change_seq_changed(&dev->up);
>
>              /* Clear statistics if device is getting up. */
> @@ -2176,15 +2193,15 @@ netdev_dpdk_set_admin_state(struct unixctl_conn
> *conn, int argc,
>   * Set virtqueue flags so that we do not receive interrupts.
>   */
>  static void
> -set_irq_status(struct virtio_net *virtio_dev)
> +set_irq_status(int vid)
>  {
>      uint32_t i;
>      uint64_t idx;
>
> -    for (i = 0; i < virtio_dev->virt_qp_nb; i++) {
> +    for (i = 0; i < rte_vhost_get_queue_num(vid); i++) {
>          idx = i * VIRTIO_QNUM;
> -        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_RXQ,
> 0);
> -        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_TXQ,
> 0);
> +        rte_vhost_enable_guest_notification(vid, idx + VIRTIO_RXQ, 0);
> +        rte_vhost_enable_guest_notification(vid, idx + VIRTIO_TXQ, 0);
>      }
>  }
>
> @@ -2233,26 +2250,27 @@ netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)
>   * A new virtio-net device is added to a vhost port.
>   */
>  static int
> -new_device(struct virtio_net *virtio_dev)
> +new_device(int vid)
>  {
>      struct netdev_dpdk *dev;
>      bool exists = false;
>      int newnode = 0;
> -    long err = 0;
> +    char ifname[PATH_MAX];
> +
> +    rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
>
>      ovs_mutex_lock(&dpdk_mutex);
>      /* Add device to the vhost port with the same name as that passed
> down. */
>      LIST_FOR_EACH(dev, list_node, &dpdk_list) {
> -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
> -            uint32_t qp_num = virtio_dev->virt_qp_nb;
> +        if (strncmp(ifname, dev->vhost_id, PATH_MAX) == 0) {
> +            uint32_t qp_num = rte_vhost_get_queue_num(vid);
>
>              ovs_mutex_lock(&dev->mutex);
>              /* Get NUMA information */
> -            err = get_mempolicy(&newnode, NULL, 0, virtio_dev,
> -                                MPOL_F_NODE | MPOL_F_ADDR);
> -            if (err) {
> -                VLOG_INFO("Error getting NUMA info for vHost Device '%s'",
> -                        virtio_dev->ifname);
> +            newnode = rte_vhost_get_numa_node(vid);
> +            if ((newnode != -1) && (newnode != dev->socket_id)) {
> +                dev->requested_socket_id = newnode;
> +            } else {
>

Great, it looks like we don't need libnuma anymore.

I guess we can remove #include <numaif.h>.

I'm not sure if we can remove the build dependency, since if DPDK depends
on it we have to pass it to the linker.
Perhaps we can make it optional? In any case, this can be addressed by a
separate patch


>                  newnode = dev->socket_id;
>              }
>
> @@ -2261,11 +2279,11 @@ new_device(struct virtio_net *virtio_dev)
>              dev->requested_n_txq = qp_num;
>              netdev_request_reconfigure(&dev->up);
>
> -            ovsrcu_set(&dev->virtio_dev, virtio_dev);
> +            dev->vid = vid;
>              exists = true;
>
>              /* Disable notifications. */
> -            set_irq_status(virtio_dev);
> +            set_irq_status(dev->vid);
>              netdev_change_seq_changed(&dev->up);
>              ovs_mutex_unlock(&dev->mutex);
>              break;
> @@ -2274,14 +2292,14 @@ new_device(struct virtio_net *virtio_dev)
>      ovs_mutex_unlock(&dpdk_mutex);
>
>      if (!exists) {
> -        VLOG_INFO("vHost Device '%s' %"PRIu64" can't be added - name not "
> -                  "found", virtio_dev->ifname, virtio_dev->device_fh);
> +        VLOG_INFO("vHost Device '%s' can't be added - name not found",
> ifname);
>
>          return -1;
>      }
>
> -    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa node
> %i",
> -              virtio_dev->ifname, virtio_dev->device_fh, newnode);
> +    VLOG_INFO("vHost Device '%s' has been added on numa node %i",
> +              ifname, newnode);
> +
>      return 0;
>  }
>
> @@ -2304,18 +2322,20 @@ netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)
>   *  the device.
>   */
>  static void
> -destroy_device(volatile struct virtio_net *virtio_dev)
> +destroy_device(int vid)
>  {
>      struct netdev_dpdk *dev;
>      bool exists = false;
> +    char ifname[PATH_MAX];
> +
> +    rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
>
>      ovs_mutex_lock(&dpdk_mutex);
>      LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> -        if (netdev_dpdk_get_virtio(dev) == virtio_dev) {
> +        if (dev->vid == vid) {
>
>              ovs_mutex_lock(&dev->mutex);
> -            virtio_dev->flags &= ~VIRTIO_DEV_RUNNING;
> -            ovsrcu_set(&dev->virtio_dev, NULL);
> +            dev->vid = -1;
>              /* Clear tx/rx queue settings. */
>              netdev_dpdk_txq_map_clear(dev);
>              dev->requested_n_rxq = NR_QUEUE;
> @@ -2332,31 +2352,21 @@ destroy_device(volatile struct virtio_net
> *virtio_dev)
>      ovs_mutex_unlock(&dpdk_mutex);
>
>      if (exists == true) {
> -        /*
> -         * Wait for other threads to quiesce after setting the
> 'virtio_dev'
> -         * to NULL, before returning.
> -         */
> -        ovsrcu_synchronize();
> -        /*
> -         * As call to ovsrcu_synchronize() will end the quiescent state,
> -         * put thread back into quiescent state before returning.
> -         */
> -        ovsrcu_quiesce_start();
> -        VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed",
> -                  virtio_dev->ifname, virtio_dev->device_fh);
> +        VLOG_INFO("vHost Device '%s' has been removed", ifname);
>      } else {
> -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found",
> virtio_dev->ifname,
> -                  virtio_dev->device_fh);
> +        VLOG_INFO("vHost Device '%s' not found", ifname);
>      }
>  }
>
>  static int
> -vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id,
> -                    int enable)
> +vring_state_changed(int vid, uint16_t queue_id, int enable)
>  {
>      struct netdev_dpdk *dev;
>      bool exists = false;
>      int qid = queue_id / VIRTIO_QNUM;
> +    char ifname[PATH_MAX];
> +
> +    rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
>
>      if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {
>          return 0;
> @@ -2364,7 +2374,7 @@ vring_state_changed(struct virtio_net *virtio_dev,
> uint16_t queue_id,
>
>      ovs_mutex_lock(&dpdk_mutex);
>      LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
> +        if (strncmp(ifname, dev->vhost_id, PATH_MAX) == 0) {
>              ovs_mutex_lock(&dev->mutex);
>              if (enable) {
>                  dev->tx_q[qid].map = qid;
> @@ -2380,25 +2390,17 @@ vring_state_changed(struct virtio_net *virtio_dev,
> uint16_t queue_id,
>      ovs_mutex_unlock(&dpdk_mutex);
>
>      if (exists) {
> -        VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %"
> -                  PRIu64" changed to \'%s\'", queue_id, qid,
> -                  virtio_dev->ifname, virtio_dev->device_fh,
> +        VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s'"
> +                  "changed to \'%s\'", queue_id, qid, ifname,
>                    (enable == 1) ? "enabled" : "disabled");
>      } else {
> -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found",
> virtio_dev->ifname,
> -                  virtio_dev->device_fh);
> +        VLOG_INFO("vHost Device '%s' not found", ifname);
>          return -1;
>      }
>
>      return 0;
>  }
>
> -struct virtio_net *
> -netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)
> -{
> -    return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);
> -}
> -
>  struct ingress_policer *
>  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
>  {
> @@ -2848,7 +2850,6 @@ static int
>  netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>      int err = 0;
>
>      ovs_mutex_lock(&dpdk_mutex);
> @@ -2874,10 +2875,6 @@ netdev_dpdk_vhost_user_reconfigure(struct netdev
> *netdev)
>          }
>      }
>
> -    if (virtio_dev) {
> -        virtio_dev->flags |= VIRTIO_DEV_RUNNING;
> -    }
> -
>      ovs_mutex_unlock(&dev->mutex);
>      ovs_mutex_unlock(&dpdk_mutex);
>
> @@ -3342,7 +3339,7 @@ dpdk_init__(const struct smap *ovs_other_config)
>      /* Register CUSE device to handle IOCTLs.
>       * Unless otherwise specified, cuse_dev_name is set to vhost-net.
>       */
> -    err = rte_vhost_driver_register(cuse_dev_name);
> +    err = rte_vhost_driver_register(cuse_dev_name, 0);
>
>      if (err != 0) {
>          VLOG_ERR("CUSE device setup failure.");
> --
> 2.4.3
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>



More information about the dev mailing list