[ovs-dev] [dpdk-latest PATCH v1] netdev-dpdk: Upgrade to dpdk v18.11

Ophir Munk ophirmu at mellanox.com
Wed Nov 14 22:08:07 UTC 2018


1. Enable compilation and linkage with dpdk 18.11.x

2. Update references to DPDK version 18.11 in Documentation and in
travis linux-build script

3. Replace deprecated function calls
    - rte_eth_dev_attach
    - rte_eth_dev_detach
with their respective new calls
    - rte_dev_probe
    - rte_dev_remove

4. DPDK port representors were introduced in DPDK 18.xx.
Example commits are listed in [1]. The DPDK representor
documentation is in [2]. A sample configuration which uses two
representor ports (the output of the "ovs-vsctl show" command) is
shown in [3].

OVS remains backward compatible in supporting dpdk legacy PCI
ports which do not include representors.

5. Starting from DPDK 18.xx there is no longer a one-to-one relationship
between an rte device (e.g. PCI bus) and an eth device (seen as DPDK
port ids by OVS).
The relationship became one (rte device) to many (eth devices).
For example in [3] there are two devices (representors) using
the same PCI address 0000:08:00.0.
This commit handles the new one-to-many relationship. For example,
when one of the device's representors is closed, the PCI bus cannot
be detached until the other device is closed as well.

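6. HW offload capability DEV_RX_OFFLOAD_CRC_STRIP was replaced with
DEV_RX_OFFLOAD_KEEP_CRC. The meaning is inverted: the CRC is now stripped
by default, so KEEP_CRC is requested only when CRC stripping is not
enabled for the port and the device advertises the KEEP_CRC capability.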

[1]
e0cb96204b71 ("net/i40e: add support for representor ports")
cf80ba6e2038 ("net/ixgbe: add support for representor ports")
26c08b979d26 ("net/mlx5: add port representor awareness")

[2]
doc/guides/prog_guide/switch_representation.rst

[3]
Bridge "ovs_br0"
    Port "ovs_br0"
        Interface "ovs_br0"
            type: internal
    Port "port-rep3"
        Interface "port-rep3"
            type: dpdk
            options: {dpdk-devargs="0000:08:00.0,representor=[3]"}
    Port "port-rep5"
        Interface "port-rep5"
            type: dpdk
            options: {dpdk-devargs="0000:08:00.0,representor=[5]"}
    ovs_version: "2.10.90"

Signed-off-by: Ophir Munk <ophirmu at mellanox.com>
---
v1:
Initial version (resent with [dpdk-latest PATCH v1])

 .travis/linux-build.sh                   |   2 +-
 Documentation/intro/install/dpdk.rst     |  12 ++--
 Documentation/topics/dpdk/vhost-user.rst |   6 +-
 lib/netdev-dpdk.c                        | 111 ++++++++++++++++++++++---------
 4 files changed, 91 insertions(+), 40 deletions(-)

diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
index 4c9e952..1d3a955 100755
--- a/.travis/linux-build.sh
+++ b/.travis/linux-build.sh
@@ -83,7 +83,7 @@ fi
 
 if [ "$DPDK" ]; then
     if [ -z "$DPDK_VER" ]; then
-        DPDK_VER="18.08"
+        DPDK_VER="18.11"
     fi
     install_dpdk $DPDK_VER
     if [ "$CC" = "clang" ]; then
diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst
index bab3560..8815427 100644
--- a/Documentation/intro/install/dpdk.rst
+++ b/Documentation/intro/install/dpdk.rst
@@ -42,7 +42,7 @@ Build requirements
 In addition to the requirements described in :doc:`general`, building Open
 vSwitch with DPDK will require the following:
 
-- DPDK 18.08.0
+- DPDK 18.11.0
 
 - A `DPDK supported NIC`_
 
@@ -71,9 +71,9 @@ Install DPDK
 #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``::
 
        $ cd /usr/src/
-       $ wget http://fast.dpdk.org/rel/dpdk-18.08.tar.xz
+       $ wget http://fast.dpdk.org/rel/dpdk-18.11.tar.xz
-       $ tar xf dpdk-18.08.tar.xz
+       $ tar xf dpdk-18.11.tar.xz
-       $ export DPDK_DIR=/usr/src/dpdk-stable-18.08
+       $ export DPDK_DIR=/usr/src/dpdk-stable-18.11
        $ cd $DPDK_DIR
 
 #. (Optional) Configure DPDK as a shared library
@@ -283,9 +283,9 @@ with either the ovs-vswitchd logs, or by running either of the commands::
 
   $ ovs-vswitchd --version
   ovs-vswitchd (Open vSwitch) 2.9.0
-  DPDK 18.08.0
+  DPDK 18.11.0
   $ ovs-vsctl get Open_vSwitch . dpdk_version
-  "DPDK 18.08.0"
+  "DPDK 18.11.0"
 
 At this point you can use ovs-vsctl to set up bridges and other Open vSwitch
 features. Seeing as we've configured the DPDK datapath, we will use DPDK-type
@@ -672,7 +672,7 @@ Limitations
   The latest list of validated firmware versions can be found in the `DPDK
   release notes`_.
 
-.. _DPDK release notes: http://dpdk.org/doc/guides/rel_notes/release_18_08.html
+.. _DPDK release notes: http://dpdk.org/doc/guides/rel_notes/release_18_11.html
 
 - Upper bound MTU: DPDK device drivers differ in how the L2 frame for a
   given MTU value is calculated e.g. i40e driver includes 2 x vlan headers in
diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst
index 062605c..1f6389d 100644
--- a/Documentation/topics/dpdk/vhost-user.rst
+++ b/Documentation/topics/dpdk/vhost-user.rst
@@ -320,9 +320,9 @@ To begin, instantiate a guest as described in :ref:`dpdk-vhost-user` or
 DPDK sources to VM and build DPDK::
 
     $ cd /root/dpdk/
-    $ wget http://fast.dpdk.org/rel/dpdk-18.08.tar.xz
-    $ tar xf dpdk-18.08.tar.xz
-    $ export DPDK_DIR=/root/dpdk/dpdk-stable-18.08
+    $ wget http://fast.dpdk.org/rel/dpdk-18.11.tar.xz
+    $ tar xf dpdk-18.11.tar.xz
+    $ export DPDK_DIR=/root/dpdk/dpdk-stable-18.11
     $ export DPDK_TARGET=x86_64-native-linuxapp-gcc
     $ export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET
     $ cd $DPDK_DIR
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 1480bf8..8432426 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -929,8 +929,9 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
         conf.rxmode.offloads |= DEV_RX_OFFLOAD_CHECKSUM;
     }
 
-    if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
-        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
+    if (!(dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) &&
+         (info.rx_offload_capa & DEV_RX_OFFLOAD_KEEP_CRC)) {
+        conf.rxmode.offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
     }
 
     /* Limit configured rss hash functions to only those supported
@@ -1215,6 +1216,23 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[],
     }
 }
 
+/* Counts the OVS interfaces in 'dpdk_list' whose ethdev is backed by the
+ * rte device 'device' (e.g. ports sharing one PCI bus address). */
+static int
+netdev_dpdk_get_num_ports(struct rte_device *device)
+    OVS_REQUIRES(dpdk_mutex)
+{
+    struct netdev_dpdk *dev;
+    int n_ports = 0;
+
+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
+        if (rte_eth_devices[dev->port_id].device == device) {
+            n_ports++;
+        }
+    }
+    return n_ports;
+}
+
 static int
 vhost_common_construct(struct netdev *netdev)
     OVS_REQUIRES(dpdk_mutex)
@@ -1350,19 +1368,22 @@ static void
 netdev_dpdk_destruct(struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-    char devname[RTE_ETH_NAME_MAX_LEN];
+    struct rte_device *rte_dev;
 
     ovs_mutex_lock(&dpdk_mutex);
 
     rte_eth_dev_stop(dev->port_id);
     dev->started = false;
-
     if (dev->attached) {
+        /* Remove the port eth device */
         rte_eth_dev_close(dev->port_id);
-        if (rte_eth_dev_detach(dev->port_id, devname) < 0) {
-            VLOG_ERR("Device '%s' can not be detached", dev->devargs);
-        } else {
-            VLOG_INFO("Device '%s' has been detached", devname);
+        VLOG_INFO("Device '%s' has been removed", dev->devargs);
+        /* if this is the last port_id using this rte device
+         * remove this rte device and all its eth devices */
+        rte_dev = rte_eth_devices[dev->port_id].device;
+        if (netdev_dpdk_get_num_ports(rte_dev) == 1) {
+            if (rte_dev_remove(rte_dev) < 0)
+                VLOG_ERR("Device '%s' can not be detached", dev->devargs);
         }
     }
 
@@ -1628,8 +1649,26 @@ netdev_dpdk_get_port_by_mac(const char *mac_str)
     return DPDK_ETH_PORT_ID_INVALID;
 }
 
+/* Probe 'devargs'; return the first matching DPDK port_id, if any. */
+static dpdk_port_t
+netdev_dpdk_get_port_by_devargs(const char *devargs)
+{
+    struct rte_dev_iterator iterator;
+    dpdk_port_t port_id = DPDK_ETH_PORT_ID_INVALID;
+
+    if (!rte_dev_probe(devargs)) {
+        RTE_ETH_FOREACH_MATCHING_DEV(port_id, devargs, &iterator) {
+            /* An early break must release the iterator's resources. */
+            rte_eth_iterator_cleanup(&iterator);
+            break;
+        }
+    }
+    return port_id;
+}
+
 /*
- * Normally, a PCI id is enough for identifying a specific DPDK port.
+ * Normally, a PCI id (optionally followed by a representor number)
+ * is enough for identifying a specific DPDK port.
  * However, for some NICs having multiple ports sharing the same PCI
  * id, using PCI id won't work then.
  *
@@ -1642,28 +1681,31 @@ static dpdk_port_t
 netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
                             const char *devargs, char **errp)
 {
-    char *name;
     dpdk_port_t new_port_id = DPDK_ETH_PORT_ID_INVALID;
 
     if (strncmp(devargs, "class=eth,mac=", 14) == 0) {
         new_port_id = netdev_dpdk_get_port_by_mac(&devargs[14]);
     } else {
-        name = xmemdup0(devargs, strcspn(devargs, ","));
-        if (rte_eth_dev_get_port_by_name(name, &new_port_id)
-                || !rte_eth_dev_is_valid_port(new_port_id)) {
-            /* Device not found in DPDK, attempt to attach it */
-            if (!rte_eth_dev_attach(devargs, &new_port_id)) {
-                /* Attach successful */
-                dev->attached = true;
-                VLOG_INFO("Device '%s' attached to DPDK", devargs);
-            } else {
-                /* Attach unsuccessful */
+        new_port_id = netdev_dpdk_get_port_by_devargs(devargs);
+        if (!rte_eth_dev_is_valid_port(new_port_id)) {
+            new_port_id = DPDK_ETH_PORT_ID_INVALID;
+        } else {
+            struct netdev_dpdk *dup_dev;
+
+            dup_dev = netdev_dpdk_lookup_by_port_id(new_port_id);
+            if (dup_dev) {
+                VLOG_WARN_BUF(errp, "'%s' is trying to use device '%s' "
+                               "which is already in use by '%s'",
+                          netdev_get_name(&dev->up), devargs,
+                          netdev_get_name(&dup_dev->up));
                 new_port_id = DPDK_ETH_PORT_ID_INVALID;
+            } else {
+                /* device successfully found */
+                dev->attached = true;
+                VLOG_INFO("Device '%s' attached to DPDK port %d", devargs, new_port_id);
             }
         }
-        free(name);
     }
-
     if (new_port_id == DPDK_ETH_PORT_ID_INVALID) {
         VLOG_WARN_BUF(errp, "Error attaching device '%s' to DPDK", devargs);
     }
@@ -3208,15 +3250,18 @@ static void
 netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
                    const char *argv[], void *aux OVS_UNUSED)
 {
-    int ret;
     char *response;
     dpdk_port_t port_id;
-    char devname[RTE_ETH_NAME_MAX_LEN];
     struct netdev_dpdk *dev;
+    struct rte_device *rte_dev;
+    struct rte_dev_iterator iterator;
 
     ovs_mutex_lock(&dpdk_mutex);
 
-    if (rte_eth_dev_get_port_by_name(argv[1], &port_id)) {
+    RTE_ETH_FOREACH_MATCHING_DEV(port_id, argv[1], &iterator) {
+        break;
+    }
+    if (port_id == DPDK_ETH_PORT_ID_INVALID) {
         response = xasprintf("Device '%s' not found in DPDK", argv[1]);
         goto error;
     }
@@ -3229,15 +3274,21 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
         goto error;
     }
 
-    rte_eth_dev_close(port_id);
+    rte_dev = rte_eth_devices[port_id].device;
+    if (netdev_dpdk_get_num_ports(rte_dev)) {
+        response = xasprintf("Device '%s' is being shared with other "
+                             "interfaces. Remove them before detaching.",
+                             argv[1]);
+        goto error;
+    }
 
-    ret = rte_eth_dev_detach(port_id, devname);
-    if (ret < 0) {
-        response = xasprintf("Device '%s' can not be detached", argv[1]);
+    rte_eth_dev_close(port_id);
+    if (rte_dev_remove(rte_dev) < 0) {
+        response = xasprintf("Device '%s' can not be removed", argv[1]);
         goto error;
     }
 
-    response = xasprintf("Device '%s' has been detached", argv[1]);
+    response = xasprintf("All devices shared with device '%s' have been detached", argv[1]);
 
     ovs_mutex_unlock(&dpdk_mutex);
     unixctl_command_reply(conn, response);
-- 
1.8.3.1



More information about the dev mailing list