[ovs-dev] [dpdk-hwol PATCH v1] netdev-dpdk: support port representors

Ian Stokes ian.stokes at intel.com
Wed Dec 12 15:40:40 UTC 2018


On 12/11/2018 11:34 PM, Ophir Munk wrote:
> Dpdk port representors were introduced in dpdk 18.xx.
> Prior to representors there was a one-to-one relationship
> between an rte device (e.g. PCI bus) and an eth device (a dpdk
> port id). With representors the relationship becomes one-to-many
> rte device to eth devices.
> For example in [3] there are two devices (representors) using the same
> PCI physical address 0000:08:00.0: "0000:08:00.0,representor=[3]" and
> "0000:08:00.0,representor=[5]".
> This commit handles the new one-to-many relationship. For example,
> when one of the device representors in [3] is closed - the PCI bus
> cannot be detached until the other device representor is closed as
> well. OVS remains backward compatible by supporting dpdk legacy PCI
> ports which do not include representors.
> Dpdk representors related commits are listed in [1]. dpdk representors
> documentation appears in [2]. A sample configuration which uses two
> representors ports (the output of "ovs-vsctl show" command) is
> shown in [3].

Thanks for this Ophir.

I need to complete some more testing on various devices before a full 
review can be completed but here are some first pass comments in the 
mean time that could be addressed for a v2.

Also with support for 18.11 being upstreamed to master (by tomorrow if 
there are no other comments), will this patch be targeting OVS 2.11? If 
so you could target the v2 at master once 18.11 is there rather than 
dpdk-hwol but I'll leave it to you.

Ian
> 
> [1]
> e0cb96204b71 ("net/i40e: add support for representor ports")
> cf80ba6e2038 ("net/ixgbe: add support for representor ports")
> 26c08b979d26 ("net/mlx5: add port representor awareness")
> 
> [2]
> doc/guides/prog_guide/switch_representation.rst
> 
> [3]
> Bridge "ovs_br0"
>      Port "ovs_br0"
>          Interface "ovs_br0"
>              type: internal
>      Port "port-rep3"
>          Interface "port-rep3"
>              type: dpdk
>              options: {dpdk-devargs="0000:08:00.0,representor=[3]"}
>      Port "port-rep5"
>          Interface "port-rep5"
>              type: dpdk
>              options: {dpdk-devargs="0000:08:00.0,representor=[5]"}
>      ovs_version: "2.10.90"
> 
> Signed-off-by: Ophir Munk <ophirmu at mellanox.com>
> ---
> v1:
> 1. rebase on top of Kevin's patch
> dpdk: Update to use DPDK 18.11.[ovs-dev,v7,dpdk-latest,1/1] dpdk: Update to use DPDK 18.11.
> https://patchwork.ozlabs.org/patch/1005535/
> 2. skipping count of sibling ports in case the sibling port state is RTE_ETH_DEV_UNUSED
> 
>   lib/netdev-dpdk.c | 112 ++++++++++++++++++++++++++++++++++++++++--------------
>   1 file changed, 83 insertions(+), 29 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 6b8e05e..30af043 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -1216,6 +1216,25 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[],
>       }
>   }
>   
> +/* get the number of OVS interfaces which have the same DPDK

Minor, can you capitalize the first letter of the comment.

> + * rte device (e.g. same pci bus address). */
> +static int
> +netdev_dpdk_get_num_ports(struct rte_device *device)
> +    OVS_REQUIRES(dpdk_mutex)
> +{
> +    struct netdev_dpdk *dev;
> +    int count;
> +
You could initialize count to 0 above where it is declared to save the 
line below.

> +    count = 0;
> +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> +        if (rte_eth_devices[dev->port_id].device == device
> +        && rte_eth_devices[dev->port_id].state != RTE_ETH_DEV_UNUSED) {
> +            count++;
> +        }
> +    }
> +    return count;
> +}
> +
>   static int
>   vhost_common_construct(struct netdev *netdev)
>       OVS_REQUIRES(dpdk_mutex)
> @@ -1351,20 +1370,23 @@ static void
>   netdev_dpdk_destruct(struct netdev *netdev)
>   {
>       struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -    struct rte_eth_dev_info dev_info;
> +    struct rte_device *rte_dev;
>   
>       ovs_mutex_lock(&dpdk_mutex);
>   
>       rte_eth_dev_stop(dev->port_id);
>       dev->started = false;
> -
>       if (dev->attached) {
> +        /* Remove the port eth device */
>           rte_eth_dev_close(dev->port_id);
> -        rte_eth_dev_info_get(dev->port_id, &dev_info);
> -        if (dev_info.device && !rte_dev_remove(dev_info.device)) {
> -            VLOG_INFO("Device '%s' has been detached", dev->devargs);
> -        } else {
> -            VLOG_ERR("Device '%s' can not be detached", dev->devargs);
> +        VLOG_INFO("Device '%s' has been removed", dev->devargs);
> +        /* if this is the last port_id using this rte device

Minor: capitalize first letter of comment above, period at end of the 
comment.

> +         * remove this rte device and all its eth devices */
> +        rte_dev = rte_eth_devices[dev->port_id].device;
> +        if (netdev_dpdk_get_num_ports(rte_dev) == 1) {
> +            if (rte_dev_remove(rte_dev) < 0) {
> +                VLOG_ERR("Device '%s' can not be detached", dev->devargs);
> +            }
>           }
>       }
>   
> @@ -1630,8 +1652,26 @@ netdev_dpdk_get_port_by_mac(const char *mac_str)
>       return DPDK_ETH_PORT_ID_INVALID;
>   }
>   
> +/* return the first DPDK port_id matching the devargs pattern */
Minor: Capitalize first word and add a period to the end of the comment.

> +static dpdk_port_t
> +netdev_dpdk_get_port_by_devargs(const char *devargs)
> +{
> +    struct rte_dev_iterator iterator;
> +    dpdk_port_t port_id;
> +
> +    if (rte_dev_probe(devargs)) {
> +        port_id = DPDK_ETH_PORT_ID_INVALID;
> +    } else {
> +        RTE_ETH_FOREACH_MATCHING_DEV(port_id, devargs, &iterator) {
> +            break;
Reading the comments for RTE_ETH_FOREACH_MATCHING_DEV

If a break is done before the end of the loop, the function 
rte_eth_iterator_cleanup() must be called.

That doesn't seem to be handled here and I would think needs to be checked?
> +        }
> +    }
> +    return port_id;
> +}
> +
>   /*
> - * Normally, a PCI id is enough for identifying a specific DPDK port.
> + * Normally, a PCI id (optionally followed by a representor number)
> + * is enough for identifying a specific DPDK port.
>    * However, for some NICs having multiple ports sharing the same PCI
>    * id, using PCI id won't work then.
>    *
> @@ -1644,29 +1684,32 @@ static dpdk_port_t
>   netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
>                               const char *devargs, char **errp)
>   {
> -    char *name;
>       dpdk_port_t new_port_id = DPDK_ETH_PORT_ID_INVALID;
>   
>       if (strncmp(devargs, "class=eth,mac=", 14) == 0) {
>           new_port_id = netdev_dpdk_get_port_by_mac(&devargs[14]);
>       } else {
> -        name = xmemdup0(devargs, strcspn(devargs, ","));
> -        if (rte_eth_dev_get_port_by_name(name, &new_port_id)
> -                || !rte_eth_dev_is_valid_port(new_port_id)) {
> -            /* Device not found in DPDK, attempt to attach it */
> -            if (!rte_dev_probe(devargs)
> -                && !rte_eth_dev_get_port_by_name(name, &new_port_id)) {
> -                /* Attach successful */
> -                dev->attached = true;
> -                VLOG_INFO("Device '%s' attached to DPDK", devargs);
> -            } else {
> -                /* Attach unsuccessful */
> +        new_port_id = netdev_dpdk_get_port_by_devargs(devargs);
> +        if (!rte_eth_dev_is_valid_port(new_port_id)) {
> +            new_port_id = DPDK_ETH_PORT_ID_INVALID;
> +        } else {
> +            struct netdev_dpdk *dup_dev;
> +
> +            dup_dev = netdev_dpdk_lookup_by_port_id(new_port_id);

Above, netdev_dpdk_lookup_by_port_id(new_port_id) requires holding of 
dpdk_mutex.

> +            if (dup_dev) {
> +                VLOG_WARN_BUF(errp, "'%s' is trying to use device '%s' "
> +                               "which is already in use by '%s'",
> +                          netdev_get_name(&dev->up), devargs,
> +                          netdev_get_name(&dup_dev->up));

The alignment above here seems a bit odd, could you vertically align them?
>                   new_port_id = DPDK_ETH_PORT_ID_INVALID;
> +            } else {
> +                /* device successfully found */
Minor: Capital at beginning and period at end of comment.

> +                dev->attached = true;
> +                VLOG_INFO("Device '%s' attached to DPDK port %d",
> +                          devargs, new_port_id);
>               }
>           }
> -        free(name);
>       }
> -
>       if (new_port_id == DPDK_ETH_PORT_ID_INVALID) {
>           VLOG_WARN_BUF(errp, "Error attaching device '%s' to DPDK", devargs);
>       }
> @@ -3234,11 +3277,15 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>       char *response;
>       dpdk_port_t port_id;
>       struct netdev_dpdk *dev;
> -    struct rte_eth_dev_info dev_info;
> +    struct rte_device *rte_dev;
> +    struct rte_dev_iterator iterator;
>   
>       ovs_mutex_lock(&dpdk_mutex);
>   
> -    if (rte_eth_dev_get_port_by_name(argv[1], &port_id)) {
> +    RTE_ETH_FOREACH_MATCHING_DEV(port_id, argv[1], &iterator) {
> +        break;
Same comment as above, need to handle break with iterator clean up if 
you do not reach the end of the loop.

> +    }
> +    if (port_id == DPDK_ETH_PORT_ID_INVALID) {
>           response = xasprintf("Device '%s' not found in DPDK", argv[1]);
>           goto error;
>       }
> @@ -3251,15 +3298,22 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>           goto error;
>       }
>   
> -    rte_eth_dev_close(port_id);
> +    rte_dev = rte_eth_devices[port_id].device;
> +    if (netdev_dpdk_get_num_ports(rte_dev)) {
> +        response = xasprintf("Device '%s' is being shared with other "
> +                             "interfaces. Remove them before detaching.",
> +                             argv[1]);
> +        goto error;
> +    }
>   
> -    rte_eth_dev_info_get(port_id, &dev_info);
> -    if (!dev_info.device || rte_dev_remove(dev_info.device)) {
> -        response = xasprintf("Device '%s' can not be detached", argv[1]);
> +    rte_eth_dev_close(port_id);
> +    if (rte_dev_remove(rte_dev) < 0) {
> +        response = xasprintf("Device '%s' can not be removed", argv[1]);
>           goto error;
>       }
>   
> -    response = xasprintf("Device '%s' has been detached", argv[1]);
> +    response = xasprintf("All devices shared with device '%s' "
> +                          "have been detached", argv[1]);
>   
>       ovs_mutex_unlock(&dpdk_mutex);
>       unixctl_command_reply(conn, response);
> 



More information about the dev mailing list