[ovs-dev] [PATCH RFC v6 2/2] netdev-dpdk: Add vHost User PMD

Loftus, Ciara ciara.loftus at intel.com
Fri Oct 28 16:15:30 UTC 2016


[snip]

> > +
> > +static int
> >  netdev_dpdk_vhost_construct(struct netdev *netdev)
> >  {
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > @@ -904,7 +1051,7 @@ netdev_dpdk_vhost_construct(struct netdev
> *netdev)
> >      /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in
> >       * the file system. '/' or '\' would traverse directories, so they're not
> >       * acceptable in 'name'. */
> > -    if (strchr(name, '/') || strchr(name, '\\')) {
> > +    if (strchr(name, '/') || strchr(name, '\\') || strchr(name, ',')) {
> >          VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. "
> >                   "A valid name must not include '/' or '\\'",
> >                   name);
> > @@ -917,18 +1064,26 @@ netdev_dpdk_vhost_construct(struct netdev
> *netdev)
> >       */
> >      snprintf(dev->vhost_id, sizeof dev->vhost_id, "%s/%s",
> >               dpdk_get_vhost_sock_dir(), name);
> > +    dev->port_id = -1;
> >
> > -    dev->vhost_driver_flags &= ~RTE_VHOST_USER_CLIENT;
> > -    err = rte_vhost_driver_register(dev->vhost_id, dev-
> >vhost_driver_flags);
> > -    if (err) {
> > -        VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
> > -                 dev->vhost_id);
> > -    } else {
> > +    err = dpdk_attach_vhost_pmd(dev, 0);
> > +
> > +    if (!err) {
> >          fatal_signal_add_file_to_unlink(dev->vhost_id);
> >          VLOG_INFO("Socket %s created for vhost-user port %s\n",
> >                    dev->vhost_id, name);
> >      }
> > -    err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);
> > +    err = netdev_dpdk_init(netdev, dev->port_id, DPDK_DEV_VHOST);
> > +
> > +    if (err) {
> 
> I think, that callbacks are not registered at this point in case of failure.
> Anyway, IMHO, 'netdev_dpdk_init' should be responsible for unregistering.

Ok

> 
> > +        rte_eth_dev_callback_unregister(dev->port_id,
> > +                                        RTE_ETH_EVENT_QUEUE_STATE,
> > +                                        vring_state_changed_callback, NULL);
> > +        rte_eth_dev_callback_unregister(dev->port_id,
> > +                                        RTE_ETH_EVENT_INTR_LSC,
> > +                                        link_status_changed_callback, NULL);
> > +        rte_eth_dev_detach(dev->port_id, dev->vhost_id);
> > +    }
> >
> >      ovs_mutex_unlock(&dpdk_mutex);
> >      return err;
> > @@ -940,7 +1095,7 @@ netdev_dpdk_vhost_client_construct(struct
> netdev *netdev)
> >      int err;
> >
> >      ovs_mutex_lock(&dpdk_mutex);
> > -    err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);
> > +    err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST_CLIENT);
> >      ovs_mutex_unlock(&dpdk_mutex);
> >      return err;
> >  }
> > @@ -964,13 +1119,10 @@ netdev_dpdk_construct(struct netdev *netdev)
> >  }
> >
> >  static void
> > -netdev_dpdk_destruct(struct netdev *netdev)
> > +dpdk_destruct_helper(struct netdev_dpdk *dev)
> > +    OVS_REQUIRES(dpdk_mutex)
> > +    OVS_REQUIRES(dev->mutex)
> >  {
> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > -
> > -    ovs_mutex_lock(&dpdk_mutex);
> > -    ovs_mutex_lock(&dev->mutex);
> > -
> >      rte_eth_dev_stop(dev->port_id);
> >      free(ovsrcu_get_protected(struct ingress_policer *,
> >                                &dev->ingress_policer));
> > @@ -978,61 +1130,59 @@ netdev_dpdk_destruct(struct netdev *netdev)
> >      rte_free(dev->tx_q);
> >      ovs_list_remove(&dev->list_node);
> >      dpdk_mp_put(dev->dpdk_mp);
> > -
> > -    ovs_mutex_unlock(&dev->mutex);
> > -    ovs_mutex_unlock(&dpdk_mutex);
> >  }
> >
> > -/* rte_vhost_driver_unregister() can call back destroy_device(), which will
> > - * try to acquire 'dpdk_mutex' and possibly 'dev->mutex'.  To avoid a
> > - * deadlock, none of the mutexes must be held while calling this function.
> */
> > -static int
> > -dpdk_vhost_driver_unregister(struct netdev_dpdk *dev OVS_UNUSED,
> > -                             char *vhost_id)
> > -    OVS_EXCLUDED(dpdk_mutex)
> > -    OVS_EXCLUDED(dev->mutex)
> > +static void
> > +netdev_dpdk_destruct(struct netdev *netdev)
> >  {
> > -    return rte_vhost_driver_unregister(vhost_id);
> > +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > +
> > +    ovs_mutex_lock(&dpdk_mutex);
> > +    ovs_mutex_lock(&dev->mutex);
> > +
> > +    dpdk_destruct_helper(dev);
> > +
> > +    ovs_mutex_unlock(&dev->mutex);
> > +    ovs_mutex_unlock(&dpdk_mutex);
> >  }
> >
> >  static void
> >  netdev_dpdk_vhost_destruct(struct netdev *netdev)
> >  {
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > -    char *vhost_id;
> >
> >      ovs_mutex_lock(&dpdk_mutex);
> >      ovs_mutex_lock(&dev->mutex);
> >
> > -    /* Guest becomes an orphan if still attached. */
> > -    if (netdev_dpdk_get_vid(dev) >= 0
> > -        && !(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
> > +    check_link_status(dev);
> > +    if (dev->link.link_status == ETH_LINK_UP) {
> >          VLOG_ERR("Removing port '%s' while vhost device still attached.",
> >                   netdev->name);
> >          VLOG_ERR("To restore connectivity after re-adding of port, VM on "
> >                   "socket '%s' must be restarted.", dev->vhost_id);
> >      }
> >
> > -    free(ovsrcu_get_protected(struct ingress_policer *,
> > -                              &dev->ingress_policer));
> > +    rte_eth_dev_callback_unregister(dev->port_id,
> > +                                    RTE_ETH_EVENT_QUEUE_STATE,
> > +                                    vring_state_changed_callback, NULL);
> > +    rte_eth_dev_callback_unregister(dev->port_id,
> > +                                    RTE_ETH_EVENT_INTR_LSC,
> > +                                    link_status_changed_callback, NULL);
> 
> We should check for EAGAIN here to avoid leaks cased by races.

How do you think we should handle the EAGAIN? Do you think we should keep trying until it succeeds? Or try N times?

[snip]

> > -
> >  static int
> >  netdev_dpdk_class_init(void)
> >  {
> > @@ -2576,19 +2326,9 @@ netdev_dpdk_class_init(void)
> >  static int
> >  netdev_dpdk_vhost_class_init(void)
> >  {
> > -    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
> > -
> > -    /* This function can be called for different classes.  The initialization
> > -     * needs to be done only once */
> > -    if (ovsthread_once_start(&once)) {
> > -        rte_vhost_driver_callback_register(&virtio_net_device_ops);
> > -        rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
> > -                                  | 1ULL << VIRTIO_NET_F_HOST_TSO6
> > -                                  | 1ULL << VIRTIO_NET_F_CSUM);
> > -        ovs_thread_create("vhost_thread", start_vhost_loop, NULL);
> > -
> > -        ovsthread_once_done(&once);
> > -    }
> > +    rte_eth_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
> > +                                | 1ULL << VIRTIO_NET_F_HOST_TSO6
> > +                                | 1ULL << VIRTIO_NET_F_CSUM);
> 
> About features:
> How about to disable all new features in this patch and make another
> one for that? I think, that it's better to split infrastructure
> changes with the new features.

Sure. Good idea. Will update the 1/2 DPDK patch too with this change. And maybe add a 3/3 patch to eventually enable the INDIRECT_DESC feature.

> 
> >
> >      return 0;
> >  }
> > @@ -2690,7 +2430,17 @@ netdev_dpdk_ring_send(struct netdev
> *netdev, int qid,
> >          dp_packet_rss_invalidate(batch->packets[i]);
> >      }
> >
> > -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
> > +    if (OVS_UNLIKELY(concurrent_txq)) {
> > +        qid = qid % dev->up.n_txq;
> > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> > +    }
> > +
> > +    netdev_dpdk_send__(dev, qid, batch, may_steal);
> > +
> > +    if (OVS_UNLIKELY(concurrent_txq)) {
> > +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> > +    }
> > +
> >      return 0;
> >  }
> >
> > @@ -2985,10 +2735,6 @@ dpdk_vhost_reconfigure_helper(struct
> netdev_dpdk *dev)
> >              netdev_change_seq_changed(&dev->up);
> >          }
> >      }
> > -
> > -    if (netdev_dpdk_get_vid(dev) >= 0) {
> > -        dev->vhost_reconfigured = true;
> > -    }
> >  }
> >
> >  static int
> > @@ -3007,6 +2753,7 @@ netdev_dpdk_vhost_client_reconfigure(struct
> netdev *netdev)
> >  {
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >      int err = 0;
> > +    int sid = -1;
> >
> >      ovs_mutex_lock(&dev->mutex);
> >
> > @@ -3020,20 +2767,45 @@ netdev_dpdk_vhost_client_reconfigure(struct
> netdev *netdev)
> >      if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)
> >              && strlen(dev->vhost_id)) {
> >          /* Register client-mode device */
> > -        err = rte_vhost_driver_register(dev->vhost_id,
> > -                                        RTE_VHOST_USER_CLIENT);
> > -        if (err) {
> > -            VLOG_ERR("vhost-user device setup failure for device %s\n",
> > -                     dev->vhost_id);
> > -        } else {
> > -            /* Configuration successful */
> > +        err = dpdk_attach_vhost_pmd(dev, 1);
> > +
> > +        if (!err) {
> > +            sid = rte_eth_dev_socket_id(dev->port_id);
> > +            dev->socket_id = sid < 0 ? SOCKET0 : sid;
> >              dev->vhost_driver_flags |= RTE_VHOST_USER_CLIENT;
> > +
> > +            rte_eth_dev_callback_register(dev->port_id,
> > +                                          RTE_ETH_EVENT_QUEUE_STATE,
> > +                                          vring_state_changed_callback, NULL);
> > +            rte_eth_dev_callback_register(dev->port_id,
> > +                                          RTE_ETH_EVENT_INTR_LSC,
> > +                                          link_status_changed_callback, NULL);
> > +
> > +            rte_eth_dev_stop(dev->port_id);
> > +            rte_free(dev->tx_q);
> > +            err = dpdk_eth_dev_init(dev);
> > +            dev->tx_q = netdev_dpdk_alloc_txq(netdev->n_txq);
> > +            if (!dev->tx_q) {
> 
> callback unregistering required. And detach + id free also required if
> we are returning 0. (I'm suggesting to register callbacks after
> allocation to avoid unregistering on failure.)

Ok.

> 
> > +                err = ENOMEM;
> 
> Useless assignment right now, but it's unclear what should we return
> in case of memory allocation failure.
> Should this port be deleted from the datapath?
> If we're here, it means that port configured properly and there will
> be no more reconfiguration requests to try to reconfigure it again.
> 
> What do you think?

netdev_dpdk_reconfigure returns ENOMEM in this case. Perhaps we should follow suit with vHost.

Appreciate the reviews, Ilya.
I'm going to hold off respinning the patch until a full review is done (either by you or anybody else who wishes to!). Understand it's a large change & may take some time.
It will be ok to spend time ironing out creases like this post-16.11 release but if there's anything fundamentally problematic it needs to get caught ASAP so we can potentially submit a bugfix. Hopefully it doesn't come to that though.

Thanks,
Ciara

> 
> > +                goto out;
> > +            }
> > +
> > +            /* Enable TX queue 0 by default if it wasn't disabled. */
> > +            if (dev->tx_q[0].map == OVS_VHOST_QUEUE_MAP_UNKNOWN) {
> > +                dev->tx_q[0].map = 0;
> > +            }
> > +
> > +            netdev_dpdk_remap_txqs(dev);
> > +
> > +            netdev_change_seq_changed(netdev);
> > +
> >              VLOG_INFO("vHost User device '%s' created in 'client' mode, "
> >                        "using client socket '%s'",
> >                        dev->up.name, dev->vhost_id);
> >          }
> >      }
> >
> > +out:
> >      ovs_mutex_unlock(&dev->mutex);
> >
> >      return 0;
> > @@ -3153,12 +2925,13 @@ static const struct netdev_class
> dpdk_vhost_class =
> >          NULL,
> >          NULL,
> >          netdev_dpdk_vhost_send,
> > -        netdev_dpdk_vhost_get_carrier,
> > -        netdev_dpdk_vhost_get_stats,
> > -        NULL,
> > -        NULL,
> > +        netdev_dpdk_get_carrier,
> > +        netdev_dpdk_get_stats,
> > +        netdev_dpdk_get_features,
> > +        netdev_dpdk_get_status,
> >          netdev_dpdk_vhost_reconfigure,
> > -        netdev_dpdk_vhost_rxq_recv);
> > +        netdev_dpdk_rxq_recv);
> > +
> >  static const struct netdev_class dpdk_vhost_client_class =
> >      NETDEV_DPDK_CLASS(
> >          "dpdkvhostuserclient",
> > @@ -3168,12 +2941,12 @@ static const struct netdev_class
> dpdk_vhost_client_class =
> >          netdev_dpdk_vhost_client_set_config,
> >          NULL,
> >          netdev_dpdk_vhost_send,
> > -        netdev_dpdk_vhost_get_carrier,
> > -        netdev_dpdk_vhost_get_stats,
> > -        NULL,
> > -        NULL,
> > +        netdev_dpdk_get_carrier,
> > +        netdev_dpdk_get_stats,
> > +        netdev_dpdk_get_features,
> > +        netdev_dpdk_get_status,
> >          netdev_dpdk_vhost_client_reconfigure,
> > -        netdev_dpdk_vhost_rxq_recv);
> > +        netdev_dpdk_vhost_client_rxq_recv);
> >
> >  void
> >  netdev_dpdk_register(void)
> >



More information about the dev mailing list