[ovs-dev] [PATCH] netdev-dpdk: add support for rings in secondary processes in IVSHMEM setups

Daniele Di Proietto diproiettod at vmware.com
Tue Jul 28 18:28:09 UTC 2015


This is interesting, thanks for the patch.

I'm definitely not a IVSHMEM expert, but I have a concern: what
happens if the secondary OVS process allocates or frees some
mbufs? (e.g because a packet is sent to multiple destinations or is
dropped)

I found this in the DPDK documentation:
http://dpdk.org/doc/guides/prog_guide/ivshmem_lib.html#best-practices-for-w
riting-ivshmem-applications\

I'd appreciate feedback from someone with more IVSHMEM experience
than me (Maryam, perhaps?)


Thanks,

Daniele

On 22/07/2015 21:51, "Melvin Walls" <mwalls67 at gmail.com> wrote:

>In order for OVS running inside a VM using IVSHMEM to recognize ports
>created
>on the host, you have to start vswitchd with the --proc-type=secondary EAL
>option.
>
>When creating rings in secondary processes functions like
>rte_eth_dev_configure() fail with the error code E_RTE_SECONDARY, i.e.,
>the
>operations are not allowed in secondary processes. Avoiding this requires
>some
>changes to the way secondary processes handle dpdk rings.
>
>This patch changes dpdk_ring_create() to use rte_ring_lookup() instead of
>rte_ring_create() when called from a secondary process. It also
>introduces two
>functions: netdev_dpdk_ring_rxq_recv() and netdev_dpdk_ring_send__() to
>handle
>tx/rx on dpdk rings in secondary processes.
>
>Signed-off-by: Melvin Walls <mwalls67 at gmail.com>
>Signed-off-by: Ethan Jackson <ethan at nicira.com>
>---
> lib/netdev-dpdk.c | 158
>+++++++++++++++++++++++++++++++++++++++++-------------
> 1 file changed, 122 insertions(+), 36 deletions(-)
>
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>index 5ae805e..5abe90f 100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -227,6 +227,10 @@ struct netdev_dpdk {
>     /* Identifier used to distinguish vhost devices from each other */
>     char vhost_id[PATH_MAX];
> 
>+    /* Rings for secondary processes in IVSHMEM setups, NULL otherwise */
>+    struct rte_ring *rx_ring;
>+    struct rte_ring *tx_ring;
>+
>     /* In dpdk_list. */
>     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
> };
>@@ -340,12 +344,16 @@ dpdk_mp_get(int socket_id, int mtu)
>OVS_REQUIRES(dpdk_mutex)
>             return NULL;
>         }
> 
>-        dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
>-                                     MP_CACHE_SZ,
>-                                     sizeof(struct
>rte_pktmbuf_pool_private),
>-                                     rte_pktmbuf_pool_init, NULL,
>-                                     ovs_rte_pktmbuf_init, NULL,
>-                                     socket_id, 0);
>+        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
>+            dmp->mp = rte_mempool_create(mp_name, mp_size,
>MBUF_SIZE(mtu),
>+                                         MP_CACHE_SZ,
>+                                         sizeof(struct
>rte_pktmbuf_pool_private),
>+                                         rte_pktmbuf_pool_init, NULL,
>+                                         ovs_rte_pktmbuf_init, NULL,
>+                                         socket_id, 0);
>+        } else {
>+            dmp->mp = rte_mempool_lookup(mp_name);
>+        }
>     } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >=
>MIN_NB_MBUF);
> 
>     if (dmp->mp == NULL) {
>@@ -439,39 +447,41 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>OVS_REQUIRES(dpdk_mutex)
>     dev->up.n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
>     dev->real_n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
> 
>-    diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq,
>dev->real_n_txq,
>-                                 &port_conf);
>-    if (diag) {
>-        VLOG_ERR("eth dev config error %d. rxq:%d txq:%d", diag,
>dev->up.n_rxq,
>-                 dev->real_n_txq);
>-        return -diag;
>-    }
>-
>-    for (i = 0; i < dev->real_n_txq; i++) {
>-        diag = rte_eth_tx_queue_setup(dev->port_id, i,
>NIC_PORT_TX_Q_SIZE,
>-                                      dev->socket_id, NULL);
>+    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
>+        diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq,
>dev->real_n_txq,
>+                                     &port_conf);
>         if (diag) {
>-            VLOG_ERR("eth dev tx queue setup error %d",diag);
>+            VLOG_ERR("eth dev config error %d. rxq:%d txq:%d", diag,
>dev->up.n_rxq,
>+                     dev->real_n_txq);
>             return -diag;
>         }
>-    }
> 
>-    for (i = 0; i < dev->up.n_rxq; i++) {
>-        diag = rte_eth_rx_queue_setup(dev->port_id, i,
>NIC_PORT_RX_Q_SIZE,
>-                                      dev->socket_id,
>-                                      NULL, dev->dpdk_mp->mp);
>+        for (i = 0; i < dev->real_n_txq; i++) {
>+            diag = rte_eth_tx_queue_setup(dev->port_id, i,
>NIC_PORT_TX_Q_SIZE,
>+                                          dev->socket_id, NULL);
>+            if (diag) {
>+                VLOG_ERR("eth dev tx queue setup error %d",diag);
>+                return -diag;
>+            }
>+        }
>+
>+        for (i = 0; i < dev->up.n_rxq; i++) {
>+            diag = rte_eth_rx_queue_setup(dev->port_id, i,
>NIC_PORT_RX_Q_SIZE,
>+                                          dev->socket_id,
>+                                          NULL, dev->dpdk_mp->mp);
>+            if (diag) {
>+                VLOG_ERR("eth dev rx queue setup error %d",diag);
>+                return -diag;
>+            }
>+        }
>+
>+        diag = rte_eth_dev_start(dev->port_id);
>         if (diag) {
>-            VLOG_ERR("eth dev rx queue setup error %d",diag);
>+            VLOG_ERR("eth dev start error %d",diag);
>             return -diag;
>         }
>     }
> 
>-    diag = rte_eth_dev_start(dev->port_id);
>-    if (diag) {
>-        VLOG_ERR("eth dev start error %d",diag);
>-        return -diag;
>-    }
>-
>     rte_eth_promiscuous_enable(dev->port_id);
>     rte_eth_allmulticast_enable(dev->port_id);
> 
>@@ -532,6 +542,8 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int
>port_no,
>     OVS_REQUIRES(dpdk_mutex)
> {
>     struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
>+    char *rxq_name = xasprintf("%s_tx", netdev->up.name);
>+    char *txq_name = xasprintf("%s_rx", netdev->up.name);
>     int sid;
>     int err = 0;
> 
>@@ -574,6 +586,19 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned
>int port_no,
>         }
>     }
> 
>+    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
>+        netdev->rx_ring = netdev->tx_ring = NULL;
>+    } else {
>+        netdev->rx_ring = rte_ring_lookup(rxq_name);
>+        netdev->tx_ring = rte_ring_lookup(txq_name);
>+        if (!netdev->rx_ring || !netdev->tx_ring) {
>+            err = ENOMEM;
>+        }
>+    }
>+
>+    free(rxq_name);
>+    free(txq_name);
>+
>     list_push_back(&dpdk_list, &netdev->list_node);
> 
> unlock:
>@@ -957,6 +982,36 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct
>dp_packet **packets,
>     return 0;
> }
> 
>+static int
>+netdev_dpdk_ring_rxq_recv(struct netdev_rxq *rxq_,
>+                           struct dp_packet **packets, int *c)
>+{
>+    struct netdev_dpdk *netdev = netdev_dpdk_cast(rxq_->netdev);
>+    struct rte_ring *rx_ring = netdev->rx_ring;
>+    unsigned rx_pkts = NETDEV_MAX_BURST;
>+
>+    /* Only use netdev_dpdk_ring_rxq_recv() as a secondary process.
>There are operations
>+     * performed by netdev_dpdk_rxq_recv() that primary processes are
>responsible for and
>+     * cannot be performed by secondary processes. */
>+    if (OVS_LIKELY(rte_eal_process_type() == RTE_PROC_PRIMARY)) {
>+        return netdev_dpdk_rxq_recv(rxq_,packets,c);
>+    }
>+
>+    while (OVS_UNLIKELY(rte_ring_dequeue_bulk(rx_ring, (void **)packets,
>rx_pkts) != 0) &&
>+        rx_pkts > 0) {
>+        rx_pkts = rte_ring_count(rx_ring);
>+        rx_pkts = (unsigned)MIN(rx_pkts,NETDEV_MAX_BURST);
>+    }
>+   
>+    if (!rx_pkts) {
>+        return EAGAIN;
>+    }
>+
>+    *c = rx_pkts;
>+
>+    return 0;
>+}
>+
> static void
> __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
>                          int cnt, bool may_steal)
>@@ -1147,6 +1202,20 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int
>qid OVS_UNUSED, struct dp_pack
> }
> 
> static inline void
>+netdev_dpdk_ring_send__(struct netdev_dpdk *netdev,
>+                        struct dp_packet **pkts, int cnt)
>+{
>+    struct rte_ring *tx_ring = netdev->tx_ring;
>+    int rslt = 0;
>+
>+    if (tx_ring != NULL) {
>+        do {
>+            rslt = rte_ring_enqueue_bulk(tx_ring, (void **)pkts, cnt);
>+        } while (rslt == -ENOBUFS);
>+    }
>+}
>+
>+static inline void
> netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>                    struct dp_packet **pkts, int cnt, bool may_steal)
> {
>@@ -1812,8 +1881,13 @@ dpdk_ring_create(const char dev_name[], unsigned
>int port_no,
>     }
> 
>     /* Create single consumer/producer rings, netdev does explicit
>locking. */
>-    ivshmem->cring_tx = rte_ring_create(ring_name, DPDK_RING_SIZE,
>SOCKET0,
>-                                        RING_F_SP_ENQ | RING_F_SC_DEQ);
>+    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
>+        ivshmem->cring_tx = rte_ring_create(ring_name, DPDK_RING_SIZE,
>SOCKET0,
>+                                            RING_F_SP_ENQ |
>RING_F_SC_DEQ);
>+    } else {
>+        ivshmem->cring_tx = rte_ring_lookup(ring_name);
>+    }
>+
>     if (ivshmem->cring_tx == NULL) {
>         rte_free(ivshmem);
>         return ENOMEM;
>@@ -1825,8 +1899,13 @@ dpdk_ring_create(const char dev_name[], unsigned
>int port_no,
>     }
> 
>     /* Create single consumer/producer rings, netdev does explicit
>locking. */
>-    ivshmem->cring_rx = rte_ring_create(ring_name, DPDK_RING_SIZE,
>SOCKET0,
>-                                        RING_F_SP_ENQ | RING_F_SC_DEQ);
>+    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
>+        ivshmem->cring_rx = rte_ring_create(ring_name, DPDK_RING_SIZE,
>SOCKET0,
>+                                            RING_F_SP_ENQ |
>RING_F_SC_DEQ);
>+    } else {
>+        ivshmem->cring_rx = rte_ring_lookup(ring_name);
>+    }
>+
>     if (ivshmem->cring_rx == NULL) {
>         rte_free(ivshmem);
>         return ENOMEM;
>@@ -1888,7 +1967,14 @@ netdev_dpdk_ring_send(struct netdev *netdev_, int
>qid,
>         dp_packet_set_rss_hash(pkts[i], 0);
>     }
> 
>-    netdev_dpdk_send__(netdev, qid, pkts, cnt, may_steal);
>+    /* Only use netdev_dpdk_send__() as a primary process. It leads to
>the execution
>+     * of code that cannot be executed by secondary processes. */
>+    if (OVS_LIKELY(rte_eal_process_type() == RTE_PROC_PRIMARY)) {
>+        netdev_dpdk_send__(netdev, qid, pkts, cnt, may_steal);
>+    } else {
>+        netdev_dpdk_ring_send__(netdev, pkts, cnt);
>+    }
>+
>     return 0;
> }
> 
>@@ -2101,7 +2187,7 @@ static const struct netdev_class dpdk_ring_class =
>         netdev_dpdk_get_stats,
>         netdev_dpdk_get_features,
>         netdev_dpdk_get_status,
>-        netdev_dpdk_rxq_recv);
>+        netdev_dpdk_ring_rxq_recv);
> 
> static const struct netdev_class OVS_UNUSED dpdk_vhost_cuse_class =
>     NETDEV_DPDK_CLASS(
>-- 
>1.9.3 (Apple Git-50)
>_______________________________________________
>dev mailing list
>dev at openvswitch.org
>https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma
>n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Sm
>B5nZacmXNq0gKCC1s_Cw5yUNjxgD4v5kJqZ2uWLlE&m=ksY9ulCC5TLZ-tQnj3WWXeuAHQUk9J
>o-mP_ruJ124K0&s=HiTz0cRNA8O4ZBvUJqeOL-KOwygu2-QlDmyVeLTpNBE&e= 




More information about the dev mailing list