[ovs-dev] [ovs-dev, 03/17] dpif-netdev: Don't try to output on a device without txqs.
Ilya Maximets
i.maximets at samsung.com
Mon Nov 21 13:07:10 UTC 2016
On 16.11.2016 03:45, Daniele Di Proietto wrote:
> Tunnel devices have 0 txqs and don't support netdev_send(). While
> netdev_send() simply returns EOPNOTSUPP, the XPS logic is still executed
> on output, and that might be confused by devices with no txqs.
>
> It seems better to have different structures in the fast path for ports
> that support netdev_{push,pop}_header (tunnel devices), and ports that
> support netdev_send. With this we can also remove a branch in
> netdev_send().
>
> This is also necessary for a future commit, which starts DPDK devices
> without txqs.
>
> Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
> ---
> lib/dpif-netdev.c | 72 ++++++++++++++++++++++++++++++++++++++++---------------
> lib/netdev.c | 15 ++++++++----
> lib/netdev.h | 1 +
> 3 files changed, 64 insertions(+), 24 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 7b67b42..81366b2 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -422,7 +422,8 @@ struct rxq_poll {
> struct ovs_list node;
> };
>
> -/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or 'tx_ports'. */
> +/* Contained by struct dp_netdev_pmd_thread's 'send_port_cache',
> + * 'tnl_port_cache' or 'tx_ports'. */
> struct tx_port {
> struct dp_netdev_port *port;
> int qid;
> @@ -504,11 +505,19 @@ struct dp_netdev_pmd_thread {
> * read by the pmd thread. */
> struct hmap tx_ports OVS_GUARDED;
>
> - /* Map of 'tx_port' used in the fast path. This is a thread-local copy of
> - * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be accessed
> - * by multiple threads, and thusly need to be protected by 'non_pmd_mutex'.
> - * Every other instance will only be accessed by its own pmd thread. */
> - struct hmap port_cache;
> + /* These are thread-local copies of 'tx_ports'. One contains only tunnel
> + * ports (that support push_tunnel/pop_tunnel). The other contains
> + * non-tunnel ports (that support send).
> + *
> + * These are kept separate to make sure that we don't try to execute
> + * OUTPUT on a tunnel device (which has 0 txqs) or PUSH/POP on a non-tunnel
> + * device.
> + *
> + * The instance for cpu core NON_PMD_CORE_ID can be accessed by multiple
> + * threads, and thusly needs to be protected by 'non_pmd_mutex'. Every
> + * other instance will only be accessed by its own pmd thread. */
> + struct hmap tnl_port_cache;
> + struct hmap send_port_cache;
>
> /* Only a pmd thread can write on its own 'cycles' and 'stats'.
> * The main thread keeps 'stats_zero' and 'cycles_zero' as base
> @@ -3055,7 +3064,10 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
> /* Free all used tx queue ids. */
> dpif_netdev_xps_revalidate_pmd(pmd, 0, true);
>
> - HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->port_cache) {
> + HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->tnl_port_cache) {
> + free(tx_port_cached);
> + }
> + HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->send_port_cache) {
> free(tx_port_cached);
> }
> }
> @@ -3069,11 +3081,22 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
> struct tx_port *tx_port, *tx_port_cached;
>
> pmd_free_cached_ports(pmd);
> - hmap_shrink(&pmd->port_cache);
> + hmap_shrink(&pmd->send_port_cache);
> + hmap_shrink(&pmd->tnl_port_cache);
>
> HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
> + struct hmap *cache;
> +
> + if (netdev_has_tunnel_push_pop(tx_port->port->netdev)) {
> + cache = &pmd->tnl_port_cache;
> + } else if (netdev_n_txq(tx_port->port->netdev)) {
> + cache = &pmd->send_port_cache;
> + } else {
> + continue;
> + }
IMHO, this code introduces an artificial limitation on netdevs.
What about making all the checks independent, something like this:

    if (has_pop _OR_ has_push) {
        insert into 'tnl_port_cache';
    }
    if (netdev_n_txq(tx_port->port->netdev)) {
        insert into 'send_port_cache';
    }

Otherwise, it must be documented in 'netdev-provider.h' that a netdev
may only implement either the tunnel-related functions (both 'push'
and 'pop') or the send function, but not both.
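For illustration, roughly how the independent checks could look inside
the loop above (same variables and helpers as in the patch; purely a
sketch, and the tunnel check could equally be a 'push || pop' variant
of netdev_has_tunnel_push_pop()):

    HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
        struct netdev *netdev = tx_port->port->netdev;

        /* Independent checks: a port may end up in both caches. */
        if (netdev_has_tunnel_push_pop(netdev)) {
            tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
            hmap_insert(&pmd->tnl_port_cache, &tx_port_cached->node,
                        hash_port_no(tx_port_cached->port->port_no));
        }
        if (netdev_n_txq(netdev)) {
            tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
            hmap_insert(&pmd->send_port_cache, &tx_port_cached->node,
                        hash_port_no(tx_port_cached->port->port_no));
        }
    }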
> +
> tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
> - hmap_insert(&pmd->port_cache, &tx_port_cached->node,
> + hmap_insert(cache, &tx_port_cached->node,
> hash_port_no(tx_port_cached->port->port_no));
> }
> }
> @@ -3309,7 +3332,8 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
> pmd->next_optimization = time_msec() + DPCLS_OPTIMIZATION_INTERVAL;
> ovs_list_init(&pmd->poll_list);
> hmap_init(&pmd->tx_ports);
> - hmap_init(&pmd->port_cache);
> + hmap_init(&pmd->tnl_port_cache);
> + hmap_init(&pmd->send_port_cache);
> /* init the 'flow_cache' since there is no
> * actual thread created for NON_PMD_CORE_ID. */
> if (core_id == NON_PMD_CORE_ID) {
> @@ -3325,7 +3349,8 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
> struct dpcls *cls;
>
> dp_netdev_pmd_flow_flush(pmd);
> - hmap_destroy(&pmd->port_cache);
> + hmap_destroy(&pmd->send_port_cache);
> + hmap_destroy(&pmd->tnl_port_cache);
> hmap_destroy(&pmd->tx_ports);
> /* All flows (including their dpcls_rules) have been deleted already */
> CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
> @@ -3592,7 +3617,9 @@ static void
> dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
> struct dp_netdev_port *port)
> {
> - struct tx_port *tx = xzalloc(sizeof *tx);
> + struct tx_port *tx;
> +
> + tx = xzalloc(sizeof *tx);
>
> tx->port = port;
> tx->qid = -1;
> @@ -4280,7 +4307,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
> struct dp_netdev_port *port;
> long long interval;
>
> - HMAP_FOR_EACH (tx, node, &pmd->port_cache) {
> + HMAP_FOR_EACH (tx, node, &pmd->send_port_cache) {
> if (!tx->port->dynamic_txqs) {
> continue;
> }
> @@ -4344,10 +4371,17 @@ dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
> }
>
> static struct tx_port *
> -pmd_tx_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
> - odp_port_t port_no)
> +pmd_tnl_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
> + odp_port_t port_no)
> +{
> + return tx_port_lookup(&pmd->tnl_port_cache, port_no);
> +}
> +
> +static struct tx_port *
> +pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
> + odp_port_t port_no)
> {
> - return tx_port_lookup(&pmd->port_cache, port_no);
> + return tx_port_lookup(&pmd->send_port_cache, port_no);
> }
>
> static int
> @@ -4361,7 +4395,7 @@ push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
>
> data = nl_attr_get(attr);
>
> - tun_port = pmd_tx_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
> + tun_port = pmd_tnl_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
> if (!tun_port) {
> err = -EINVAL;
> goto error;
> @@ -4413,7 +4447,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>
> switch ((enum ovs_action_attr)type) {
> case OVS_ACTION_ATTR_OUTPUT:
> - p = pmd_tx_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
> + p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
> if (OVS_LIKELY(p)) {
> int tx_qid;
> bool dynamic_txqs;
> @@ -4460,7 +4494,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> struct dp_packet_batch *orig_packets_ = packets_;
> odp_port_t portno = nl_attr_get_odp_port(a);
>
> - p = pmd_tx_port_cache_lookup(pmd, portno);
> + p = pmd_tnl_port_cache_lookup(pmd, portno);
> if (p) {
> struct dp_packet_batch tnl_pkt;
> int i;
> diff --git a/lib/netdev.c b/lib/netdev.c
> index ad90ef6..9ab4e4c 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -117,6 +117,13 @@ netdev_is_pmd(const struct netdev *netdev)
> return netdev->netdev_class->is_pmd;
> }
>
> +bool
> +netdev_has_tunnel_push_pop(const struct netdev *netdev)
> +{
> + return netdev->netdev_class->push_header
> + && netdev->netdev_class->pop_header;
> +}
> +
> static void
> netdev_initialize(void)
> OVS_EXCLUDED(netdev_mutex)
> @@ -686,6 +693,9 @@ netdev_set_tx_multiq(struct netdev *netdev, unsigned int n_txq)
> * if a partial packet was transmitted or if a packet is too big or too small
> * to transmit on the device.
> *
> + * The caller must make sure that 'netdev' supports sending by making sure that
> + * 'netdev_n_txq(netdev)' returns >= 1.
> + *
> * If the function returns a non-zero value, some of the packets might have
> * been sent anyway.
> *
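As a side note, a minimal caller-side sketch of the guard implied by the
new requirement above (the wrapper name is mine, purely illustrative; it
mirrors the check removed below):

    /* Hypothetical wrapper: only hand the batch to netdev_send() if the
     * device actually has tx queues; otherwise drop it, as the removed
     * in-netdev_send() check used to do. */
    static int
    netdev_send_if_possible(struct netdev *netdev, int qid,
                            struct dp_packet_batch *batch,
                            bool may_steal, bool concurrent_txq)
    {
        if (netdev_n_txq(netdev) < 1) {
            dp_packet_delete_batch(batch, may_steal);
            return EOPNOTSUPP;
        }
        return netdev_send(netdev, qid, batch, may_steal, concurrent_txq);
    }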
> @@ -710,11 +720,6 @@ int
> netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
> bool may_steal, bool concurrent_txq)
> {
> - if (!netdev->netdev_class->send) {
> - dp_packet_delete_batch(batch, may_steal);
> - return EOPNOTSUPP;
> - }
> -
> int error = netdev->netdev_class->send(netdev, qid, batch, may_steal,
> concurrent_txq);
> if (!error) {
> diff --git a/lib/netdev.h b/lib/netdev.h
> index bad28c4..03059ca 100644
> --- a/lib/netdev.h
> +++ b/lib/netdev.h
> @@ -109,6 +109,7 @@ bool netdev_is_reserved_name(const char *name);
> int netdev_n_txq(const struct netdev *netdev);
> int netdev_n_rxq(const struct netdev *netdev);
> bool netdev_is_pmd(const struct netdev *netdev);
> +bool netdev_has_tunnel_push_pop(const struct netdev *netdev);
>
> /* Open and close. */
> int netdev_open(const char *name, const char *type, struct netdev **netdevp);
>