[ovs-dev] [ovs-dev, 03/17] dpif-netdev: Don't try to output on a device without txqs.
Daniele Di Proietto
diproiettod at vmware.com
Mon Nov 21 22:00:17 UTC 2016
On 21/11/2016 05:07, "Ilya Maximets" <i.maximets at samsung.com> wrote:
>On 16.11.2016 03:45, Daniele Di Proietto wrote:
>> Tunnel devices have 0 txqs and don't support netdev_send(). While
>> netdev_send() simply returns EOPNOTSUPP, the XPS logic is still executed
>> on output, and that might be confused by devices with no txqs.
>>
>> It seems better to have different structures in the fast path for ports
>> that support netdev_{push,pop}_header (tunnel devices), and ports that
>> support netdev_send. With this we can also remove a branch in
>> netdev_send().
>>
>> This is also necessary for a future commit, which starts DPDK devices
>> without txqs.
>>
>> Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
>> ---
>> lib/dpif-netdev.c | 72 ++++++++++++++++++++++++++++++++++++++++---------------
>> lib/netdev.c | 15 ++++++++----
>> lib/netdev.h | 1 +
>> 3 files changed, 64 insertions(+), 24 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 7b67b42..81366b2 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -422,7 +422,8 @@ struct rxq_poll {
>> struct ovs_list node;
>> };
>>
>> -/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or 'tx_ports'. */
>> +/* Contained by struct dp_netdev_pmd_thread's 'send_port_cache',
>> + * 'tnl_port_cache' or 'tx_ports'. */
>> struct tx_port {
>> struct dp_netdev_port *port;
>> int qid;
>> @@ -504,11 +505,19 @@ struct dp_netdev_pmd_thread {
>> * read by the pmd thread. */
>> struct hmap tx_ports OVS_GUARDED;
>>
>> - /* Map of 'tx_port' used in the fast path. This is a thread-local copy of
>> - * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be accessed
>> - * by multiple threads, and thusly need to be protected by 'non_pmd_mutex'.
>> - * Every other instance will only be accessed by its own pmd thread. */
>> - struct hmap port_cache;
>> + /* These are thread-local copies of 'tx_ports'. One contains only tunnel
>> + * ports (that support push_tunnel/pop_tunnel) The other contains
>> + * non-tunnel ports (that support send).
>> + *
>> + * These are kept separate to make sure that we don't try to execute
>> + * OUTPUT on a tunnel device (which has 0 txqs) or PUSH/POP on a non-tunnel
>> + * device.
>> + *
>> + * The instance for cpu core NON_PMD_CORE_ID can be accessed by multiple
>> + * threads, and thusly needs to be protected by 'non_pmd_mutex'. Every
>> + * other instance will only be accessed by its own pmd thread. */
>> + struct hmap tnl_port_cache;
>> + struct hmap send_port_cache;
>>
>> /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>> * The main thread keeps 'stats_zero' and 'cycles_zero' as base
>> @@ -3055,7 +3064,10 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
>> /* Free all used tx queue ids. */
>> dpif_netdev_xps_revalidate_pmd(pmd, 0, true);
>>
>> - HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->port_cache) {
>> + HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->tnl_port_cache) {
>> + free(tx_port_cached);
>> + }
>> + HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->send_port_cache) {
>> free(tx_port_cached);
>> }
>> }
>> @@ -3069,11 +3081,22 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
>> struct tx_port *tx_port, *tx_port_cached;
>>
>> pmd_free_cached_ports(pmd);
>> - hmap_shrink(&pmd->port_cache);
>> + hmap_shrink(&pmd->send_port_cache);
>> + hmap_shrink(&pmd->tnl_port_cache);
>>
>> HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
>> + struct hmap *cache;
>> +
>> + if (netdev_has_tunnel_push_pop(tx_port->port->netdev)) {
>> + cache = &pmd->tnl_port_cache;
>> + } else if (netdev_n_txq(tx_port->port->netdev)) {
>> + cache = &pmd->send_port_cache;
>> + } else {
>> + continue;
>> + }
>
>IMHO, this code introduces artificial limitations for netdev.
>What about something like this:
>
> if (has_pop _OR_ has_push) {
> insert to 'tnl_port_cache';
> }
>
> if (netdev_n_txq(tx_port->port->netdev)) {
> insert to 'send_port_cache';
> }
>?
>i.e make all the checks independent.
>
>Otherwise, it must be described in 'netdev-provider.h' that netdev
>can have only tunnel related functions (both 'push' and 'pop') or
>send function.
I was more leaning towards documenting the limitation (currently we have
no use case for a netdev that supports both), but making the checks
independent is simpler and it avoids one assumption on the netdev provider,
so I went with that.
I also realized that the checks in netdev_pop_header() and
netdev_push_header() are not necessary, so I removed them.
Thanks
>
>> +
>> tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
>> - hmap_insert(&pmd->port_cache, &tx_port_cached->node,
>> + hmap_insert(cache, &tx_port_cached->node,
>> hash_port_no(tx_port_cached->port->port_no));
>> }
>> }
>> @@ -3309,7 +3332,8 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>> pmd->next_optimization = time_msec() + DPCLS_OPTIMIZATION_INTERVAL;
>> ovs_list_init(&pmd->poll_list);
>> hmap_init(&pmd->tx_ports);
>> - hmap_init(&pmd->port_cache);
>> + hmap_init(&pmd->tnl_port_cache);
>> + hmap_init(&pmd->send_port_cache);
>> /* init the 'flow_cache' since there is no
>> * actual thread created for NON_PMD_CORE_ID. */
>> if (core_id == NON_PMD_CORE_ID) {
>> @@ -3325,7 +3349,8 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
>> struct dpcls *cls;
>>
>> dp_netdev_pmd_flow_flush(pmd);
>> - hmap_destroy(&pmd->port_cache);
>> + hmap_destroy(&pmd->send_port_cache);
>> + hmap_destroy(&pmd->tnl_port_cache);
>> hmap_destroy(&pmd->tx_ports);
>> /* All flows (including their dpcls_rules) have been deleted already */
>> CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
>> @@ -3592,7 +3617,9 @@ static void
>> dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
>> struct dp_netdev_port *port)
>> {
>> - struct tx_port *tx = xzalloc(sizeof *tx);
>> + struct tx_port *tx;
>> +
>> + tx = xzalloc(sizeof *tx);
>>
>> tx->port = port;
>> tx->qid = -1;
>> @@ -4280,7 +4307,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
>> struct dp_netdev_port *port;
>> long long interval;
>>
>> - HMAP_FOR_EACH (tx, node, &pmd->port_cache) {
>> + HMAP_FOR_EACH (tx, node, &pmd->send_port_cache) {
>> if (!tx->port->dynamic_txqs) {
>> continue;
>> }
>> @@ -4344,10 +4371,17 @@ dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
>> }
>>
>> static struct tx_port *
>> -pmd_tx_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
>> - odp_port_t port_no)
>> +pmd_tnl_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
>> + odp_port_t port_no)
>> +{
>> + return tx_port_lookup(&pmd->tnl_port_cache, port_no);
>> +}
>> +
>> +static struct tx_port *
>> +pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
>> + odp_port_t port_no)
>> {
>> - return tx_port_lookup(&pmd->port_cache, port_no);
>> + return tx_port_lookup(&pmd->send_port_cache, port_no);
>> }
>>
>> static int
>> @@ -4361,7 +4395,7 @@ push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
>>
>> data = nl_attr_get(attr);
>>
>> - tun_port = pmd_tx_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
>> + tun_port = pmd_tnl_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
>> if (!tun_port) {
>> err = -EINVAL;
>> goto error;
>> @@ -4413,7 +4447,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>>
>> switch ((enum ovs_action_attr)type) {
>> case OVS_ACTION_ATTR_OUTPUT:
>> - p = pmd_tx_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
>> + p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
>> if (OVS_LIKELY(p)) {
>> int tx_qid;
>> bool dynamic_txqs;
>> @@ -4460,7 +4494,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>> struct dp_packet_batch *orig_packets_ = packets_;
>> odp_port_t portno = nl_attr_get_odp_port(a);
>>
>> - p = pmd_tx_port_cache_lookup(pmd, portno);
>> + p = pmd_tnl_port_cache_lookup(pmd, portno);
>> if (p) {
>> struct dp_packet_batch tnl_pkt;
>> int i;
>> diff --git a/lib/netdev.c b/lib/netdev.c
>> index ad90ef6..9ab4e4c 100644
>> --- a/lib/netdev.c
>> +++ b/lib/netdev.c
>> @@ -117,6 +117,13 @@ netdev_is_pmd(const struct netdev *netdev)
>> return netdev->netdev_class->is_pmd;
>> }
>>
>> +bool
>> +netdev_has_tunnel_push_pop(const struct netdev *netdev)
>> +{
>> + return netdev->netdev_class->push_header
>> + && netdev->netdev_class->pop_header;
>> +}
>> +
>> static void
>> netdev_initialize(void)
>> OVS_EXCLUDED(netdev_mutex)
>> @@ -686,6 +693,9 @@ netdev_set_tx_multiq(struct netdev *netdev, unsigned int n_txq)
>> * if a partial packet was transmitted or if a packet is too big or too small
>> * to transmit on the device.
>> *
>> + * The caller must make sure that 'netdev' supports sending by making sure that
>> + * 'netdev_n_txq(netdev)' returns >= 1.
>> + *
>> * If the function returns a non-zero value, some of the packets might have
>> * been sent anyway.
>> *
>> @@ -710,11 +720,6 @@ int
>> netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
>> bool may_steal, bool concurrent_txq)
>> {
>> - if (!netdev->netdev_class->send) {
>> - dp_packet_delete_batch(batch, may_steal);
>> - return EOPNOTSUPP;
>> - }
>> -
>> int error = netdev->netdev_class->send(netdev, qid, batch, may_steal,
>> concurrent_txq);
>> if (!error) {
>> diff --git a/lib/netdev.h b/lib/netdev.h
>> index bad28c4..03059ca 100644
>> --- a/lib/netdev.h
>> +++ b/lib/netdev.h
>> @@ -109,6 +109,7 @@ bool netdev_is_reserved_name(const char *name);
>> int netdev_n_txq(const struct netdev *netdev);
>> int netdev_n_rxq(const struct netdev *netdev);
>> bool netdev_is_pmd(const struct netdev *netdev);
>> +bool netdev_has_tunnel_push_pop(const struct netdev *netdev);
>>
>> /* Open and close. */
>> int netdev_open(const char *name, const char *type, struct netdev **netdevp);
>>
More information about the dev
mailing list