[ovs-dev] [PATCH] dpif-netdev: avoid hw_miss_packet_recover() for devices with no support

Eli Britstein elibr at nvidia.com
Sun Nov 21 06:33:26 UTC 2021


Hi Harsha,

It's a clever idea, though have some problems in the implementation. PSB.


On 11/20/2021 11:20 AM, Sriharsha Basavapatna wrote:
> The hw_miss_packet_recover() API results in performance degradation, for
> ports that are either not offload capable or do not support this specific
> offload API.
>
> For example, in the test configuration shown below, the vhost-user port
> does not support offloads and the VF port doesn't support hw_miss offload
> API. But because tunnel offload needs to be configured in other bridges
> (br-vxlan and br-phy), OVS has been built with -DALLOW_EXPERIMENTAL_API.
>
>      br-vhost            br-vxlan            br-phy
> vhost-user<-->VF    VF-Rep<-->VxLAN       uplink-port
>
> For every packet between the VF and the vhost-user ports, hw_miss API is
> called even though it is not supported by the ports involved. This leads
> to significant performance drop (~3x in some cases; both cycles and pps).
>
> To fix this, return EOPNOTSUPP when this API fails for a device that
"To fix" -> "To improve"
> doesn't support it and avoid this API on that port for subsequent packets.
>
> Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna at broadcom.com>
> ---
>   lib/dpif-netdev-private.h |  2 +-
>   lib/dpif-netdev.c         | 29 +++++++++++++++++++++--------
>   lib/netdev-offload-dpdk.c |  9 +++++++--
>   3 files changed, 29 insertions(+), 11 deletions(-)
>
> diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h
> index 4593649bd..e2a6a9d3a 100644
> --- a/lib/dpif-netdev-private.h
> +++ b/lib/dpif-netdev-private.h
> @@ -46,7 +46,7 @@ dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
>   
>   int
>   dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
> -                  odp_port_t port_no,
> +                  void *port,

void * -> struct tx_port *. use a forward declaration.

>                     struct dp_packet *packet,
>                     struct dp_netdev_flow **flow);
>   
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 69d7ec26e..207b1961c 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -434,6 +434,7 @@ struct tx_port {
>       long long last_used;
>       struct hmap_node node;
>       long long flush_time;
> +    bool hw_miss_api_supported;
>       struct dp_packet_batch output_pkts;
>       struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
>   };
> @@ -6972,6 +6973,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
>       tx->port = port;
>       tx->qid = -1;
>       tx->flush_time = 0LL;
> +    tx->hw_miss_api_supported = true;
>       dp_packet_batch_init(&tx->output_pkts);
>   
>       hmap_insert(&pmd->tx_ports, &tx->node, hash_port_no(tx->port->port_no));
> @@ -7327,22 +7329,28 @@ static struct tx_port * pmd_send_port_cache_lookup(
>   
>   inline int
>   dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
> -                  odp_port_t port_no OVS_UNUSED,
> +                  void *port,
don't omit OVS_UNUSED. it is for compiling without ALLOW_EXPERIMENTAL_API
>                     struct dp_packet *packet,
>                     struct dp_netdev_flow **flow)
>   {
> -    struct tx_port *p OVS_UNUSED;
> +    struct tx_port *p = port;
no need for this local variable, you get it from the function arguments
>       uint32_t mark;
>   
>   #ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
>       /* Restore the packet if HW processing was terminated before completion. */
> -    p = pmd_send_port_cache_lookup(pmd, port_no);
> -    if (OVS_LIKELY(p)) {
> +    if (OVS_LIKELY(p) && p->hw_miss_api_supported) {
>           int err = netdev_hw_miss_packet_recover(p->port->netdev, packet);
>   
> -        if (err && err != EOPNOTSUPP) {
> -            COVERAGE_INC(datapath_drop_hw_miss_recover);
> -            return -1;
> +        if (err) {
> +            if (err != EOPNOTSUPP) {
> +                COVERAGE_INC(datapath_drop_hw_miss_recover);
> +                return -1;
> +            } else {
> +                /* API unsupported by the port; avoid subsequent calls. */
> +                VLOG_DBG("hw_miss_api unsupported: port: %d",
> +                         p->port->port_no);
> +                p->hw_miss_api_supported = false;
> +            }
>           }
>       }
>   #endif
> @@ -7394,6 +7402,11 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
>       uint16_t tcp_flags;
>       size_t map_cnt = 0;
>       bool batch_enable = true;
> +    struct tx_port *port = NULL;
> +
> +#ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
> +    port = pmd_send_port_cache_lookup(pmd, port_no);
> +#endif
>   
>       pmd_perf_update_counter(&pmd->perf_stats,
>                               md_is_valid ? PMD_STAT_RECIRC : PMD_STAT_RECV,
> @@ -7420,7 +7433,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
>           }
>   
>           if (netdev_flow_api && recirc_depth == 0) {
> -            if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, port_no, packet, &flow))) {
> +            if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, port, packet, &flow))) {
>                   /* Packet restoration failed and it was dropped, do not
>                    * continue processing.
>                    */
> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> index 9fee7570a..8ddddbd2e 100644
> --- a/lib/netdev-offload-dpdk.c
> +++ b/lib/netdev-offload-dpdk.c
> @@ -2292,11 +2292,16 @@ netdev_offload_dpdk_hw_miss_packet_recover(struct netdev *netdev,
>       odp_port_t vport_odp;
>       int ret = 0;
>   
> -    if (netdev_dpdk_rte_flow_get_restore_info(netdev, packet,
> -                                              &rte_restore_info, NULL)) {
> +    ret = netdev_dpdk_rte_flow_get_restore_info(netdev, packet,
> +                                                &rte_restore_info, NULL);
> +    if (ret) {
>           /* This function is called for every packet, and in most cases there
>            * will be no restore info from the HW, thus error is expected.
>            */
> +        VLOG_DBG_RL(&rl, "flow_get_restore_info failed: %d\n", ret);
It is likely that most of the packets will have an "error" when calling 
this API, if there is nothing to get. See the comment. There was no 
print on purpose.
> +        if (ret == -EOPNOTSUPP) {

How can we guarantee EOPNOTSUPP is only if indeed not supported and not 
that the PMD returned it (for this packet only)?

Maybe it is better to have another API to query (might also need changes 
in dpdk).

> +            return -ret;
> +        }
>           return 0;
>       }
>   


More information about the dev mailing list