[ovs-dev] [PATCH 15/20] netdev-offload-dpdk-flow: Support offload of output action

Sriharsha Basavapatna sriharsha.basavapatna at broadcom.com
Mon Dec 16 08:47:56 UTC 2019


On Sun, Dec 8, 2019 at 3:08 PM Eli Britstein <elibr at mellanox.com> wrote:
>
>
> On 12/5/2019 6:46 PM, Sriharsha Basavapatna wrote:
> > On Wed, Dec 4, 2019 at 8:55 PM Eli Britstein <elibr at mellanox.com> wrote:
> >>
> >> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote:
> >>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr at mellanox.com> wrote:
> >>>> Signed-off-by: Eli Britstein <elibr at mellanox.com>
> >>>> Reviewed-by: Oz Shlomo <ozsh at mellanox.com>
> >>>> ---
> >>>>    NEWS                           |  2 +
> >>>>    lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
> >>>>    2 files changed, 85 insertions(+), 4 deletions(-)
> >>>>
> >>>> diff --git a/NEWS b/NEWS
> >>>> index 88b818948..ca9c2b230 100644
> >>>> --- a/NEWS
> >>>> +++ b/NEWS
> >>>> @@ -10,6 +10,8 @@ Post-v2.12.0
> >>>>           if supported by libbpf.
> >>>>         * Add option to enable, disable and query TCP sequence checking in
> >>>>           conntrack.
> >>>> +   - DPDK:
> >>>> +     * Add hardware offload support for output actions.
> >>>>
> >>>>    v2.12.0 - 03 Sep 2019
> >>>>    ---------------------
> >>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
> >>>> index dbbc45e99..6e7efb315 100644
> >>>> --- a/lib/netdev-offload-dpdk-flow.c
> >>>> +++ b/lib/netdev-offload-dpdk-flow.c
> >>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
> >>>>            } else {
> >>>>                ds_put_cstr(s, "  RSS = null\n");
> >>>>            }
> >>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
> >>>> +        const struct rte_flow_action_count *count = actions->conf;
> >>>> +
> >>>> +        ds_put_cstr(s, "rte flow count action:\n");
> >>>> +        if (count) {
> >>>> +            ds_put_format(s,
> >>>> +                          "  Count: shared=%d, id=%d\n",
> >>>> +                          count->shared, count->id);
> >>>> +        } else {
> >>>> +            ds_put_cstr(s, "  Count = null\n");
> >>>> +        }
> >>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
> >>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
> >>>> +
> >>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
> >>>> +        if (port_id) {
> >>>> +            ds_put_format(s,
> >>>> +                          "  Port-id: original=%d, id=%d\n",
> >>>> +                          port_id->original, port_id->id);
> >>>> +        } else {
> >>>> +            ds_put_cstr(s, "  Port-id = null\n");
> >>>> +        }
> >>>>        } else {
> >>>>            ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
> >>>>        }
> >>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
> >>>>        return 0;
> >>>>    }
> >>>>
> >>>> +static void
> >>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
> >>>> +{
> >>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
> >>>> +
> >>>> +    count->shared = 0;
> >>>> +    /* Each flow has a single count action, so no need of id */
> >>>> +    count->id = 0;
> >>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
> >>>> +}
> >>>> +
> >>>> +static void
> >>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
> >>>> +                                    struct netdev *outdev)
> >>>> +{
> >>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
> >>>> +
> >>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
> >>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
> >>>> +}
> >>>> +
> >>>> +static int
> >>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
> >>>> +                                   const struct nlattr *nla,
> >>>> +                                   struct offload_info *info)
> >>>> +{
> >>>> +    struct netdev *outdev;
> >>>> +    odp_port_t port;
> >>>> +
> >>>> +    port = nl_attr_get_odp_port(nla);
> >>>> +    outdev = netdev_ports_get(port, info->dpif_class);
> >>>> +    if (outdev == NULL) {
> >>>> +        VLOG_DBG_RL(&error_rl,
> >>>> +                    "Cannot find netdev for odp port %d", port);
> >>>> +        return -1;
> >>>> +    }
> >>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
> >>>> +        VLOG_DBG_RL(&error_rl,
> >>>> +                    "Output to %s cannot be offloaded",
> >>>> +                    netdev_get_name(outdev));
> >>>> +        return -1;
> >>>> +    }
> >>>> +
> >>>> +    netdev_dpdk_flow_add_count_action(actions);
> >>>> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
> >>>> +    netdev_close(outdev);
> >>>> +
> >>>> +    return 0;
> >>>> +}
> >>>> +
> >>>>    int
> >>>>    netdev_dpdk_flow_actions_add(struct flow_actions *actions,
> >>>>                                 struct nlattr *nl_actions,
> >>>>                                 size_t nl_actions_len,
> >>>> -                             struct offload_info *info OVS_UNUSED)
> >>>> +                             struct offload_info *info)
> >>>>    {
> >>>>        struct nlattr *nla;
> >>>>        size_t left;
> >>>>
> >>>>        NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
> >>>> -        VLOG_DBG_RL(&error_rl,
> >>>> -                    "Unsupported action type %d", nl_attr_type(nla));
> >>>> -        return -1;
> >>>> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
> >>>> +            left <= NLA_ALIGN(nla->nla_len)) {
> >>>> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
> >>>> +                return -1;
> >>>> +            }
> >>> The check in netdev_dpdk_flow_add_output_action() is insufficient to
> >>> decide whether the device is capable of full-offload, since it assumes
> >>> that the output action is supported if netdev_dpdk type is
> >>> DPDK_DEV_ETH.
> >>>
> >>> There are a couple of issues with this approach:
> >>>
> >>> 1. For a device that is not capable of full-offload, it results in 2
> >>> calls to the PMD for every flow-offload request. The first attempt for
> >>> full-offload fails, followed by another call for partial-offload.
> >> We cannot avoid this, as we want to do full offload, and only fallback
> >> to partial if it doesn't work.
> >>> 2. Even for devices that support full-offload, the offload might fail
> >>> if the in-port and out-port are not in the same switch-domain. For
> >>> example, if only partial-offload is configured (vhost-user ports) in
> >>> the vSwitch, then the PMD should check if the in-port and out-port are
> >>> on the same switch (domain-id must match) and fail the request
> >>> otherwise. This check is important to alert the user of
> >>> misconfigurations. If OVS doesn't do this check, all pmd drivers will
> >>> have to do this check and that duplication is not desirable.
> >> I don't think it's in the OVS scope to do such checks, but it should be
> >> in the driver's level (PMD), the same way it is in the kernel offloading
> >> using TC.
> >>
> > IMO, it is not out-of-scope for OVS to add these checks to make better
> > offload decisions:
> > - This layer of OVS (netdev-offload-dpdk) is already offload-aware and
> > it is using a set of offload APIs (rte_flow)
> It is aware of the offload APIs, but not on specific HW capabilities, so
> in some cases
>
> trying to make such restrictions won't cover all use cases in best case and result in
> redundant code.
>
> For example, Mellanox ConnectX-5/ConnectX-6 are dual port NICs, with
> embedded eswitch. In ConnectX-5 we cannot forward from pf0vf0 to pf1vf0, but
> we can in ConnectX-6.

OVS will only do the first level validation that the 2 ports belong to
the same eSwitch, as per the switchdev architecture. If there are
additional chip specific restrictions (i.e, something not compliant
with switchdev spec), those of course could be validated and rejected
by the driver.  I'm not suggesting that the driver shouldn't have such
chip-specific additional checks, but the first level check should be
done by OVS.

>
> It is not something exposed at all to the user, specifically to OVS.
>
> So, I think those kind of decisions that involve HW specifics should be done in the
> layer that has the HW knowledge, e.g. - the driver.
>
> > - Full offload may not be the preferred configuration for a customer
> > (since it requires additional config like SRIOV). That is, the code
> > should not make the distinction that full-offload is always preferred
> > and partial-offload is only a fallback option.
> At the moment, any kind of offload requires SRIOV. BTW, to resolve this,
> after full
>
> offload is accepted, we plan to resubmit vDPA (see
> https://patchwork.ozlabs.org/cover/1178472/) that will allow using offloads for
> virtio as well.

But vDPA requires more infrastructure changes that may not be needed
for a simple vhost-user configuration and it shouldn't stop us from
supporting additional actions for vhost-user.

Thanks,
-Harsha

>
> > - Without such checks, offload rate of partial-offload configuration
> > could be potentially impacted with a large number of flows.
> I don't think failing in the PMD level will be much slower than failing
> in OVS level.
> > - Similar improvement (checking/logging in OVS as opposed to
> > individual drivers) could be made in TC kernel offloads too
> In order to add such checks, we can maybe think in the future of a
> capability
>
> querying mechanism, to query each device (at init time, maybe using
> rte_flow_validate) and make decisions based on that.
>
> However, I think this mechanism is neither a simple one, nor should be a
> prerequisite for this patch-set.
>
> >
> >> I agree that the argument of logging is important, but this is something
> >> that I think out of scope of this series. All prints in DPDK go to
> >> /dev/null, and not logged into OVS' log.
> >>
> >> BTW, in kernel, there is a similar issue with drivers that logs using
> >> extack. Those messages are not dumped to dmesg, and OVS also doesn't
> >> read them to its log. Also out of scope of this series...
> >>
> >>> Here's how this could be addressed, by identifying whether the device
> >>> is capable of full-offload or partial-offload.
> >>>
> >>> 1. Check if the target device (in-port) is full-offload capable. This
> >>> could be done by checking if rte_flow api is supported, along with a
> >>> valid switch-domain-id. We could even improve this further, similar to
> >>> the kernel switchdev framework in which the device mode is
> >>> configurable (legacy/eswitch modes) and available to consumers, but
> >>> this might require some more DPDK framework changes.
> >> I think we can enhance to check if both are ETH. However, As I wrote
> >> above, I think checking other HW properties should not be in OVS scope,
> >> but in driver's domain.
> >>> 2. If the device is full-offload capable, and if output action is
> >>> specified,  check if in-port and out-port are on the same switch
> >>> (switch-domain-id match mentioned above).
> >>>
> >>> If both 1 and 2 succeed, then the device is capable of full-offload
> >>> and we can proceed with full-offload item/action setup, followed by
> >>> rte_flow_create().
> >>>
> >>> Otherwise, we fallback to partial-offload code path. Here, we can
> >>> handle the case where a vhost-user interface is either the in-port or
> >>> out-port of the flow being offloaded. Depending on whether the
> >>> vhost-user interface is the in-port or out-port, if the other port
> >>> associated with the flow is offload capable (DPDK_DEV_ETH), then we
> >>> offload the flow to that device. This would help us build further
> >>> extensions to the existing partial-offload framework by adding more
> >>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
> >>> the output action that still needs to be processed in software.
> >>>
> >>> Thanks,
> >>> -Harsha
> >>>
> >>>> +        } else {
> >>>> +            VLOG_DBG_RL(&error_rl,
> >>>> +                        "Unsupported action type %d", nl_attr_type(nla));
> >>>> +            return -1;
> >>>> +        }
> >>>>        }
> >>>>
> >>>>        add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> >>>> --
> >>>> 2.14.5
> >>>>
> >>>> _______________________________________________
> >>>> dev mailing list
> >>>> dev at openvswitch.org
> >>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=02%7C01%7Celibr%40mellanox.com%7Cc11727da2a894b08f61608d779a2c470%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637111612310807084&sdata=kturtC%2BTMsvvgZ1iaBa9WqHfS60CjaJ8Oqp%2BGDvbfgY%3D&reserved=0


More information about the dev mailing list