[ovs-dev] [PATCH V2 13/14] netdev-offload-dpdk: Support vports flows offload
Eli Britstein
elibr at nvidia.com
Mon Mar 1 11:49:53 UTC 2021
On 3/1/2021 1:30 PM, Sriharsha Basavapatna wrote:
> On Thu, Feb 25, 2021 at 7:51 PM Eli Britstein <elibr at nvidia.com> wrote:
>>
>> On 2/25/2021 9:35 AM, Sriharsha Basavapatna wrote:
>>> On Wed, Feb 24, 2021 at 4:50 PM Eli Britstein <elibr at nvidia.com> wrote:
>>>> On 2/24/2021 12:37 PM, Sriharsha Basavapatna wrote:
>>>>> On Wed, Feb 10, 2021 at 8:57 PM Eli Britstein <elibr at nvidia.com> wrote:
>>>>>> Vports are virtual, OVS-only logical devices, so rte_flows cannot be
>>>>>> applied as-is on them. Instead, apply the rules on the physical port from
>>>>>> which the packet has arrived, as provided by the orig_in_port field.
>>>>>>
>>>>>> Signed-off-by: Eli Britstein <elibr at nvidia.com>
>>>>>> Reviewed-by: Gaetan Rivet <gaetanr at nvidia.com>
>>>>>> ---
>>>>>> lib/netdev-offload-dpdk.c | 169 ++++++++++++++++++++++++++++++--------
>>>>>> 1 file changed, 137 insertions(+), 32 deletions(-)
>>>>>>
>>>>>> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
>>>>>> index f6e668bff..ad47d717f 100644
>>>>>> --- a/lib/netdev-offload-dpdk.c
>>>>>> +++ b/lib/netdev-offload-dpdk.c
>>>>>> @@ -62,6 +62,7 @@ struct ufid_to_rte_flow_data {
>>>>>> struct rte_flow *rte_flow;
>>>>>> bool actions_offloaded;
>>>>>> struct dpif_flow_stats stats;
>>>>>> + struct netdev *physdev;
>>>>>> };
>>>>>>
>>>>>> /* Find rte_flow with @ufid. */
>>>>>> @@ -87,7 +88,8 @@ ufid_to_rte_flow_data_find(const ovs_u128 *ufid, bool warn)
>>>>>>
>>>>>> static inline struct ufid_to_rte_flow_data *
>>>>>> ufid_to_rte_flow_associate(const ovs_u128 *ufid, struct netdev *netdev,
>>>>>> - struct rte_flow *rte_flow, bool actions_offloaded)
>>>>>> + struct netdev *vport, struct rte_flow *rte_flow,
>>>>>> + bool actions_offloaded)
>>>>>> {
>>>>>> size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
>>>>>> struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data);
>>>>>> @@ -105,7 +107,8 @@ ufid_to_rte_flow_associate(const ovs_u128 *ufid, struct netdev *netdev,
>>>>>> }
>>>>>>
>>>>>> data->ufid = *ufid;
>>>>>> - data->netdev = netdev_ref(netdev);
>>>>>> + data->netdev = vport ? netdev_ref(vport) : netdev_ref(netdev);
>>>>>> + data->physdev = netdev_ref(netdev);
>>>>> For non-tunnel offloads, we end up getting two references to the same
>>>>> 'netdev'; can we avoid this ? That is, get a reference to physdev only
>>>>> for the vport case.
>>>> I know. This is on purpose. It simplifies other places, for example
>>>> query, to always use physdev, and always close both without any
>>>> additional logic there.
>>> Ok, please add this as an inline comment so we know why it is done
>>> this way. Also, you missed my last comment in this patch (see
>>> VXLAN_DECAP action below).
>> OK.
>>
>> Sorry, see below.
>>
>>>
>>>>>> data->rte_flow = rte_flow;
>>>>>> data->actions_offloaded = actions_offloaded;
>>>>>>
>>>>>> @@ -122,6 +125,7 @@ ufid_to_rte_flow_disassociate(struct ufid_to_rte_flow_data *data)
>>>>>> cmap_remove(&ufid_to_rte_flow,
>>>>>> CONST_CAST(struct cmap_node *, &data->node), hash);
>>>>>> netdev_close(data->netdev);
>>>>>> + netdev_close(data->physdev);
>>>>> Similar comment, release reference to physdev only if we got a
>>>>> reference earlier (i.e., physdev should be non-null only when netdev
>>>>> is a vport).
>>>> Right. As it is written this way, no need for any additional logic here.
>>>>>> ovsrcu_postpone(free, data);
>>>>>> }
>>>>>>
>>>>>> @@ -134,6 +138,8 @@ struct flow_patterns {
>>>>>> struct rte_flow_item *items;
>>>>>> int cnt;
>>>>>> int current_max;
>>>>>> + uint32_t num_of_tnl_items;
>>>>> change to --> num_pmd_tnl_items
>>>> OK.
>>>>>> + struct ds s_tnl;
>>>>>> };
>>>>>>
>>>>>> struct flow_actions {
>>>>>> @@ -154,16 +160,20 @@ struct flow_actions {
>>>>>> static void
>>>>>> dump_flow_attr(struct ds *s, struct ds *s_extra,
>>>>>> const struct rte_flow_attr *attr,
>>>>>> + struct flow_patterns *flow_patterns,
>>>>>> struct flow_actions *flow_actions)
>>>>>> {
>>>>>> if (flow_actions->num_of_tnl_actions) {
>>>>>> ds_clone(s_extra, &flow_actions->s_tnl);
>>>>>> + } else if (flow_patterns->num_of_tnl_items) {
>>>>>> + ds_clone(s_extra, &flow_patterns->s_tnl);
>>>>>> }
>>>>>> - ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s%s",
>>>>>> + ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s%s%s",
>>>>>> attr->ingress ? "ingress " : "",
>>>>>> attr->egress ? "egress " : "", attr->priority, attr->group,
>>>>>> attr->transfer ? "transfer " : "",
>>>>>> - flow_actions->num_of_tnl_actions ? "tunnel_set 1 " : "");
>>>>>> + flow_actions->num_of_tnl_actions ? "tunnel_set 1 " : "",
>>>>>> + flow_patterns->num_of_tnl_items ? "tunnel_match 1 " : "");
>>>>>> }
>>>>>>
>>>>>> /* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using
>>>>>> @@ -177,9 +187,18 @@ dump_flow_attr(struct ds *s, struct ds *s_extra,
>>>>>> }
>>>>>>
>>>>>> static void
>>>>>> -dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
>>>>>> +dump_flow_pattern(struct ds *s,
>>>>>> + struct flow_patterns *flow_patterns,
>>>>>> + int pattern_index)
>>>>>> {
>>>>>> - if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
>>>>>> + const struct rte_flow_item *item = &flow_patterns->items[pattern_index];
>>>>>> +
>>>>>> + if (item->type == RTE_FLOW_ITEM_TYPE_END) {
>>>>>> + ds_put_cstr(s, "end ");
>>>>>> + } else if (flow_patterns->num_of_tnl_items &&
>>>>>> + pattern_index < flow_patterns->num_of_tnl_items) {
>>>>>> + return;
>>>>>> + } else if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
>>>>>> const struct rte_flow_item_eth *eth_spec = item->spec;
>>>>>> const struct rte_flow_item_eth *eth_mask = item->mask;
>>>>>>
>>>>>> @@ -569,19 +588,19 @@ dump_flow_action(struct ds *s, struct ds *s_extra,
>>>>>> static struct ds *
>>>>>> dump_flow(struct ds *s, struct ds *s_extra,
>>>>>> const struct rte_flow_attr *attr,
>>>>>> - const struct rte_flow_item *items,
>>>>>> + struct flow_patterns *flow_patterns,
>>>>>> struct flow_actions *flow_actions)
>>>>>> {
>>>>>> int i;
>>>>>>
>>>>>> if (attr) {
>>>>>> - dump_flow_attr(s, s_extra, attr, flow_actions);
>>>>>> + dump_flow_attr(s, s_extra, attr, flow_patterns, flow_actions);
>>>>>> }
>>>>>> ds_put_cstr(s, "pattern ");
>>>>>> - while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
>>>>>> - dump_flow_pattern(s, items++);
>>>>>> + for (i = 0; i < flow_patterns->cnt; i++) {
>>>>>> + dump_flow_pattern(s, flow_patterns, i);
>>>>>> }
>>>>>> - ds_put_cstr(s, "end actions ");
>>>>>> + ds_put_cstr(s, "actions ");
>>>>>> for (i = 0; i < flow_actions->cnt; i++) {
>>>>>> dump_flow_action(s, s_extra, flow_actions, i);
>>>>>> }
>>>>>> @@ -591,11 +610,12 @@ dump_flow(struct ds *s, struct ds *s_extra,
>>>>>> static struct rte_flow *
>>>>>> netdev_offload_dpdk_flow_create(struct netdev *netdev,
>>>>>> const struct rte_flow_attr *attr,
>>>>>> - const struct rte_flow_item *items,
>>>>>> + struct flow_patterns *flow_patterns,
>>>>>> struct flow_actions *flow_actions,
>>>>>> struct rte_flow_error *error)
>>>>>> {
>>>>>> const struct rte_flow_action *actions = flow_actions->actions;
>>>>>> + const struct rte_flow_item *items = flow_patterns->items;
>>>>>> struct ds s_extra = DS_EMPTY_INITIALIZER;
>>>>>> struct ds s = DS_EMPTY_INITIALIZER;
>>>>>> struct rte_flow *flow;
>>>>>> @@ -604,7 +624,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>>>>>> flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
>>>>>> if (flow) {
>>>>>> if (!VLOG_DROP_DBG(&rl)) {
>>>>>> - dump_flow(&s, &s_extra, attr, items, flow_actions);
>>>>>> + dump_flow(&s, &s_extra, attr, flow_patterns, flow_actions);
>>>>>> extra_str = ds_cstr(&s_extra);
>>>>>> VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s flow create %d %s",
>>>>>> netdev_get_name(netdev), (intptr_t) flow, extra_str,
>>>>>> @@ -619,7 +639,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>>>>>> VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
>>>>>> netdev_get_name(netdev), error->type, error->message);
>>>>>> if (!vlog_should_drop(&this_module, level, &rl)) {
>>>>>> - dump_flow(&s, &s_extra, attr, items, flow_actions);
>>>>>> + dump_flow(&s, &s_extra, attr, flow_patterns, flow_actions);
>>>>>> extra_str = ds_cstr(&s_extra);
>>>>>> VLOG_RL(&rl, level, "%s: Failed flow: %s flow create %d %s",
>>>>>> netdev_get_name(netdev), extra_str,
>>>>>> @@ -654,6 +674,28 @@ add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type,
>>>>>> patterns->cnt++;
>>>>>> }
>>>>>>
>>>>>> +static void
>>>>>> +add_flow_tnl_patterns(struct flow_patterns *all_patterns,
>>>>>> + struct rte_flow_item *tnl_items,
>>>>> --> pmd_tnl_items
>>>>>> + uint32_t num_of_tnl_items,
>>>>> --> num_pmd_tnl_items
>>>>>> + struct flow_patterns *flow_patterns)
>>>>>> +{
>>>>>> + int i;
>>>>>> +
>>>>>> + all_patterns->num_of_tnl_items = num_of_tnl_items;
>>>>>> +
>>>>>> + for (i = 0; i < num_of_tnl_items; i++) {
>>>>>> + add_flow_pattern(all_patterns, tnl_items[i].type, tnl_items[i].spec,
>>>>>> + tnl_items[i].mask);
>>>>>> + }
>>>>>> +
>>>>>> + for (i = 0; i < flow_patterns->cnt; i++) {
>>>>>> + add_flow_pattern(all_patterns, flow_patterns->items[i].type,
>>>>>> + flow_patterns->items[i].spec,
>>>>>> + flow_patterns->items[i].mask);
>>>>>> + }
>>>>>> +}
>>>>>> +
>>>>>> static void
>>>>>> add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
>>>>>> const void *conf)
>>>>>> @@ -1487,6 +1529,7 @@ parse_flow_actions(struct netdev *netdev,
>>>>>>
>>>>>> static struct ufid_to_rte_flow_data *
>>>>>> create_netdev_offload(struct netdev *netdev,
>>>>>> + struct netdev *vport,
>>>>>> const ovs_u128 *ufid,
>>>>>> struct flow_patterns *flow_patterns,
>>>>>> struct flow_actions *flow_actions,
>>>>>> @@ -1495,32 +1538,34 @@ create_netdev_offload(struct netdev *netdev,
>>>>>> {
>>>>>> struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1, };
>>>>>> struct flow_actions rss_actions = { .actions = NULL, .cnt = 0 };
>>>>>> - struct rte_flow_item *items = flow_patterns->items;
>>>>>> struct ufid_to_rte_flow_data *flow_data = NULL;
>>>>>> bool actions_offloaded = true;
>>>>>> struct rte_flow *flow = NULL;
>>>>>> struct rte_flow_error error;
>>>>>>
>>>>>> if (enable_full) {
>>>>>> - flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, items,
>>>>>> - flow_actions, &error);
>>>>>> + flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr,
>>>>>> + flow_patterns, flow_actions,
>>>>>> + &error);
>>>>>> }
>>>>>>
>>>>>> - if (!flow) {
>>>>>> + if (!vport && !flow) {
>>>>>> /* If we failed to offload the rule actions fallback to MARK+RSS
>>>>>> * actions.
>>>>>> */
>>>>>> actions_offloaded = false;
>>>>>> flow_attr.transfer = 0;
>>>>>> add_flow_mark_rss_actions(&rss_actions, flow_mark, netdev);
>>>>>> - flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, items,
>>>>>> - &rss_actions, &error);
>>>>>> + flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr,
>>>>>> + flow_patterns, &rss_actions,
>>>>>> + &error);
>>>>>> }
>>>>>>
>>>>>> if (flow) {
>>>>>> - flow_data = ufid_to_rte_flow_associate(ufid, netdev, flow,
>>>>>> + flow_data = ufid_to_rte_flow_associate(ufid, netdev, vport, flow,
>>>>>> actions_offloaded);
>>>>>> - VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT,
>>>>>> + VLOG_DBG("%s/%s: installed flow %p by ufid "UUID_FMT,
>>>>>> + vport ? netdev_get_name(vport) : netdev_get_name(netdev),
>>>>>> netdev_get_name(netdev), flow,
>>>>>> UUID_ARGS((struct uuid *) ufid));
>>>>>> }
>>>>>> @@ -1529,6 +1574,55 @@ create_netdev_offload(struct netdev *netdev,
>>>>>> return flow_data;
>>>>>> }
>>>>>>
>>>>>> +static struct ufid_to_rte_flow_data *
>>>>>> +create_vport_offload(struct netdev *vport,
>>>>>> + odp_port_t orig_in_port,
>>>>>> + const ovs_u128 *ufid,
>>>>>> + struct flow_patterns *flow_patterns,
>>>>>> + struct flow_actions *flow_actions)
>>>>>> +{
>>>>>> + struct flow_patterns all_patterns = { .items = NULL, .cnt = 0 };
>>>>>> + struct ufid_to_rte_flow_data *flows_data = NULL;
>>>>>> + struct rte_flow_item *tnl_items;
>>>>>> + struct rte_flow_tunnel tunnel;
>>>>>> + struct rte_flow_error error;
>>>>>> + uint32_t num_of_tnl_items;
>>>>>> + struct netdev *physdev;
>>>>>> +
>>>>>> + physdev = netdev_ports_get(orig_in_port, vport->dpif_type);
>>>>>> + if (physdev == NULL) {
>>>>>> + return NULL;
>>>>>> + }
>>>>>> +
>>>>>> + if (vport_to_rte_tunnel(vport, &tunnel, physdev,
>>>>>> + &all_patterns.s_tnl)) {
>>>>>> + goto out;
>>>>>> + }
>>>>>> + if (netdev_dpdk_rte_flow_tunnel_match(physdev, &tunnel, &tnl_items,
>>>>>> + &num_of_tnl_items, &error)) {
>>>>>> + VLOG_DBG_RL(&rl, "%s: netdev_dpdk_rte_flow_tunnel_match failed: "
>>>>>> + "%d (%s).", netdev_get_name(physdev), error.type,
>>>>>> + error.message);
>>>>>> + goto out;
>>>>>> + }
>>>>>> + add_flow_tnl_patterns(&all_patterns, tnl_items, num_of_tnl_items,
>>>>>> + flow_patterns);
>>>>>> + flows_data = create_netdev_offload(physdev, vport, ufid, &all_patterns,
>>>>>> + flow_actions, true, 0);
>>>>>> + if (netdev_dpdk_rte_flow_tunnel_item_release(physdev, tnl_items,
>>>>>> + num_of_tnl_items,
>>>>>> + &error)) {
>>>>>> + VLOG_DBG_RL(&rl, "%s: netdev_dpdk_rte_flow_tunnel_item_release "
>>>>>> + "failed: %d (%s).", netdev_get_name(physdev),
>>>>>> + error.type, error.message);
>>>>>> + }
>>>>>> +out:
>>>>>> + all_patterns.cnt = 0;
>>>>>> + free_flow_patterns(&all_patterns);
>>>>>> + netdev_close(physdev);
>>>>>> + return flows_data;
>>>>>> +}
>>>>>> +
>>>>>> static struct ufid_to_rte_flow_data *
>>>>>> netdev_offload_dpdk_add_flow(struct netdev *netdev,
>>>>>> struct match *match,
>>>>>> @@ -1550,8 +1644,14 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
>>>>>>
>>>>>> err = parse_flow_actions(netdev, &actions, nl_actions, actions_len);
>>>>> For the vport offload (flow-F2), we should add a VXLAN_DECAP action to
>>>>> the action list. Otherwise, there is no action in either F1 or F2 that
>>>>> tells the PMD to pop the tunnel header.
>> Sorry I missed this comment.
>>
>> The PMD is instructed to decap with the DPDK API
>> rte_flow_tunnel_decap_set, so no need to add another VXLAN_DECAP action.
> rte_flow_tunnel_decap_set() is issued prior to F1 and it provides a
> way for the PMD to add some private actions to F1. But F2 has no
> knowledge of tunnel_decap_set() and also F2 could get offloaded prior
> to F1.
The actions provided by rte_flow_tunnel_decap_set() are part of F1's
actions. As the name of the API ("_decap_") suggests, the PMD is instructed to decap.
Regarding F2, that's right, but there is no decap there either. Please
explain.
>
>
>>>>> if (!strcmp(netdev_get_type(netdev), "vxlan")) {
>>>>> add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_DECAP, NULL);
>>>>> }
>>>>>
>>>>> Thanks,
>>>>> -Harsha
>>>>>> - flow_data = create_netdev_offload(netdev, ufid, &patterns, &actions, !err,
>>>>>> - info->flow_mark);
>>>>>> + if (netdev_vport_is_vport_class(netdev->netdev_class)) {
>>>>>> + flow_data = err ? NULL :
>>>>>> + create_vport_offload(netdev, info->orig_in_port, ufid, &patterns,
>>>>>> + &actions);
>>>>>> + } else {
>>>>>> + flow_data = create_netdev_offload(netdev, NULL, ufid, &patterns,
>>>>>> + &actions, !err, info->flow_mark);
>>>>>> + }
>>>>>>
>>>>>> out:
>>>>>> free_flow_patterns(&patterns);
>>>>>> @@ -1564,26 +1664,30 @@ netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
>>>>>> {
>>>>>> struct rte_flow_error error;
>>>>>> struct rte_flow *rte_flow;
>>>>>> + struct netdev *physdev;
>>>>>> struct netdev *netdev;
>>>>>> ovs_u128 *ufid;
>>>>>> int ret;
>>>>>>
>>>>>> rte_flow = rte_flow_data->rte_flow;
>>>>>> + physdev = rte_flow_data->physdev;
>>>>>> netdev = rte_flow_data->netdev;
>>>>>> ufid = &rte_flow_data->ufid;
>>>>>>
>>>>>> - ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error);
>>>>>> + ret = netdev_dpdk_rte_flow_destroy(physdev, rte_flow, &error);
>>>>>>
>>>>>> if (ret == 0) {
>>>>>> ufid_to_rte_flow_disassociate(rte_flow_data);
>>>>>> - VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR
>>>>>> + VLOG_DBG_RL(&rl, "%s/%s: rte_flow 0x%"PRIxPTR
>>>>>> " flow destroy %d ufid " UUID_FMT,
>>>>>> - netdev_get_name(netdev), (intptr_t) rte_flow,
>>>>>> + netdev_get_name(netdev), netdev_get_name(physdev),
>>>>>> + (intptr_t) rte_flow,
>>>>>> netdev_dpdk_get_port_id(netdev),
>>>>>> UUID_ARGS((struct uuid *) ufid));
>>>>>> } else {
>>>>>> - VLOG_ERR("Failed flow: %s: flow destroy %d ufid " UUID_FMT,
>>>>>> - netdev_get_name(netdev), netdev_dpdk_get_port_id(netdev),
>>>>>> + VLOG_ERR("Failed flow: %s/%s: flow destroy %d ufid " UUID_FMT,
>>>>>> + netdev_get_name(netdev), netdev_get_name(physdev),
>>>>>> + netdev_dpdk_get_port_id(netdev),
>>>>>> UUID_ARGS((struct uuid *) ufid));
>>>>>> }
>>>>>>
>>>>>> @@ -1688,8 +1792,9 @@ netdev_offload_dpdk_flow_get(struct netdev *netdev,
>>>>>> goto out;
>>>>>> }
>>>>>> attrs->dp_layer = "dpdk";
>>>>>> - ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow,
>>>>>> - &query, &error);
>>>>>> + ret = netdev_dpdk_rte_flow_query_count(rte_flow_data->physdev,
>>>>>> + rte_flow_data->rte_flow, &query,
>>>>>> + &error);
>>>>>> if (ret) {
>>>>>> VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p",
>>>>>> netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid),
>>>>>> @@ -1713,7 +1818,7 @@ netdev_offload_dpdk_flow_flush(struct netdev *netdev)
>>>>>> struct ufid_to_rte_flow_data *data;
>>>>>>
>>>>>> CMAP_FOR_EACH (data, node, &ufid_to_rte_flow) {
>>>>>> - if (data->netdev != netdev) {
>>>>>> + if (data->netdev != netdev && data->physdev != netdev) {
>>>>>> continue;
>>>>>> }
>>>>>>
>>>>>> --
>>>>>> 2.28.0.546.g385c171
>>>>>>
More information about the dev
mailing list