[ovs-dev] [PATCH v14 7/7] netdev-offload-tc: Add offload support for sFlow
Eelco Chaudron
echaudro at redhat.com
Mon Sep 13 09:18:06 UTC 2021
On 10 Sep 2021, at 13:02, Chris Mi wrote:
> On 9/9/2021 10:29 PM, Eelco Chaudron wrote:
>>
>> On 15 Jul 2021, at 8:01, Chris Mi wrote:
>>
>> Create a unique group ID to map the sFlow info when offloading sFlow
>> action to TC. When showing the offloaded datapath flows, translate
>> the
>> group ID from TC sample action to sFlow info using the mapping.
>>
>> See comments below and this completes my review of the v14. See the other email on the update of the flows going wrong.
>>
> I'm struggling with the test these days and make little progress. I usually use sflowtool to receive the sFlow packets.
> After investigation, I found the test uses 'ovstest test-sflow'. If I use it manually, it works both for offload and none-offload.
> But if I integrate it in the new test, it only works for non-offload. It can't receive sFlow packets for offload.
> I don't know what is missing.
The test framework behaves oddly sometimes. I always add a lot of debugging stuff to make sure I know what is running, and/or add a long sleep before the error, so I have time to debug the running setup.
> So I plan to address v14 comments next week and ignore the test now. Maybe I can find something
> unusual later on. And thanks for the full review on v14.
>
> -Chris
>>
>> I’ll do a more thorough review on v15 assuming the issue gets fixed, especially the sgid allocation/re-use/free.
>>
>> Signed-off-by: Chris Mi <cmi at nvidia.com>
>> Reviewed-by: Eli Britstein <elibr at nvidia.com>
>> ---
>> NEWS | 1 +
>> lib/netdev-offload-tc.c | 212
>> +++++++++++++++++++++++++++++++++++++---
>> lib/tc.c | 61 +++++++++++-
>> lib/tc.h | 15 ++-
>> 4 files changed, 272 insertions(+), 17 deletions(-)
>>
>> diff --git a/NEWS b/NEWS
>> index 6cdccc715..7c0361e18 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -50,6 +50,7 @@ Post-v2.15.0
>> - OVS now reports the datapath capability 'ct_zero_snat', which
>> reflects
>> whether the SNAT with all-zero IP address is supported.
>> See ovs-vswitchd.conf.db(5) for details.
>> + - Add sFlow offload support for kernel (netlink) datapath.
>>
>> v2.15.0 - 15 Feb 2021
>> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>> index 2f16cf279..71e51394f 100644
>> --- a/lib/netdev-offload-tc.c
>> +++ b/lib/netdev-offload-tc.c
>> @@ -20,6 +20,7 @@
>> #include <linux/if_ether.h>
>>
>> #include "dpif.h"
>> +#include "dpif-offload-provider.h"
>> #include "hash.h"
>> #include "openvswitch/hmap.h"
>> #include "openvswitch/match.h"
>> @@ -1087,6 +1088,19 @@ parse_tc_flower_to_match(struct tc_flower
>> *flower,
>> action = flower->actions;
>> for (i = 0; i < flower->action_count; i++, action++) {
>> switch (action->type) {
>> + case TC_ACT_SAMPLE: {
>> + const struct sgid_node *node;
>> +
>> + node = sgid_find(action->sample.group_id);
>> + if (!node) {
>> + VLOG_ERR_RL(&error_rl, "%s: sgid node is NULL, sgid: %d",
>> + __func__, action->sample.group_id);
>>
>> This should probably be a warning, as this could happen if the DP has not yet been synced? i.e. revalidator being in the process of cleanup and someone requesting dp content?
>>
>> + return ENOENT;
>> + }
>> + nl_msg_put(buf, node->sflow.action,
>> + node->sflow.action->nla_len);
>> + }
>> + break;
>> case TC_ACT_VLAN_POP: {
>> nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
>> }
>> @@ -1825,6 +1839,156 @@ parse_match_ct_state_to_flower(struct
>> tc_flower *flower, struct match *match)
>> }
>> }
>>
>> +static int
>> +parse_userspace_attributes(const struct nlattr *actions,
>> + struct dpif_sflow_attr *sflow_attr)
>> +{
>> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>> + const struct nlattr *nla;
>> + unsigned int left;
>> +
>> + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>> + if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
>> + struct user_action_cookie *cookie;
>> +
>> + cookie = CONST_CAST(struct user_action_cookie *, nl_attr_get(nla));
>> + if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
>>
>> Here I think the userdata type should be changed to "struct nlattr * " just like in the struct dpif_upcall.
>> This should probably be done in the patch introducing the dpif_sflow_attr, so it will also be aligned with the actions member.
>>
>> + sflow_attr->userdata = CONST_CAST(void *, nl_attr_get(nla));
>> + sflow_attr->userdata_len = nl_attr_get_size(nla);
>> + return 0;
>> + }
>> + }
>> + }
>> +
>> + VLOG_DBG_RL(&rl, "%s: cannot offload userspace action other than
>> sFlow",
>> + __func__);
>> + return EOPNOTSUPP;
>> +}
>> +
>> +static int
>> +parse_sample_actions_attribute(const struct nlattr *actions,
>> + struct dpif_sflow_attr *sflow_attr)
>> +{
>> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>> + const struct nlattr *nla;
>> + unsigned int left;
>> + int err = EINVAL;
>> +
>> + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>> + if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>> + err = parse_userspace_attributes(nla, sflow_attr);
>> + } else {
>> + /* We can't offload other nested actions */
>> + VLOG_DBG_RL(&rl, "%s: can only offload "
>> + "OVS_ACTION_ATTR_USERSPACE attribute", __func__);
>> + return EINVAL;
>> + }
>> + }
>> +
>> + if (err) {
>> + VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE
>> attribute",
>> + __func__);
>> + }
>> + return err;
>> +}
>> +
>> +static int
>> +parse_sample_action(struct tc_flower *flower, struct tc_action
>> *tc_action,
>> + const struct nlattr *sample_action,
>> + const struct flow_tnl *tnl, uint32_t *group_id,
>> + const ovs_u128 *ufid)
>> +{
>> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>> + struct dpif_sflow_attr sflow_attr;
>> + const struct nlattr *nla;
>> + unsigned int left;
>> + int ret = EINVAL;
>> +
>> + if (*group_id) {
>> + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>> + "per flow is supported", __func__);
>> + return EOPNOTSUPP;
>> + }
>> +
>> + memset(&sflow_attr, 0, sizeof sflow_attr);
>> + sflow_attr.ufid = *ufid;
>> + sflow_attr.action = sample_action;
>> +
>> + if (flower->tunnel) {
>> + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>> + }
>> +
>> + NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
>> + if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
>> + ret = parse_sample_actions_attribute(nla, &sflow_attr);
>> + } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
>> + tc_action->type = TC_ACT_SAMPLE;
>> + tc_action->sample.rate = UINT32_MAX / nl_attr_get_u32(nla);
>> + } else {
>> + return EINVAL;
>> + }
>> + }
>> +
>> + if (!tc_action->sample.rate || ret) {
>> + return EINVAL;
>> + }
>> +
>> + *group_id = sgid_alloc_ctx(&sflow_attr);
>> + if (!*group_id) {
>> + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>> action",
>> + __func__);
>> + return ENOENT;
>> + }
>> + tc_action->sample.group_id = *group_id;
>> + flower->action_count++;
>> +
>> + return 0;
>> +}
>> +
>> +static int
>> +parse_userspace_action(struct tc_flower *flower, struct tc_action
>> *tc_action,
>> + const struct nlattr *userspace_action,
>> + const struct flow_tnl *tnl, uint32_t *group_id,
>> + const ovs_u128 *ufid)
>> +{
>> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>> + struct dpif_sflow_attr sflow_attr;
>> + int err;
>> +
>> + if (*group_id) {
>> + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>> + "per flow is supported", __func__);
>> + return EOPNOTSUPP;
>> + }
>> +
>> + /* If sampling rate is 1, there is only a sFlow cookie inside of a
>> + * userspace action, but no sample attribute. That means we can
>> + * only offload userspace actions for sFlow.
>> + */
>> + memset(&sflow_attr, 0, sizeof sflow_attr);
>> + sflow_attr.ufid = *ufid;
>> + if (flower->tunnel) {
>> + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>> + }
>> + err = parse_userspace_attributes(userspace_action, &sflow_attr);
>> + if (err) {
>> + return err;
>> + }
>> + sflow_attr.action = userspace_action;
>> + *group_id = sgid_alloc_ctx(&sflow_attr);
>> + if (!*group_id) {
>> + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>> action",
>> + __func__);
>> + return ENOENT;
>> + }
>> + tc_action->type = TC_ACT_SAMPLE;
>> + tc_action->sample.group_id = *group_id;
>> + tc_action->sample.rate = 1;
>> + flower->action_count++;
>> +
>> + return 0;
>> +}
>> +
>> static int
>> netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>> struct nlattr *actions, size_t actions_len,
>> @@ -1840,6 +2004,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>> struct match *match,
>> const struct flow_tnl *tnl_mask = &mask->tunnel;
>> struct tc_action *action;
>> bool recirc_act = false;
>> + uint32_t sample_gid = 0;
>> uint32_t block_id = 0;
>> struct nlattr *nla;
>> struct tcf_id id;
>> @@ -2092,7 +2257,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>> struct match *match,
>> NL_ATTR_FOR_EACH(nla, left, actions, actions_len) {
>> if (flower.action_count >= TCA_ACT_MAX_NUM) {
>> VLOG_DBG_RL(&rl, "Can only support %d actions", TCA_ACT_MAX_NUM);
>> - return EOPNOTSUPP;
>> + err = EOPNOTSUPP;
>> + goto out;
>> }
>> action = &flower.actions[flower.action_count];
>> if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>> @@ -2102,7 +2268,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>> struct match *match,
>>
>> if (!outdev) {
>> VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port);
>> - return ENODEV;
>> + err = ENODEV;
>> + goto out;
>> }
>> action->out.ifindex_out = netdev_get_ifindex(outdev);
>> action->out.ingress = is_internal_port(netdev_get_type(outdev));
>> @@ -2140,7 +2307,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>> struct match *match,
>>
>> err = parse_put_flow_set_action(&flower, action, set, set_len);
>> if (err) {
>> - return err;
>> + goto out;
>> }
>> if (action->type == TC_ACT_ENCAP) {
>> action->encap.tp_dst = info->tp_dst_port;
>> @@ -2153,7 +2320,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>> struct match *match,
>> err = parse_put_flow_set_masked_action(&flower, action, set,
>> set_len, true);
>> if (err) {
>> - return err;
>> + goto out;
>> }
>> } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
>> const struct nlattr *ct = nl_attr_get(nla);
>> @@ -2165,7 +2332,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>> struct match *match,
>>
>> err = parse_put_flow_ct_action(&flower, action, ct, ct_len);
>> if (err) {
>> - return err;
>> + goto out;
>> }
>> } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT_CLEAR) {
>> action->type = TC_ACT_CT;
>> @@ -2181,20 +2348,29 @@ netdev_tc_flow_put(struct netdev *netdev,
>> struct match *match,
>> action->chain = 0; /* 0 is reserved and not used by recirc. */
>> flower.action_count++;
>> } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
>> - struct dpif_sflow_attr sflow_attr;
>> -
>> - memset(&sflow_attr, 0, sizeof sflow_attr);
>> - sgid_alloc_ctx(&sflow_attr);
>> + err = parse_sample_action(&flower, action, nla, tnl, &sample_gid,
>> + ufid);
>> + if (err) {
>> + goto out;
>> + }
>> + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>> + err = parse_userspace_action(&flower, action, nla, tnl,
>> + &sample_gid, ufid);
>> + if (err) {
>> + goto out;
>> + }
>> } else {
>> VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>> nl_attr_type(nla));
>> - return EOPNOTSUPP;
>> + err = EOPNOTSUPP;
>> + goto out;
>> }
>> }
>>
>> if ((chain || recirc_act) && !info->recirc_id_shared_with_tc) {
>> VLOG_ERR_RL(&error_rl, "flow_put: recirc_id sharing not supported");
>> - return EOPNOTSUPP;
>> + err = EOPNOTSUPP;
>> + goto out;
>> }
>>
>> if (get_ufid_tc_mapping(ufid, &id) == 0) {
>> @@ -2206,20 +2382,30 @@ netdev_tc_flow_put(struct netdev *netdev,
>> struct match *match,
>> prio = get_prio_for_tc_flower(&flower);
>> if (prio == 0) {
>> VLOG_ERR_RL(&rl, "couldn't get tc prio: %s", ovs_strerror(ENOSPC));
>> - return ENOSPC;
>> + err = ENOSPC;
>> + goto out;
>> }
>>
>> flower.act_cookie.data = ufid;
>> flower.act_cookie.len = sizeof *ufid;
>>
>> block_id = get_block_id_from_netdev(netdev);
>> - id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
>> + id = tc_make_tcf_id_all(ifindex, block_id, chain, prio, hook,
>> sample_gid);
>> err = tc_replace_flower(&id, &flower);
>> if (!err) {
>> if (stats) {
>> memset(stats, 0, sizeof *stats);
>> }
>> add_ufid_tc_mapping(netdev, ufid, &id);
>> + } else {
>> + goto out;
>> + }
>>
>> Guess this will look nicer:
>>
>> |if (err) { goto out; } if (stats) { memset(stats, 0, sizeof *stats); } add_ufid_tc_mapping(netdev, ufid, &id); return 0; |
>>
>> +
>> + return 0;
>> +
>> +out:
>> + if (sample_gid) {
>> + sgid_free(sample_gid);
>> }
>>
>> return err;
>> diff --git a/lib/tc.c b/lib/tc.c
>> index 33287ea05..c5788220f 100644
>> --- a/lib/tc.c
>> +++ b/lib/tc.c
>> @@ -23,14 +23,15 @@
>> #include <linux/if_packet.h>
>> #include <linux/rtnetlink.h>
>> #include <linux/tc_act/tc_csum.h>
>> +#include <linux/tc_act/tc_ct.h>
>> #include <linux/tc_act/tc_gact.h>
>> #include <linux/tc_act/tc_mirred.h>
>> #include <linux/tc_act/tc_mpls.h>
>> #include <linux/tc_act/tc_pedit.h>
>> +#include <linux/tc_act/tc_sample.h>
>> #include <linux/tc_act/tc_skbedit.h>
>> #include <linux/tc_act/tc_tunnel_key.h>
>> #include <linux/tc_act/tc_vlan.h>
>> -#include <linux/tc_act/tc_ct.h>
>> #include <linux/gen_stats.h>
>> #include <net/if.h>
>> #include <unistd.h>
>> @@ -1341,6 +1342,38 @@ nl_parse_act_gact(struct nlattr *options,
>> struct tc_flower *flower)
>> return 0;
>> }
>>
>> +static const struct nl_policy sample_policy[] = {
>> + [TCA_SAMPLE_PARMS] = { .type = NL_A_UNSPEC,
>> + .min_len = sizeof(struct tc_sample),
>> + .optional = false, },
>> + [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NL_A_U32,
>> + .optional = false, },
>> + [TCA_SAMPLE_RATE] = { .type = NL_A_U32,
>> + .optional = false, },
>> +};
>> +
>> +static int
>> +nl_parse_act_sample(struct nlattr *options, struct tc_flower
>> *flower)
>> +{
>> + struct nlattr *sample_attrs[ARRAY_SIZE(sample_policy)];
>> + struct tc_action *action;
>> +
>> + if (!nl_parse_nested(options, sample_policy, sample_attrs,
>> + ARRAY_SIZE(sample_policy))) {
>> + VLOG_ERR_RL(&error_rl, "failed to parse sample action options");
>> + return EPROTO;
>> + }
>> +
>> + action = &flower->actions[flower->action_count++];
>> + action->type = TC_ACT_SAMPLE;
>> + action->sample.group_id =
>> + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_PSAMPLE_GROUP]);
>> + action->sample.rate =
>> + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_RATE]);
>> +
>> + return 0;
>> +}
>> +
>> static const struct nl_policy mirred_policy[] = {
>> [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
>> .min_len = sizeof(struct tc_mirred),
>> @@ -1749,6 +1782,8 @@ nl_parse_single_action(struct nlattr
>> *action, struct tc_flower *flower,
>> /* Added for TC rule only (not in OvS rule) so ignore. */
>> } else if (!strcmp(act_kind, "ct")) {
>> nl_parse_act_ct(act_options, flower);
>> + } else if (!strcmp(act_kind, "sample")) {
>> + nl_parse_act_sample(act_options, flower);
>> } else {
>> VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
>> err = EINVAL;
>> @@ -2375,6 +2410,23 @@ nl_msg_put_act_mirred(struct ofpbuf
>> *request, int ifindex, int action,
>> nl_msg_end_nested(request, offset);
>> }
>>
>> +static void
>> +nl_msg_put_act_sample(struct ofpbuf *request, uint32_t rate,
>> uint32_t group_id)
>> +{
>> + size_t offset;
>> +
>> + nl_msg_put_string(request, TCA_ACT_KIND, "sample");
>> + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS |
>> NLA_F_NESTED);
>> + {
>> + struct tc_sample parm = { .action = TC_ACT_PIPE };
>> +
>> + nl_msg_put_unspec(request, TCA_SAMPLE_PARMS, &parm, sizeof parm);
>> + nl_msg_put_u32(request, TCA_SAMPLE_RATE, rate);
>> + nl_msg_put_u32(request, TCA_SAMPLE_PSAMPLE_GROUP, group_id);
>> + }
>> + nl_msg_end_nested(request, offset);
>> +}
>> +
>> static inline void
>> nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
>> if (ck->len) {
>> @@ -2634,6 +2686,13 @@ nl_msg_put_flower_acts(struct ofpbuf
>> *request, struct tc_flower *flower)
>> nl_msg_end_nested(request, act_offset);
>> }
>> break;
>> + case TC_ACT_SAMPLE: {
>> + act_offset = nl_msg_start_nested(request, act_index++);
>> + nl_msg_put_act_sample(request, action->sample.rate,
>> + action->sample.group_id);
>> + nl_msg_end_nested(request, act_offset);
>> + }
>> + break;
>> case TC_ACT_OUTPUT: {
>> if (!released && flower->tunnel) {
>> act_offset = nl_msg_start_nested(request, act_index++);
>> diff --git a/lib/tc.h b/lib/tc.h
>> index 2e4084f48..f764d7d1e 100644
>> --- a/lib/tc.h
>> +++ b/lib/tc.h
>> @@ -174,6 +174,7 @@ enum tc_action_type {
>> TC_ACT_MPLS_SET,
>> TC_ACT_GOTO,
>> TC_ACT_CT,
>> + TC_ACT_SAMPLE,
>> };
>>
>> enum nat_type {
>> @@ -256,6 +257,11 @@ struct tc_action {
>> bool force;
>> bool commit;
>> } ct;
>> +
>> + struct {
>> + uint32_t rate;
>> + uint32_t group_id;
>> + } sample;
>> };
>>
>> enum tc_action_type type;
>> @@ -294,12 +300,14 @@ tc_make_tcf_id(int ifindex, uint32_t
>> block_id, uint16_t prio,
>> }
>>
>> static inline struct tcf_id
>> -tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain,
>> - uint16_t prio, enum tc_qdisc_hook hook)
>> +tc_make_tcf_id_all(int ifindex, uint32_t block_id, uint32_t chain,
>> + uint16_t prio, enum tc_qdisc_hook hook,
>> + uint32_t sample_group_id)
>> {
>> struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook);
>>
>> id.chain = chain;
>> + id.sample_group_id = sample_group_id;
>>
>> return id;
>> }
>> @@ -313,7 +321,8 @@ is_tcf_id_eq(struct tcf_id *id1, struct tcf_id
>> *id2)
>> && id1->hook == id2->hook
>> && id1->block_id == id2->block_id
>> && id1->ifindex == id2->ifindex
>> - && id1->chain == id2->chain;
>> + && id1->chain == id2->chain
>> + && id1->sample_group_id == id2->sample_group_id;
>> }
>>
>> enum tc_offload_policy {
>> --
>> 2.21.0
>>
More information about the dev
mailing list