[ovs-dev] [PATCH v14 7/7] netdev-offload-tc: Add offload support for sFlow

Chris Mi cmi at nvidia.com
Fri Sep 10 11:02:48 UTC 2021


On 9/9/2021 10:29 PM, Eelco Chaudron wrote:
>
> On 15 Jul 2021, at 8:01, Chris Mi wrote:
>
>     Create a unique group ID to map the sFlow info when offloading sFlow
>     action to TC. When showing the offloaded datapath flows, translate
>     the
>     group ID from TC sample action to sFlow info using the mapping.
>
> See comments below and this completes my review of the v14. See the 
> other email on the update of the flows going wrong.
>
I'm struggling with the test these days and make little progress. I 
usually use sflowtool to receive the sFlow packets.
After investigation, I found the test uses 'ovstest test-sflow'. If I 
use it manually, it works both for offload and none-offload.
But if I integrate it in the new test, it only works for non-offload. It 
can't receive sFlow packets for offload.
I don't know what is missing. So I plan to address v14 comments next 
week and ignore the test now. Maybe I can find something
unusual later on.  And thanks for the full review on v14.

-Chris
>
> I’ll do a more thorough review on v15 assuming the issue gets fixed, 
> especially the sgid allocation/re-use/free.
>
>     Signed-off-by: Chris Mi <cmi at nvidia.com>
>     Reviewed-by: Eli Britstein <elibr at nvidia.com>
>     ---
>     NEWS | 1 +
>     lib/netdev-offload-tc.c | 212
>     +++++++++++++++++++++++++++++++++++++---
>     lib/tc.c | 61 +++++++++++-
>     lib/tc.h | 15 ++-
>     4 files changed, 272 insertions(+), 17 deletions(-)
>
>     diff --git a/NEWS b/NEWS
>     index 6cdccc715..7c0361e18 100644
>     --- a/NEWS
>     +++ b/NEWS
>     @@ -50,6 +50,7 @@ Post-v2.15.0
>     - OVS now reports the datapath capability 'ct_zero_snat', which
>     reflects
>     whether the SNAT with all-zero IP address is supported.
>     See ovs-vswitchd.conf.db(5) for details.
>     + - Add sFlow offload support for kernel (netlink) datapath.
>
>     v2.15.0 - 15 Feb 2021
>     diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>     index 2f16cf279..71e51394f 100644
>     --- a/lib/netdev-offload-tc.c
>     +++ b/lib/netdev-offload-tc.c
>     @@ -20,6 +20,7 @@
>     #include <linux/if_ether.h>
>
>     #include "dpif.h"
>     +#include "dpif-offload-provider.h"
>     #include "hash.h"
>     #include "openvswitch/hmap.h"
>     #include "openvswitch/match.h"
>     @@ -1087,6 +1088,19 @@ parse_tc_flower_to_match(struct tc_flower
>     *flower,
>     action = flower->actions;
>     for (i = 0; i < flower->action_count; i++, action++) {
>     switch (action->type) {
>     + case TC_ACT_SAMPLE: {
>     + const struct sgid_node *node;
>     +
>     + node = sgid_find(action->sample.group_id);
>     + if (!node) {
>     + VLOG_ERR_RL(&error_rl, "%s: sgid node is NULL, sgid: %d",
>     + __func__, action->sample.group_id);
>
> This should probably be a warning, as this could happen if the DP has 
> not yet been synced? i.e. revalidator being in the process of cleanup 
> and someone requesting dp content?
>
>     + return ENOENT;
>     + }
>     + nl_msg_put(buf, node->sflow.action,
>     + node->sflow.action->nla_len);
>     + }
>     + break;
>     case TC_ACT_VLAN_POP: {
>     nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
>     }
>     @@ -1825,6 +1839,156 @@ parse_match_ct_state_to_flower(struct
>     tc_flower *flower, struct match *match)
>     }
>     }
>
>     +static int
>     +parse_userspace_attributes(const struct nlattr *actions,
>     + struct dpif_sflow_attr *sflow_attr)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + const struct nlattr *nla;
>     + unsigned int left;
>     +
>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>     + if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
>     + struct user_action_cookie *cookie;
>     +
>     + cookie = CONST_CAST(struct user_action_cookie *, nl_attr_get(nla));
>     + if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
>
> Here I think the userdata type should be changed to "struct nlattr * " 
> just like in the struct dpif_upcall.
> This should probably be done in the patch introducing the 
> dpif_sflow_attr, so it will also be aligned with the actions member.
>
>     + sflow_attr->userdata = CONST_CAST(void *, nl_attr_get(nla));
>     + sflow_attr->userdata_len = nl_attr_get_size(nla);
>     + return 0;
>     + }
>     + }
>     + }
>     +
>     + VLOG_DBG_RL(&rl, "%s: cannot offload userspace action other than
>     sFlow",
>     + __func__);
>     + return EOPNOTSUPP;
>     +}
>     +
>     +static int
>     +parse_sample_actions_attribute(const struct nlattr *actions,
>     + struct dpif_sflow_attr *sflow_attr)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + const struct nlattr *nla;
>     + unsigned int left;
>     + int err = EINVAL;
>     +
>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>     + if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>     + err = parse_userspace_attributes(nla, sflow_attr);
>     + } else {
>     + /* We can't offload other nested actions */
>     + VLOG_DBG_RL(&rl, "%s: can only offload "
>     + "OVS_ACTION_ATTR_USERSPACE attribute", __func__);
>     + return EINVAL;
>     + }
>     + }
>     +
>     + if (err) {
>     + VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE
>     attribute",
>     + __func__);
>     + }
>     + return err;
>     +}
>     +
>     +static int
>     +parse_sample_action(struct tc_flower *flower, struct tc_action
>     *tc_action,
>     + const struct nlattr *sample_action,
>     + const struct flow_tnl *tnl, uint32_t *group_id,
>     + const ovs_u128 *ufid)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + struct dpif_sflow_attr sflow_attr;
>     + const struct nlattr *nla;
>     + unsigned int left;
>     + int ret = EINVAL;
>     +
>     + if (*group_id) {
>     + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>     + "per flow is supported", __func__);
>     + return EOPNOTSUPP;
>     + }
>     +
>     + memset(&sflow_attr, 0, sizeof sflow_attr);
>     + sflow_attr.ufid = *ufid;
>     + sflow_attr.action = sample_action;
>     +
>     + if (flower->tunnel) {
>     + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>     + }
>     +
>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
>     + if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
>     + ret = parse_sample_actions_attribute(nla, &sflow_attr);
>     + } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
>     + tc_action->type = TC_ACT_SAMPLE;
>     + tc_action->sample.rate = UINT32_MAX / nl_attr_get_u32(nla);
>     + } else {
>     + return EINVAL;
>     + }
>     + }
>     +
>     + if (!tc_action->sample.rate || ret) {
>     + return EINVAL;
>     + }
>     +
>     + *group_id = sgid_alloc_ctx(&sflow_attr);
>     + if (!*group_id) {
>     + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>     action",
>     + __func__);
>     + return ENOENT;
>     + }
>     + tc_action->sample.group_id = *group_id;
>     + flower->action_count++;
>     +
>     + return 0;
>     +}
>     +
>     +static int
>     +parse_userspace_action(struct tc_flower *flower, struct tc_action
>     *tc_action,
>     + const struct nlattr *userspace_action,
>     + const struct flow_tnl *tnl, uint32_t *group_id,
>     + const ovs_u128 *ufid)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + struct dpif_sflow_attr sflow_attr;
>     + int err;
>     +
>     + if (*group_id) {
>     + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>     + "per flow is supported", __func__);
>     + return EOPNOTSUPP;
>     + }
>     +
>     + /* If sampling rate is 1, there is only a sFlow cookie inside of a
>     + * userspace action, but no sample attribute. That means we can
>     + * only offload userspace actions for sFlow.
>     + */
>     + memset(&sflow_attr, 0, sizeof sflow_attr);
>     + sflow_attr.ufid = *ufid;
>     + if (flower->tunnel) {
>     + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>     + }
>     + err = parse_userspace_attributes(userspace_action, &sflow_attr);
>     + if (err) {
>     + return err;
>     + }
>     + sflow_attr.action = userspace_action;
>     + *group_id = sgid_alloc_ctx(&sflow_attr);
>     + if (!*group_id) {
>     + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>     action",
>     + __func__);
>     + return ENOENT;
>     + }
>     + tc_action->type = TC_ACT_SAMPLE;
>     + tc_action->sample.group_id = *group_id;
>     + tc_action->sample.rate = 1;
>     + flower->action_count++;
>     +
>     + return 0;
>     +}
>     +
>     static int
>     netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>     struct nlattr *actions, size_t actions_len,
>     @@ -1840,6 +2004,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     const struct flow_tnl *tnl_mask = &mask->tunnel;
>     struct tc_action *action;
>     bool recirc_act = false;
>     + uint32_t sample_gid = 0;
>     uint32_t block_id = 0;
>     struct nlattr *nla;
>     struct tcf_id id;
>     @@ -2092,7 +2257,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     NL_ATTR_FOR_EACH(nla, left, actions, actions_len) {
>     if (flower.action_count >= TCA_ACT_MAX_NUM) {
>     VLOG_DBG_RL(&rl, "Can only support %d actions", TCA_ACT_MAX_NUM);
>     - return EOPNOTSUPP;
>     + err = EOPNOTSUPP;
>     + goto out;
>     }
>     action = &flower.actions[flower.action_count];
>     if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>     @@ -2102,7 +2268,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>
>     if (!outdev) {
>     VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port);
>     - return ENODEV;
>     + err = ENODEV;
>     + goto out;
>     }
>     action->out.ifindex_out = netdev_get_ifindex(outdev);
>     action->out.ingress = is_internal_port(netdev_get_type(outdev));
>     @@ -2140,7 +2307,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>
>     err = parse_put_flow_set_action(&flower, action, set, set_len);
>     if (err) {
>     - return err;
>     + goto out;
>     }
>     if (action->type == TC_ACT_ENCAP) {
>     action->encap.tp_dst = info->tp_dst_port;
>     @@ -2153,7 +2320,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     err = parse_put_flow_set_masked_action(&flower, action, set,
>     set_len, true);
>     if (err) {
>     - return err;
>     + goto out;
>     }
>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
>     const struct nlattr *ct = nl_attr_get(nla);
>     @@ -2165,7 +2332,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>
>     err = parse_put_flow_ct_action(&flower, action, ct, ct_len);
>     if (err) {
>     - return err;
>     + goto out;
>     }
>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT_CLEAR) {
>     action->type = TC_ACT_CT;
>     @@ -2181,20 +2348,29 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     action->chain = 0; /* 0 is reserved and not used by recirc. */
>     flower.action_count++;
>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
>     - struct dpif_sflow_attr sflow_attr;
>     -
>     - memset(&sflow_attr, 0, sizeof sflow_attr);
>     - sgid_alloc_ctx(&sflow_attr);
>     + err = parse_sample_action(&flower, action, nla, tnl, &sample_gid,
>     + ufid);
>     + if (err) {
>     + goto out;
>     + }
>     + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>     + err = parse_userspace_action(&flower, action, nla, tnl,
>     + &sample_gid, ufid);
>     + if (err) {
>     + goto out;
>     + }
>     } else {
>     VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>     nl_attr_type(nla));
>     - return EOPNOTSUPP;
>     + err = EOPNOTSUPP;
>     + goto out;
>     }
>     }
>
>     if ((chain || recirc_act) && !info->recirc_id_shared_with_tc) {
>     VLOG_ERR_RL(&error_rl, "flow_put: recirc_id sharing not supported");
>     - return EOPNOTSUPP;
>     + err = EOPNOTSUPP;
>     + goto out;
>     }
>
>     if (get_ufid_tc_mapping(ufid, &id) == 0) {
>     @@ -2206,20 +2382,30 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     prio = get_prio_for_tc_flower(&flower);
>     if (prio == 0) {
>     VLOG_ERR_RL(&rl, "couldn't get tc prio: %s", ovs_strerror(ENOSPC));
>     - return ENOSPC;
>     + err = ENOSPC;
>     + goto out;
>     }
>
>     flower.act_cookie.data = ufid;
>     flower.act_cookie.len = sizeof *ufid;
>
>     block_id = get_block_id_from_netdev(netdev);
>     - id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
>     + id = tc_make_tcf_id_all(ifindex, block_id, chain, prio, hook,
>     sample_gid);
>     err = tc_replace_flower(&id, &flower);
>     if (!err) {
>     if (stats) {
>     memset(stats, 0, sizeof *stats);
>     }
>     add_ufid_tc_mapping(netdev, ufid, &id);
>     + } else {
>     + goto out;
>     + }
>
> Guess this will look nicer:
>
> |if (err) { goto out; } if (stats) { memset(stats, 0, sizeof *stats); 
> } add_ufid_tc_mapping(netdev, ufid, &id); return 0; |
>
>     +
>     + return 0;
>     +
>     +out:
>     + if (sample_gid) {
>     + sgid_free(sample_gid);
>     }
>
>     return err;
>     diff --git a/lib/tc.c b/lib/tc.c
>     index 33287ea05..c5788220f 100644
>     --- a/lib/tc.c
>     +++ b/lib/tc.c
>     @@ -23,14 +23,15 @@
>     #include <linux/if_packet.h>
>     #include <linux/rtnetlink.h>
>     #include <linux/tc_act/tc_csum.h>
>     +#include <linux/tc_act/tc_ct.h>
>     #include <linux/tc_act/tc_gact.h>
>     #include <linux/tc_act/tc_mirred.h>
>     #include <linux/tc_act/tc_mpls.h>
>     #include <linux/tc_act/tc_pedit.h>
>     +#include <linux/tc_act/tc_sample.h>
>     #include <linux/tc_act/tc_skbedit.h>
>     #include <linux/tc_act/tc_tunnel_key.h>
>     #include <linux/tc_act/tc_vlan.h>
>     -#include <linux/tc_act/tc_ct.h>
>     #include <linux/gen_stats.h>
>     #include <net/if.h>
>     #include <unistd.h>
>     @@ -1341,6 +1342,38 @@ nl_parse_act_gact(struct nlattr *options,
>     struct tc_flower *flower)
>     return 0;
>     }
>
>     +static const struct nl_policy sample_policy[] = {
>     + [TCA_SAMPLE_PARMS] = { .type = NL_A_UNSPEC,
>     + .min_len = sizeof(struct tc_sample),
>     + .optional = false, },
>     + [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NL_A_U32,
>     + .optional = false, },
>     + [TCA_SAMPLE_RATE] = { .type = NL_A_U32,
>     + .optional = false, },
>     +};
>     +
>     +static int
>     +nl_parse_act_sample(struct nlattr *options, struct tc_flower
>     *flower)
>     +{
>     + struct nlattr *sample_attrs[ARRAY_SIZE(sample_policy)];
>     + struct tc_action *action;
>     +
>     + if (!nl_parse_nested(options, sample_policy, sample_attrs,
>     + ARRAY_SIZE(sample_policy))) {
>     + VLOG_ERR_RL(&error_rl, "failed to parse sample action options");
>     + return EPROTO;
>     + }
>     +
>     + action = &flower->actions[flower->action_count++];
>     + action->type = TC_ACT_SAMPLE;
>     + action->sample.group_id =
>     + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_PSAMPLE_GROUP]);
>     + action->sample.rate =
>     + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_RATE]);
>     +
>     + return 0;
>     +}
>     +
>     static const struct nl_policy mirred_policy[] = {
>     [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
>     .min_len = sizeof(struct tc_mirred),
>     @@ -1749,6 +1782,8 @@ nl_parse_single_action(struct nlattr
>     *action, struct tc_flower *flower,
>     /* Added for TC rule only (not in OvS rule) so ignore. */
>     } else if (!strcmp(act_kind, "ct")) {
>     nl_parse_act_ct(act_options, flower);
>     + } else if (!strcmp(act_kind, "sample")) {
>     + nl_parse_act_sample(act_options, flower);
>     } else {
>     VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
>     err = EINVAL;
>     @@ -2375,6 +2410,23 @@ nl_msg_put_act_mirred(struct ofpbuf
>     *request, int ifindex, int action,
>     nl_msg_end_nested(request, offset);
>     }
>
>     +static void
>     +nl_msg_put_act_sample(struct ofpbuf *request, uint32_t rate,
>     uint32_t group_id)
>     +{
>     + size_t offset;
>     +
>     + nl_msg_put_string(request, TCA_ACT_KIND, "sample");
>     + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS |
>     NLA_F_NESTED);
>     + {
>     + struct tc_sample parm = { .action = TC_ACT_PIPE };
>     +
>     + nl_msg_put_unspec(request, TCA_SAMPLE_PARMS, &parm, sizeof parm);
>     + nl_msg_put_u32(request, TCA_SAMPLE_RATE, rate);
>     + nl_msg_put_u32(request, TCA_SAMPLE_PSAMPLE_GROUP, group_id);
>     + }
>     + nl_msg_end_nested(request, offset);
>     +}
>     +
>     static inline void
>     nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
>     if (ck->len) {
>     @@ -2634,6 +2686,13 @@ nl_msg_put_flower_acts(struct ofpbuf
>     *request, struct tc_flower *flower)
>     nl_msg_end_nested(request, act_offset);
>     }
>     break;
>     + case TC_ACT_SAMPLE: {
>     + act_offset = nl_msg_start_nested(request, act_index++);
>     + nl_msg_put_act_sample(request, action->sample.rate,
>     + action->sample.group_id);
>     + nl_msg_end_nested(request, act_offset);
>     + }
>     + break;
>     case TC_ACT_OUTPUT: {
>     if (!released && flower->tunnel) {
>     act_offset = nl_msg_start_nested(request, act_index++);
>     diff --git a/lib/tc.h b/lib/tc.h
>     index 2e4084f48..f764d7d1e 100644
>     --- a/lib/tc.h
>     +++ b/lib/tc.h
>     @@ -174,6 +174,7 @@ enum tc_action_type {
>     TC_ACT_MPLS_SET,
>     TC_ACT_GOTO,
>     TC_ACT_CT,
>     + TC_ACT_SAMPLE,
>     };
>
>     enum nat_type {
>     @@ -256,6 +257,11 @@ struct tc_action {
>     bool force;
>     bool commit;
>     } ct;
>     +
>     + struct {
>     + uint32_t rate;
>     + uint32_t group_id;
>     + } sample;
>     };
>
>     enum tc_action_type type;
>     @@ -294,12 +300,14 @@ tc_make_tcf_id(int ifindex, uint32_t
>     block_id, uint16_t prio,
>     }
>
>     static inline struct tcf_id
>     -tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain,
>     - uint16_t prio, enum tc_qdisc_hook hook)
>     +tc_make_tcf_id_all(int ifindex, uint32_t block_id, uint32_t chain,
>     + uint16_t prio, enum tc_qdisc_hook hook,
>     + uint32_t sample_group_id)
>     {
>     struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook);
>
>     id.chain = chain;
>     + id.sample_group_id = sample_group_id;
>
>     return id;
>     }
>     @@ -313,7 +321,8 @@ is_tcf_id_eq(struct tcf_id *id1, struct tcf_id
>     *id2)
>     && id1->hook == id2->hook
>     && id1->block_id == id2->block_id
>     && id1->ifindex == id2->ifindex
>     - && id1->chain == id2->chain;
>     + && id1->chain == id2->chain
>     + && id1->sample_group_id == id2->sample_group_id;
>     }
>
>     enum tc_offload_policy {
>     -- 
>     2.21.0
>



More information about the dev mailing list