[ovs-dev] [PATCH v9 11/11] netdev-offload-tc: Add offload support for sFlow

Eelco Chaudron echaudro at redhat.com
Tue Jan 12 20:27:55 UTC 2021



On 15 Dec 2020, at 4:38, Chris Mi wrote:

> Create a unique group ID to map the sFlow info when offloading sFlow
> action to TC. When showing the offloaded datapath flows, translate the
> group ID from TC sample action to sFlow info using the mapping.
>
> Signed-off-by: Chris Mi <cmi at nvidia.com>
> Reviewed-by: Eli Britstein <elibr at nvidia.com>
> ---
>  lib/netdev-offload-tc.c | 171 
> +++++++++++++++++++++++++++++++++++++---
>  lib/tc.c                |  61 +++++++++++++-
>  lib/tc.h                |   9 ++-
>  3 files changed, 226 insertions(+), 15 deletions(-)
>
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index e5bf719a4..0da16feed 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -988,6 +988,18 @@ parse_tc_flower_to_match(struct tc_flower 
> *flower,
>          action = flower->actions;
>          for (i = 0; i < flower->action_count; i++, action++) {
>              switch (action->type) {
> +            case TC_ACT_SAMPLE: {
> +                const struct gid_node *node;
> +
> +                node = gid_find(action->sample.action_group_id);
> +                if (!node) {
> +                    VLOG_ERR_RL(&error_rl, "gid node is NULL, gid: 
> %d",
> +                                action->sample.action_group_id);

Think this should be a warning as the gid can be deleted, but the tc dp 
might have not been fully synced.

> +                    return ENOENT;
> +                }
> +                nl_msg_put(buf, node->sflow.sflow, 
> node->sflow.sflow->nla_len);
> +            }
> +            break;
>              case TC_ACT_VLAN_POP: {
>                  nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
>              }
> @@ -1667,6 +1679,119 @@ flower_match_to_tun_opt(struct tc_flower 
> *flower, const struct flow_tnl *tnl,
>      flower->mask.tunnel.metadata.present.len = 
> tnl->metadata.present.len;
>  }
>
> +static int
> +parse_sflow_cookie(const struct nlattr *actions,
> +                   struct dpif_sflow_attr *sflow_attr)

Your not parsing an sflow cookie, but the userspace_attributes to 
extract the userspace cookie, so the name should be 
parse_userspace_attributes()

> +{
> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> +    const struct nlattr *nla;
> +    unsigned int left;
> +
> +    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
> +        if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
> +            struct user_action_cookie *cookie;
> +
> +            cookie = CONST_CAST(struct user_action_cookie *, 
> nl_attr_get(nla));
> +            if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
> +                sflow_attr->userdata = CONST_CAST(void *, 
> nl_attr_get(nla));
> +                sflow_attr->userdata_len = nl_attr_get_size(nla);
> +                return 0;
> +            }				 +        }

I assume that any other OVS_USERSPACE_ATTR_* attribute present will not 
withhold offloading the sflow part.

> +    }
> +
> +    VLOG_DBG_RL(&rl, "%s: cannot offload userspace action other than 
> sFlow",
> +                __func__);
> +    return EOPNOTSUPP;
> +}
> +
> +static int
> +parse_sample_action_userspace(const struct nlattr *actions,
> +                              struct dpif_sflow_attr *sflow_attr)

Rename this to parse_sample_actions_attribute() as you are parsing a 
list of actions, not a single action! Also, the code should be like 
this, because if any other attributes exist you can not offload them. 
See also comments on earlier versions:

1708 static int
1709 parse_sample_actions_attribute(const struct nlattr *actions,
1710                                struct dpif_sflow_attr *sflow_attr)
1711 {
1712     const struct nlattr *nla;
1713     unsigned int left;
1714     int err = EINVAL;
1715
1716     NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
1717         if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
1718             err = parse_userspace_attributes(nla, sflow_attr);
1719         } else {
1720             /* We can't offload other nested actions */
1721             VLOG_DBG_RL(&error_rl,
1722                         "%s: can only offload 
OVS_ACTION_ATTR_USERSPACE attribute",
1723                         __func__);
1724             return EINVAL;
1725         }
1726     }
1727
1728     if (err) {
1729         VLOG_DBG_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE 
attribute",
1730                     __func__);
1731     }
1732     return err;
1733 }

> +{
> +    const struct nlattr *nla;
> +    unsigned int left;
> +
> +    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
> +            return parse_sflow_cookie(nla, sflow_attr);
> +        }
> +    }
> +
> +    VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE 
> attribute",
> +                __func__);
> +    return EINVAL;
> +}
> +
> +static int
> +parse_sample_action(struct tc_flower *flower, struct tc_action 
> *tc_action,
> +                    const struct nlattr *sample_action,
> +                    const struct flow_tnl *tnl, uint32_t *group_id)
> +{
> +    struct dpif_sflow_attr sflow_attr;
> +    const struct nlattr *nla;
> +    unsigned int left;
> +    int ret = EINVAL;
> +
> +    memset(&sflow_attr, 0, sizeof sflow_attr);
> +    sflow_attr.sflow = sample_action;
> +
> +    if (flower->tunnel) {
> +        sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
> +    }
> +
> +    NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
> +        if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
> +            ret = parse_sample_action_userspace(nla, &sflow_attr);

Make this parse_sample_actions_attribute(nla, &sflow_attr) as you are 
not parsing userspace, but a list of actions!

> +        } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) 
> {
> +            tc_action->type = TC_ACT_SAMPLE;
> +            tc_action->sample.action_rate = UINT32_MAX / 
> nl_attr_get_u32(nla);
> +        } else {
> +            return EINVAL;
> +        }
> +    }
> +
> +    if (!tc_action->sample.action_rate || ret) {
> +        return EINVAL;
> +    }
> +
> +    *group_id = gid_alloc_ctx(&sflow_attr);

Add error check, as I think gid_alloc_ctx() can fail.

> +    tc_action->sample.action_group_id = *group_id;
> +    flower->action_count++;
> +
> +    return 0;
> +}
> +
> +static int
> +parse_userspace_action(struct tc_flower *flower, struct tc_action 
> *tc_action,
> +                       const struct nlattr *sample_action,

sample_action points not to sample_action attribute, but the userspace 
action, so please name it userspace_action

> +                       const struct flow_tnl *tnl, uint32_t 
> *group_id)
> +{
> +    struct dpif_sflow_attr sflow_attr;
> +    int err;
> +
> +    /* If sampling rate is 1, there is only a sFlow cookie inside of 
> a
> +     * userspace action, but no sample attribute. That means we can
> +     * only offload userspace actions for sFlow.
> +     */
> +    memset(&sflow_attr, 0, sizeof sflow_attr);
> +    if (flower->tunnel) {
> +        sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
> +    }
> +    err = parse_sflow_cookie(sample_action, &sflow_attr);

parse_userspace_attributes()

> +    if (err) {
> +        return err;
> +    }
> +    sflow_attr.sflow = sample_action;
> +    *group_id = gid_alloc_ctx(&sflow_attr);

Add error check, as I think gid_alloc_ctx() can fail.

> +    tc_action->type = TC_ACT_SAMPLE;
> +    tc_action->sample.action_group_id = *group_id;
> +    tc_action->sample.action_rate = 1;
> +    flower->action_count++;
> +
> +    return 0;
> +}
> +
>  static int
>  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>                     struct nlattr *actions, size_t actions_len,
> @@ -1683,6 +1808,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>      struct tc_action *action;
>      bool recirc_act = false;
>      uint32_t block_id = 0; +    uint32_t group_id = 0;

Maybe call it sample_gid ? so we know it's used for sampling?

>      struct nlattr *nla;
>      struct tcf_id id;
>      uint32_t chain;
> @@ -1972,7 +2098,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>      NL_ATTR_FOR_EACH(nla, left, actions, actions_len) {
>          if (flower.action_count >= TCA_ACT_MAX_NUM) {
>              VLOG_DBG_RL(&rl, "Can only support %d actions", 
> TCA_ACT_MAX_NUM);
> -            return EOPNOTSUPP;
> +            err = EOPNOTSUPP;
> +            goto out;
>          }
>          action = &flower.actions[flower.action_count];
>          if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
> @@ -1982,7 +2109,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>
>              if (!outdev) {
>                  VLOG_DBG_RL(&rl, "Can't find netdev for output port 
> %d", port);
> -                return ENODEV;
> +                err = ENODEV;
> +                goto out;
>              }
>              action->out.ifindex_out = netdev_get_ifindex(outdev);
>              action->out.ingress = 
> is_internal_port(netdev_get_type(outdev));
> @@ -2020,7 +2148,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>
>              err = parse_put_flow_set_action(&flower, action, set, 
> set_len);
>              if (err) {
> -                return err;
> +                goto out;
>              }
>              if (action->type == TC_ACT_ENCAP) {
>                  action->encap.tp_dst = info->tp_dst_port;
> @@ -2033,7 +2161,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>              err = parse_put_flow_set_masked_action(&flower, action, 
> set,
>                                                     set_len, true);
>              if (err) {
> -                return err;
> +                goto out;
>              }
>          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
>              const struct nlattr *ct = nl_attr_get(nla);
> @@ -2041,7 +2169,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>
>              err = parse_put_flow_ct_action(&flower, action, ct, 
> ct_len);
>              if (err) {
> -                return err;
> +                goto out;
>              }
>          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT_CLEAR) {
>              action->type = TC_ACT_CT;
> @@ -2057,20 +2185,27 @@ netdev_tc_flow_put(struct netdev *netdev, 
> struct match *match,
>              action->chain = 0;  /* 0 is reserved and not used by 
> recirc. */
>              flower.action_count++;
>          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
> -            struct dpif_sflow_attr sflow_attr;
> -
> -            memset(&sflow_attr, 0, sizeof sflow_attr);
> -            gid_alloc_ctx(&sflow_attr);
> +            err = parse_sample_action(&flower, action, nla, tnl, 
> &group_id);
> +            if (err) {
> +                goto out;
> +            }

For this case and the case below we should probably error out if the 
group_id is already used?
 From the command line you can probably add a dpath rule with two sample 
actions.

> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
> +            err = parse_userspace_action(&flower, action, nla, tnl, 
> &group_id);
> +            if (err) {
> +                goto out;
> +            }
>          } else {
>              VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>                          nl_attr_type(nla));
> -            return EOPNOTSUPP;
> +            err = EOPNOTSUPP;
> +            goto out;
>          }
>      }
>
>      if ((chain || recirc_act) && !info->recirc_id_shared_with_tc) {
>          VLOG_ERR_RL(&error_rl, "flow_put: recirc_id sharing not 
> supported");
> -        return EOPNOTSUPP;
> +        err = EOPNOTSUPP;
> +        goto out;
>      }
>
>      if (get_ufid_tc_mapping(ufid, &id) == 0) {
> @@ -2082,20 +2217,30 @@ netdev_tc_flow_put(struct netdev *netdev, 
> struct match *match,
>      prio = get_prio_for_tc_flower(&flower);
>      if (prio == 0) {
>          VLOG_ERR_RL(&rl, "couldn't get tc prio: %s", 
> ovs_strerror(ENOSPC));
> -        return ENOSPC;
> +        err = ENOSPC;
> +        goto out;
>      }
>
>      flower.act_cookie.data = ufid;
>      flower.act_cookie.len = sizeof *ufid;
>
>      block_id = get_block_id_from_netdev(netdev);
> -    id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
> +    id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook, 
> group_id);
>      err = tc_replace_flower(&id, &flower);
>      if (!err) {
>          if (stats) {
>              memset(stats, 0, sizeof *stats);
>          }
>          add_ufid_tc_mapping(netdev, ufid, &id);
> +    } else {
> +        goto out;
> +    }
> +
> +    return 0;
> +
> +out:
> +    if (group_id) {
> +        gid_free(group_id);
>      }
>
>      return err;
> diff --git a/lib/tc.c b/lib/tc.c
> index c2de78bfe..dd67ad6b0 100644
> --- a/lib/tc.c
> +++ b/lib/tc.c
> @@ -23,14 +23,15 @@
>  #include <linux/if_packet.h>
>  #include <linux/rtnetlink.h>
>  #include <linux/tc_act/tc_csum.h>
> +#include <linux/tc_act/tc_ct.h>
>  #include <linux/tc_act/tc_gact.h>
>  #include <linux/tc_act/tc_mirred.h>
>  #include <linux/tc_act/tc_mpls.h>
>  #include <linux/tc_act/tc_pedit.h>
> +#include <linux/tc_act/tc_sample.h>
>  #include <linux/tc_act/tc_skbedit.h>
>  #include <linux/tc_act/tc_tunnel_key.h>
>  #include <linux/tc_act/tc_vlan.h>
> -#include <linux/tc_act/tc_ct.h>
>  #include <linux/gen_stats.h>
>  #include <net/if.h>
>  #include <unistd.h>
> @@ -1291,6 +1292,38 @@ nl_parse_act_gact(struct nlattr *options, 
> struct tc_flower *flower)
>      return 0;
>  }
>
> +static const struct nl_policy sample_policy[] = {
> +    [TCA_SAMPLE_PARMS] = { .type = NL_A_UNSPEC,
> +                           .min_len = sizeof(struct tc_sample),
> +                           .optional = false, },
> +    [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NL_A_U32,
> +                                   .optional = false, },
> +    [TCA_SAMPLE_RATE] = { .type = NL_A_U32,
> +                          .optional = false, },
> +};
> +
> +static int
> +nl_parse_act_sample(struct nlattr *options, struct tc_flower *flower)
> +{
> +    struct nlattr *sample_attrs[ARRAY_SIZE(sample_policy)];
> +    struct tc_action *action;
> +
> +    if (!nl_parse_nested(options, sample_policy, sample_attrs,
> +                         ARRAY_SIZE(sample_policy))) {
> +        VLOG_ERR_RL(&error_rl, "failed to parse sample action 
> options");
> +        return EPROTO;
> +    }
> +
> +    action = &flower->actions[flower->action_count++];
> +    action->type = TC_ACT_SAMPLE;
> +    action->sample.action_group_id =
> +        nl_attr_get_u32(sample_attrs[TCA_SAMPLE_PSAMPLE_GROUP]);
> +    action->sample.action_rate =
> +        nl_attr_get_u32(sample_attrs[TCA_SAMPLE_RATE]);
> +
> +    return 0;
> +}
> +
>  static const struct nl_policy mirred_policy[] = {
>      [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
>                             .min_len = sizeof(struct tc_mirred),
> @@ -1699,6 +1732,8 @@ nl_parse_single_action(struct nlattr *action, 
> struct tc_flower *flower,
>          /* Added for TC rule only (not in OvS rule) so ignore. */
>      } else if (!strcmp(act_kind, "ct")) {
>          nl_parse_act_ct(act_options, flower);
> +    } else if (!strcmp(act_kind, "sample")) {
> +        nl_parse_act_sample(act_options, flower);
>      } else {
>          VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", 
> act_kind);
>          err = EINVAL;
> @@ -2294,6 +2329,23 @@ nl_msg_put_act_mirred(struct ofpbuf *request, 
> int ifindex, int action,
>      nl_msg_end_nested(request, offset);
>  }
>
> +static void
> +nl_msg_put_act_sample(struct ofpbuf *request, uint32_t rate, uint32_t 
> group_id)
> +{
> +    size_t offset;
> +
> +    nl_msg_put_string(request, TCA_ACT_KIND, "sample");
> +    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS | 
> NLA_F_NESTED);
> +    {
> +        struct tc_sample parm = { .action = TC_ACT_PIPE };
> +
> +        nl_msg_put_unspec(request, TCA_SAMPLE_PARMS, &parm, sizeof 
> parm);
> +        nl_msg_put_u32(request, TCA_SAMPLE_RATE, rate);
> +        nl_msg_put_u32(request, TCA_SAMPLE_PSAMPLE_GROUP, group_id);
> +    }
> +    nl_msg_end_nested(request, offset);
> +}
> +
>  static inline void
>  nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
>      if (ck->len) {
> @@ -2553,6 +2605,13 @@ nl_msg_put_flower_acts(struct ofpbuf *request, 
> struct tc_flower *flower)
>                  nl_msg_end_nested(request, act_offset);
>              }
>              break;
> +            case TC_ACT_SAMPLE: {
> +                act_offset = nl_msg_start_nested(request, 
> act_index++);
> +                nl_msg_put_act_sample(request, 
> action->sample.action_rate,
> +                                      
> action->sample.action_group_id);
> +                nl_msg_end_nested(request, act_offset);
> +            }
> +            break;
>              case TC_ACT_OUTPUT: {
>                  if (!released && flower->tunnel) {
>                      act_offset = nl_msg_start_nested(request, 
> act_index++);
> diff --git a/lib/tc.h b/lib/tc.h
> index cc2ad025d..143e225d1 100644
> --- a/lib/tc.h
> +++ b/lib/tc.h
> @@ -171,6 +171,7 @@ enum tc_action_type {
>      TC_ACT_MPLS_SET,
>      TC_ACT_GOTO,
>      TC_ACT_CT,
> +    TC_ACT_SAMPLE,
>  };
>
>  enum nat_type {
> @@ -253,6 +254,11 @@ struct tc_action {
>              bool force;
>              bool commit;
>          } ct;
> +
> +        struct {
> +            uint32_t action_rate;
> +            uint32_t action_group_id;
> +        } sample;
>       };
>
>       enum tc_action_type type;
> @@ -292,11 +298,12 @@ tc_make_tcf_id(int ifindex, uint32_t block_id, 
> uint16_t prio,
>
>  static inline struct tcf_id
>  tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain,
> -                     uint16_t prio, enum tc_qdisc_hook hook)
> +                     uint16_t prio, enum tc_qdisc_hook hook, uint32_t 
> group_id)
>  {
>      struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook);
>
>      id.chain = chain;
> +    id.sflow_group_id = group_id;

I don't think overloading the tcf_id with the group ID for cleanup makes 
sense, or maybe it does, but then we should make it clear, that "struct 
tcf_id " is now also used to track flow state.
If we do this, rather than keep adding parameters to tc_make_tcf_id(),
we might want to set the optional fields in the structure ptr returned 
by it.

Also, I think the group_id should be called sample_group_id, as we might 
use this sample for other use cases (read dec_ttl). Talking about 
multiple use cases, how to handle multiple sample_group_ids in a single 
rule with this API?

>      return id;
>  }
> -- 
> 2.26.2

Guess this concludes my v9 review…


More information about the dev mailing list