[ovs-dev] [PATCH v15 4/7] netdev-offload-tc: Introduce group ID management API

Chris Mi cmi at nvidia.com
Tue Oct 12 06:12:18 UTC 2021


On 10/1/2021 6:20 PM, Eelco Chaudron wrote:
> One nit, rest looks good. Thanks for removing the expired list handling, as it looks way cleaner now!
>
> //Eelco
>
> On 15 Sep 2021, at 14:43, Chris Mi wrote:
>
>> When offloading the sample action to TC, userspace creates a unique ID
>> to map the sFlow action and tunnel info and passes this ID to the kernel
>> instead of the sFlow info. psample will send this ID and the sampled
>> packet to userspace. Using the ID, userspace can recover the sFlow info
>> and send the sampled packet to the right sFlow monitoring host.
>>
>> Signed-off-by: Chris Mi <cmi at nvidia.com>
>> Reviewed-by: Eli Britstein <elibr at nvidia.com>
>> ---
>>   lib/dpif-offload-provider.h |  17 +++
>>   lib/netdev-offload-tc.c     | 247 +++++++++++++++++++++++++++++++++++-
>>   lib/netdev-offload.h        |   1 +
>>   lib/tc.h                    |   1 +
>>   4 files changed, 260 insertions(+), 6 deletions(-)
>>
>> diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h
>> index 75238c4cb..95c29c0d8 100644
>> --- a/lib/dpif-offload-provider.h
>> +++ b/lib/dpif-offload-provider.h
>> @@ -17,9 +17,26 @@
>>   #ifndef DPIF_OFFLOAD_PROVIDER_H
>>   #define DPIF_OFFLOAD_PROVIDER_H
>>
>> +#include "netlink-protocol.h"
>> +#include "openvswitch/packets.h"
>> +#include "openvswitch/types.h"
>> +
>>   struct dpif;
>>   struct dpif_offload_sflow;
>>
>> +/* When offloading the sample action, userspace creates a unique ID to
>> + * map the sFlow action and tunnel info and passes this ID to the datapath
>> + * instead of the sFlow info. The datapath will send this ID and the
>> + * sampled packet to userspace. Using the ID, userspace can recover the
>> + * sFlow info and send the sampled packet to the right sFlow monitoring host.
>> + */
>> +struct dpif_sflow_attr {
>> +    const struct nlattr *action;    /* SFlow action. */
>> +    const struct nlattr *userdata;  /* Struct user_action_cookie. */
>> +    struct flow_tnl *tunnel;        /* Tunnel info. */
>> +    ovs_u128 ufid;                  /* Flow ufid. */
>> +};
>> +
>>   /* Datapath interface offload structure, to be defined by each implementation
>>    * of a datapath interface.
>>    */
>> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>> index 9845e8d3f..5d817b6cf 100644
>> --- a/lib/netdev-offload-tc.c
>> +++ b/lib/netdev-offload-tc.c
>> @@ -40,6 +40,7 @@
>>   #include "unaligned.h"
>>   #include "util.h"
>>   #include "dpif-provider.h"
>> +#include "cmap.h"
>>
>>   VLOG_DEFINE_THIS_MODULE(netdev_offload_tc);
>>
>> @@ -62,6 +63,234 @@ struct chain_node {
>>       uint32_t chain;
>>   };
>>
>> +/* This maps a psample group ID to struct dpif_sflow_attr for sFlow. */
>> +struct sgid_node {
>> +    struct ovs_list exp_node OVS_GUARDED;
>> +    struct cmap_node metadata_node;
>> +    struct cmap_node id_node;
>> +    struct ovs_refcount refcount;
>> +    uint32_t hash;
>> +    uint32_t id;
>> +    const struct dpif_sflow_attr sflow;
>> +};
>> +
>> +static struct ovs_rwlock sgid_rwlock = OVS_RWLOCK_INITIALIZER;
>> +static struct cmap sgid_map = CMAP_INITIALIZER;
>> +static struct cmap sgid_metadata_map = CMAP_INITIALIZER;
>> +static struct ovs_list sgid_expiring OVS_GUARDED_BY(sgid_rwlock)
>> +    = OVS_LIST_INITIALIZER(&sgid_expiring);
>> +static uint32_t next_sample_group_id OVS_GUARDED_BY(sgid_rwlock) = 1;
>> +
>> +static void
>> +sgid_node_free(struct sgid_node *node)
>> +{
>> +    free(node->sflow.tunnel);
>> +    free(CONST_CAST(void *, node->sflow.action));
>> +    free(CONST_CAST(void *, node->sflow.userdata));
>> +    free(node);
>> +}
>> +
>> +/* Lockless RCU protected lookup.  If node is needed across RCU quiescent
>> + * state, caller should copy the contents. */
>> +static const struct sgid_node *
>> +sgid_find(uint32_t id)
>> +{
>> +    const struct cmap_node *node = cmap_find(&sgid_map, id);
>> +
>> +    return node ? CONTAINER_OF(node, const struct sgid_node, id_node) : NULL;
>> +}
>> +
>> +const struct dpif_sflow_attr *
>> +dpif_offload_sflow_attr_find(uint32_t id)
>> +{
>> +    const struct sgid_node *node;
>> +
>> +    node = sgid_find(id);
>> +    if (!node) {
>> +        return NULL;
>> +    }
>> +
>> +    return &node->sflow;
>> +}
>> +
>> +static uint32_t
>> +dpif_sflow_attr_hash(const struct dpif_sflow_attr *sflow)
>> +{
>> +    return hash_bytes(&sflow->ufid, sizeof sflow->ufid, 0);
>> +}
>> +
>> +static bool
>> +dpif_sflow_attr_equal(const struct dpif_sflow_attr *a,
>> +                      const struct dpif_sflow_attr *b)
>> +{
>> +    if (!ovs_u128_equals(a->ufid, b->ufid)) {
>> +        return false;
>> +    }
>> +    if (!a->action || !b->action) {
>> +        return false;
>> +    }
>> +    if (a->action->nla_len != b->action->nla_len
>> +        || memcmp(a->action, b->action, a->action->nla_len)) {
>> +        return false;
>> +    }
>> +    if (!a->tunnel && !b->tunnel) {
>> +        return true;
>> +    }
>> +    if (!a->tunnel || !b->tunnel) {
>> +        return false;
>> +    }
>> +
>> +    return !memcmp(a->tunnel, b->tunnel, sizeof *a->tunnel);
>> +}
>> +
>> +/* Lockless RCU protected lookup.  If node is needed across RCU quiescent
>> + * state, caller should take a reference. */
>> +static struct sgid_node *
>> +sgid_find_equal(const struct dpif_sflow_attr *target, uint32_t hash)
>> +{
>> +    struct sgid_node *node;
>> +
>> +    CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &sgid_metadata_map) {
>> +        if (dpif_sflow_attr_equal(&node->sflow, target)) {
>> +            return node;
>> +        }
>> +    }
>> +    return NULL;
>> +}
>> +
>> +static struct sgid_node *
>> +sgid_ref_equal(const struct dpif_sflow_attr *target, uint32_t hash)
>> +{
>> +    struct sgid_node *node;
>> +
>> +    do {
>> +        node = sgid_find_equal(target, hash);
>> +        /* Try again if the node was released before we got the reference. */
>> +    } while (node && !ovs_refcount_try_ref_rcu(&node->refcount));
>> +
>> +    return node;
>> +}
>> +
>> +static void
>> +dpif_sflow_attr_clone(struct dpif_sflow_attr *new,
>> +                      const struct dpif_sflow_attr *old)
>> +{
>> +    new->action = xmemdup(old->action, old->action->nla_len);
>> +    new->userdata = xmemdup(old->userdata, old->userdata->nla_len);
>> +    new->tunnel = old->tunnel
>> +                  ? xmemdup(old->tunnel, sizeof *old->tunnel)
>> +                  : NULL;
>> +    new->ufid = old->ufid;
>> +}
>> +
>> +/* We use the id as the hash value, which works due to cmap internal rehashing.
>> + * We also only insert nodes with unique IDs, so all possible hash collisions
>> + * remain internal to the cmap. */
>> +static struct sgid_node *
>> +sgid_find__(uint32_t id)
>> +    OVS_REQUIRES(sgid_rwlock)
>> +{
>> +    struct cmap_node *node = cmap_find_protected(&sgid_map, id);
>> +
>> +    return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL;
>> +}
>> +
>> +/* Allocate a unique group id for the given set of flow metadata. The id
>> + * space is 2^32; if we loop too many times and still can't find a free
>> + * one, something is wrong, so return NULL.
>> + */
>> +#define SGID_ALLOC_RETRY_MAX 8192
>> +static struct sgid_node *
>> +sgid_alloc__(const struct dpif_sflow_attr *sflow, uint32_t hash)
>> +{
>> +    struct sgid_node *node = xzalloc(sizeof *node);
>> +    int i = 0;
>> +
>> +    node->hash = hash;
>> +    ovs_refcount_init(&node->refcount);
>> +    dpif_sflow_attr_clone(CONST_CAST(struct dpif_sflow_attr *,
>> +                                     &node->sflow), sflow);
>> +
>> +    ovs_rwlock_wrlock(&sgid_rwlock);
>> +    for (;;) {
>> +        node->id = next_sample_group_id++;
>> +        if (OVS_UNLIKELY(!node->id)) {
>> +            next_sample_group_id = 1;
>> +            node->id = next_sample_group_id++;
>> +        }
>> +        /* Find if the id is free. */
>> +        if (OVS_LIKELY(!sgid_find__(node->id))) {
>> +            break;
>> +        }
>> +        if (++i == SGID_ALLOC_RETRY_MAX) {
>> +            VLOG_ERR("%s: Can't find a free group ID after %d retries",
>> +                     __func__, i);
>> +            ovs_rwlock_unlock(&sgid_rwlock);
>> +            sgid_node_free(node);
>> +            return NULL;
>> +        }
>> +    }
>> +    cmap_insert(&sgid_map, &node->id_node, node->id);
>> +    cmap_insert(&sgid_metadata_map, &node->metadata_node, node->hash);
>> +    ovs_rwlock_unlock(&sgid_rwlock);
>> +    return node;
>> +}
>> +
>> +/* Allocate a unique group id for the given set of flow metadata and
>> + * optional actions. */
>> +static uint32_t
>> +sgid_alloc_ctx(const struct dpif_sflow_attr *sflow)
>> +{
>> +    uint32_t hash = dpif_sflow_attr_hash(sflow);
>> +    struct sgid_node *node = sgid_ref_equal(sflow, hash);
>> +
>> +    if (!node) {
>> +        node = sgid_alloc__(sflow, hash);
>> +    }
>> +
>> +    return node ? node->id : 0;
>> +}
>> +
>> +static void
>> +sgid_node_unref(const struct sgid_node *node_)
>> +    OVS_EXCLUDED(sgid_rwlock)
>> +{
>> +    struct sgid_node *node = CONST_CAST(struct sgid_node *, node_);
>> +
>> +    ovs_rwlock_wrlock(&sgid_rwlock);
>> +    if (node && ovs_refcount_unref(&node->refcount) == 1) {
>> +        cmap_remove(&sgid_metadata_map, &node->metadata_node, node->hash);
>> +        cmap_remove(&sgid_map, &node->id_node, node->id);
>> +        ovsrcu_postpone(sgid_node_free, node);
>> +    }
>> +    ovs_rwlock_unlock(&sgid_rwlock);
>> +}
>> +
>> +static void
>> +sgid_free(uint32_t id)
>> +{
>> +    const struct sgid_node *node;
>> +
>> +    if (!id) {
>> +        return;
>> +    }
>> +
>> +    node = sgid_find(id);
>> +    if (node) {
>> +        sgid_node_unref(node);
>> +    } else {
>> +        VLOG_ERR("%s: Freeing nonexistent group ID: %"PRIu32, __func__, id);
>> +    }
>> +}
>> +
>> +static int
>> +del_filter_and_sgid(struct tcf_id *id)
>> +{
>> +    sgid_free(id->sample_group_id);
>> +    id->sample_group_id = 0;
>> +    return tc_del_filter(id);
>> +}
>> +
>>   static bool
>>   is_internal_port(const char *type)
>>   {
>> @@ -198,13 +427,13 @@ del_ufid_tc_mapping(const ovs_u128 *ufid)
>>       ovs_mutex_unlock(&ufid_lock);
>>   }
>>
>> -/* Wrapper function to delete filter and ufid tc mapping */
>> +/* Wrapper function to delete filter, sgid and ufid tc mapping */
>>   static int
>> -del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid)
>> +del_filter_sgid_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid)
>>   {
>>       int err;
>>
>> -    err = tc_del_filter(id);
>> +    err = del_filter_and_sgid(id);
>>       if (!err) {
>>           del_ufid_tc_mapping(ufid);
>>       }
>> @@ -426,7 +655,7 @@ netdev_tc_flow_flush(struct netdev *netdev)
>>               continue;
>>           }
>>
>> -        err = tc_del_filter(&data->id);
>> +        err = del_filter_and_sgid(&data->id);
>>           if (!err) {
>>               del_ufid_tc_mapping_unlocked(&data->ufid);
>>           }
>> @@ -1917,6 +2146,11 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>>               action->type = TC_ACT_GOTO;
>>               action->chain = 0;  /* 0 is reserved and not used by recirc. */
>>               flower.action_count++;
>> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
>> +            struct dpif_sflow_attr sflow_attr;
>> +
>> +            memset(&sflow_attr, 0, sizeof sflow_attr);
>> +            sgid_alloc_ctx(&sflow_attr);
>>           } else {
>>               VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>>                           nl_attr_type(nla));
>> @@ -1932,7 +2166,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>>       if (get_ufid_tc_mapping(ufid, &id) == 0) {
>>           VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d",
>>                       id.handle, id.prio);
>> -        info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id, ufid);
>> +        info->tc_modify_flow_deleted = !del_filter_sgid_ufid_mapping(&id,
>> +                                                                     ufid);
>>       }
>>
>>       prio = get_prio_for_tc_flower(&flower);
>> @@ -2021,7 +2256,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
>>           }
>>       }
>>
>> -    error = del_filter_and_ufid_mapping(&id, ufid);
>> +    error = del_filter_sgid_ufid_mapping(&id, ufid);
>>
>>       return error;
>>   }
>> diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
>> index e7fcedae9..d57d70470 100644
>> --- a/lib/netdev-offload.h
>> +++ b/lib/netdev-offload.h
>> @@ -138,6 +138,7 @@ int netdev_ports_flow_get(const char *dpif_type, struct match *match,
>>   int netdev_ports_get_n_flows(const char *dpif_type,
>>                                odp_port_t port_no, uint64_t *n_flows);
>>
>> +const struct dpif_sflow_attr *dpif_offload_sflow_attr_find(uint32_t id);
> Please keep the extra new line here.
Done.
>
>>   #ifdef  __cplusplus
>>   }
>>   #endif
>> diff --git a/lib/tc.h b/lib/tc.h
>> index a147ca461..2e4084f48 100644
>> --- a/lib/tc.h
>> +++ b/lib/tc.h
>> @@ -276,6 +276,7 @@ struct tcf_id {
>>       uint32_t chain;
>>       uint16_t prio;
>>       uint32_t handle;
>> +    uint32_t sample_group_id;
>>   };
>>
>>   static inline struct tcf_id
>> -- 
>> 2.27.0
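
To make the ID lifecycle described in the commit message concrete, below is a
minimal usage sketch (not part of the patch). It assumes the code sits in
lib/netdev-offload-tc.c, where the static helpers above are visible, and that
'sample_action', 'cookie_attr', 'tnl' and 'ufid' have already been extracted
from the OVS_ACTION_ATTR_SAMPLE action being offloaded; the 'example_'
function and parameter names are illustrative only.

/* Register the sFlow attributes and obtain the psample group ID that the
 * TC sample action will carry.  Both 'sample_action' and 'cookie_attr'
 * must be non-NULL because sgid_alloc_ctx() copies them; 'tnl' may be
 * NULL for non-tunneled traffic.  Returns 0 on failure. */
static uint32_t
example_register_sample(const struct nlattr *sample_action,
                        const struct nlattr *cookie_attr,
                        struct flow_tnl *tnl, ovs_u128 ufid)
{
    struct dpif_sflow_attr sflow = {
        .action = sample_action,   /* Whole sample/sFlow action. */
        .userdata = cookie_attr,   /* struct user_action_cookie attribute. */
        .tunnel = tnl,             /* Optional tunnel metadata. */
        .ufid = ufid,              /* UFID of the offloaded flow. */
    };

    /* Returns the ID of an existing node with equal attributes (taking an
     * extra reference) or allocates a fresh one. */
    return sgid_alloc_ctx(&sflow);
}

/* On the psample receive path, the group ID carried with the sampled
 * packet is mapped back to the registered attributes. */
static void
example_handle_sampled_packet(uint32_t group_id)
{
    const struct dpif_sflow_attr *sflow;

    sflow = dpif_offload_sflow_attr_find(group_id);
    if (!sflow) {
        return;   /* Unknown or already released group ID. */
    }
    /* The lookup is RCU protected; copy the contents if they are needed
     * across a quiescent state.  Here sflow->userdata, sflow->tunnel and
     * sflow->ufid would be used to rebuild the sFlow sample. */
}

Releasing the reference is already handled by the patch itself:
del_filter_and_sgid() calls sgid_free(id->sample_group_id) when the TC filter
is deleted, and the node is reclaimed via ovsrcu_postpone() once the last
reference is dropped.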


