[ovs-dev] [RFC v3 PATCH 5/5] dpif-netdev: Support partial-action-offload of VXLAN encap flow

Eli Britstein elibr at mellanox.com
Mon Jun 1 15:48:01 UTC 2020


On 5/28/2020 11:19 AM, Sriharsha Basavapatna wrote:
> In this patch, we support offloading of VXLAN_ENCAP action for a vhost-user
> port (aka "partial-action-offload"). At the time of offloading the flow, we
> determine if the flow can be offloaded to an egress device when the input
> port is not offload-capable, such as a vhost-user port. We then offload the
> flow with a VXLAN_ENCAP RTE action to the egress device. We do not add the
> OUTPUT RTE action, which indicates to the PMD that this is a partial-action
> offload request. Note that since the action is offloaded in the egress
> direction, classification is expected to be done by the OVS SW datapath and
> hence there's no need to offload a MARK action.
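
For reference, the shape of the resulting egress rule in plain rte_flow terms
would be roughly the following. This is only a minimal sketch, not the patch's
code path: egress_encap_rule() and its parameters are made-up names, the patch
goes through netdev-offload-dpdk (where the pattern presumably still comes
from the parsed megaflow match), and the encap is expressed as RAW_ENCAP with
the prebuilt tnl-push header, as in the existing parse_clone_actions() path:

    #include <rte_flow.h>

    /* Sketch of an egress partial-action-offload rule: egress attribute,
     * RAW_ENCAP with the prebuilt outer header, and no OUTPUT/MARK/COUNT. */
    static struct rte_flow *
    egress_encap_rule(uint16_t port_id, uint8_t *push_hdr, size_t push_hdr_len)
    {
        struct rte_flow_attr attr = {
            .egress = 1,    /* not .ingress/.transfer as in full offload */
        };
        struct rte_flow_item pattern[] = {
            /* Placeholder; the real rule still carries the items parsed
             * from the megaflow match. */
            { .type = RTE_FLOW_ITEM_TYPE_ETH },
            { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action_raw_encap encap = {
            .data = push_hdr,        /* prebuilt Eth+IP+UDP+VXLAN header */
            .size = push_hdr_len,
        };
        struct rte_flow_action actions[] = {
            { .type = RTE_FLOW_ACTION_TYPE_RAW_ENCAP, .conf = &encap },
            /* No OUTPUT action: forwarding stays in the SW datapath, which
             * is what makes this a partial-action offload.  No MARK either,
             * since classification is not offloaded. */
            { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;

        return rte_flow_create(port_id, &attr, pattern, actions, &error);
    }
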
>
> If offload succeeds, we save the information in 'dp_netdev_flow' so that
> we skip execution of the corresponding action (previous patch) during SW
> datapath processing.
>
> Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna at broadcom.com>
> ---
>   lib/dpif-netdev.c         | 352 ++++++++++++++++++++++++++++++++++++--
>   lib/netdev-offload-dpdk.c |  43 +++--
>   lib/netdev-offload.h      |   2 +
>   3 files changed, 376 insertions(+), 21 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 2383bfd41..48bd5df3b 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -2363,10 +2363,300 @@ dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload)
>       ovs_mutex_unlock(&dp_flow_offload.mutex);
>   }
>   
> +/*
> + * Mapping structure to map ufid to a partial offload egress device.
> + * Synchronization: accessed only in the context of the offload thread.
> + */
> +struct ufid_to_egdev_data {
> +    const struct cmap_node node;   /* link to cmap */
> +    ovs_u128 mega_ufid;            /* mega-ufid being mapped */
> +    odp_port_t egress_port_num;    /* Port number mapped to */
> +    struct dp_netdev_flow *flow;   /* flow that maps to this ufid */
> +};
> +
> +/*
> + * A mapping from mega-ufid to partial-offload egress-device.
> + */
> +static struct cmap ufid_to_egdev = CMAP_INITIALIZER;
> +
> +static uint32_t
> +ufid_to_egdev_refcnt(const ovs_u128 *mega_ufid)
> +{
> +    size_t hash = hash_bytes(mega_ufid, sizeof *mega_ufid, 0);
> +    struct ufid_to_egdev_data *data;
> +    uint32_t refcnt = 0;
> +
> +    CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_egdev) {
> +        if (ovs_u128_equals(*mega_ufid, data->mega_ufid)) {
> +            refcnt++;
> +        }
> +    }
> +
> +    return refcnt;
> +}
> +
> +/* Find egdev_data @(ufid, flow) */
> +static struct ufid_to_egdev_data *
> +ufid_to_egdev_data_find(const ovs_u128 *ufid,
> +                        const struct dp_netdev_flow *flow)
> +{
> +    size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
> +    struct ufid_to_egdev_data *data;
> +
> +    CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_egdev) {
> +        if (data->flow == flow && ovs_u128_equals(*ufid, data->mega_ufid)) {
> +            return data;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +/* Map the given pair of mega-ufid and flow to the given port. Returns 0
> + * when the mapping is created initially in the context of a flow. For
> + * subsequent calls, if it is a new flow with the same mega-ufid, creates
> + * a mapping entry but returns EEXIST (i.e., at least one other flow with
> + * the same ufid exists in the table). If it is an already mapped mega-ufid
> + * & flow pair, returns EEXIST.
> + */
> +static int
> +map_ufid_to_egdev(const ovs_u128 *mega_ufid,
> +                  const struct dp_netdev_flow *flow,
> +                  odp_port_t egress_port_num)
> +{
> +    struct ufid_to_egdev_data *data;
> +    size_t hash;
> +
> +    data = ufid_to_egdev_data_find(mega_ufid, flow);
> +    if (data) {
> +        /* mapping already exists for the given <mega-ufid,flow> pair */
> +        VLOG_DBG_RL("ufid_to_egdev mapping already exists for flow: %p\n",
> +                    flow);
> +        return EEXIST;
> +    }
> +
> +    data = xzalloc(sizeof *data);
> +    data->mega_ufid = *mega_ufid;
> +    data->egress_port_num = egress_port_num;
> +    data->flow = flow;
> +
> +    hash = hash_bytes(mega_ufid, sizeof *mega_ufid, 0);
> +    cmap_insert(&ufid_to_egdev,
> +                CONST_CAST(struct cmap_node *, &data->node), hash);
> +
> +    if (ufid_to_egdev_refcnt(mega_ufid) > 1) {
> +        /* at least one mapping exists for the mega_ufid */
> +        VLOG_DBG_RL("At least one ufid_to_egdev mapping exists, flow: %p\n",
> +                    flow);
> +        return EEXIST;
> +    }
> +
> +    /* first mapping created for the mega_ufid */
> +    VLOG_DBG_RL("Created the first ufid_to_egdev mapping; flow: %p\n", flow);
> +    return 0;
> +}
> +
> +static uint32_t
> +unmap_ufid_to_egdev(const ovs_u128 *mega_ufid,
> +                    const struct dp_netdev_flow *flow)
> +{
> +    struct ufid_to_egdev_data *data;
> +    uint32_t refcnt;
> +    size_t hash;
> +
> +    data = ufid_to_egdev_data_find(mega_ufid, flow);
> +    ovs_assert(data);
> +
> +    hash = hash_bytes(&data->mega_ufid, sizeof data->mega_ufid, 0);
> +    cmap_remove(&ufid_to_egdev,
> +                CONST_CAST(struct cmap_node *, &data->node), hash);
> +    ovsrcu_postpone(free, data);
> +
> +    refcnt = ufid_to_egdev_refcnt(mega_ufid);
> +    VLOG_DBG_RL("Unmapped ufid_to_egdev: flow: %p, refcnt: %d\n",
> +                flow, refcnt);
> +
> +    return refcnt;
> +}
> +
> +static int
> +partial_offload_egress_flow_del(struct dp_flow_offload_item *offload)
> +{
> +    struct dp_netdev_pmd_thread *pmd = offload->pmd;
> +    struct dp_netdev_flow *flow = offload->flow;
> +    const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
> +    struct netdev *port;
> +    uint32_t refcnt;
> +    int ret;
> +
> +    refcnt = unmap_ufid_to_egdev(&flow->mega_ufid, offload->flow);
> +    if (refcnt) {
> +        flow->egress_offload_port = NULL;
> +        flow->partial_actions_offloaded = false;
> +        return 0;
> +    }
> +
> +    /* The egress dev is not referenced by any flow with the given ufid.
> +     * We can now remove the partial-action egress-flow from hardware.
> +     */
> +    port = netdev_ports_get(flow->egress_offload_port, dpif_type_str);
> +    if (!port) {
> +        return -1;
> +    }
> +
> +    /* Taking a global 'port_mutex' to fulfill thread safety
> +     * restrictions for the netdev-offload-dpdk module. */
> +    ovs_mutex_lock(&pmd->dp->port_mutex);
> +    ret = netdev_flow_del(port, &flow->mega_ufid, NULL);
> +    ovs_mutex_unlock(&pmd->dp->port_mutex);
> +    netdev_close(port);
> +
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    flow->egress_offload_port = NULL;
> +    flow->partial_actions_offloaded = false;
> +
> +    VLOG_DBG_RL("Deleted partial_offloaded egress flow: %p pmd: %p id: %d\n",
> +                flow, pmd, offload->flow->pmd_id);
> +    return ret;
> +}
> +
>   static int
>   dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload)
>   {
> -    return mark_to_flow_disassociate(offload->pmd, offload->flow);
> +    if (unlikely(offload->flow->partial_actions_offloaded &&
> +        offload->flow->egress_offload_port != ODPP_NONE)) {
> +        return partial_offload_egress_flow_del(offload);
> +    } else {
> +        return mark_to_flow_disassociate(offload->pmd, offload->flow);
> +    }
> +}
> +
> +/* Structure to hold a nl_parsed OVS action */
> +struct action_attr {
> +    int type;                /* OVS action type */
> +    struct nlattr *action;   /* action attribute */
> +};
> +
> +/*
> + * Maximum number of actions to be parsed while selecting a flow for partial
> + * action offload. This number is currently based on the minimum number of
> + * attributes seen with the tunnel encap action (clone, tunnel_push, output).
> + * This number includes output action to a single egress device (uplink) and
> + * supports neither multiple clone() actions nor multiple output actions.
> + * This number could change if and when we support other actions or
> + * combinations of actions for partial offload.
> + */
> +#define MAX_ACTION_ATTRS    3 /* Max # action attributes supported */
> +
> +/*
> + * This function parses the list of OVS "actions" of length "actions_len",
> + * and returns them in an array of action "attrs", of size "max_attrs".
> + * The parsed number of actions is returned in "num_attrs". If the number
> + * of actions exceeds "max_attrs", parsing is stopped and E2BIG is returned.
> + * Otherwise, returns success (0).
> + */
> +static int
> +parse_nlattr_actions(struct nlattr *actions, size_t actions_len,
> +                     struct action_attr *attrs, int max_attrs, int *num_attrs)
> +{
> +    const struct nlattr *a;
> +    unsigned int left;
> +    int num_actions = 0;
> +    int n_attrs = 0;
> +    int rc = 0;
> +    int type;
> +
> +    *num_attrs = 0;
> +
> +    NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
> +        type = nl_attr_type(a);
> +
> +        if (num_actions >= max_attrs) {
> +            *num_attrs = num_actions;
> +            return E2BIG;
> +        }
> +
> +        attrs[num_actions].type = type;
> +        attrs[num_actions].action = a;
> +        num_actions++;
> +        if (type == OVS_ACTION_ATTR_CLONE) {
> +            rc = parse_nlattr_actions(nl_attr_get(a), nl_attr_get_size(a),
> +                                      &attrs[num_actions],
> +                                      (max_attrs - num_actions), &n_attrs);
> +            num_actions += n_attrs;
> +            if (rc == E2BIG) {
> +                *num_attrs = num_actions;
> +                return rc;
> +            }
> +        }
> +    }
> +
> +    *num_attrs = num_actions;
> +    return 0;
> +}
> +
> +/* This function determines if the given flow should be partially offloaded
> + * on the egress device, when the in-port is not offload-capable like a
> + * vhost-user port. The function currently supports offloading of only
> + * tunnel encap action.
> + */
> +static bool
> +should_partial_offload_egress(struct netdev *in_netdev,
> +                              struct dp_flow_offload_item *offload,
> +                              struct netdev **egress_netdev)
> +{
> +    const char *dpif_type_str =
> +        dpif_normalize_type(offload->pmd->dp->class->type);
> +    struct action_attr attrs[MAX_ACTION_ATTRS];
> +    odp_port_t out_port = ODPP_NONE;
> +    struct netdev *out_netdev;
> +    int num_attrs = 0;
> +    int type;
> +    int rc;
> +
> +    /* Support egress partial-offload only when in-port is vhost-user. */
> +    if (!is_dpdk_vhost_netdev(in_netdev)) {
> +        return false;
> +    }
> +
> +    rc = parse_nlattr_actions(offload->actions, offload->actions_len, attrs,
> +                              MAX_ACTION_ATTRS, &num_attrs);
> +    if (rc == E2BIG) {
> +        /* Action list too big; decline partial offload */
> +        return false;
> +    }
> +
> +    /* Number of attrs expected with tunnel encap action */
> +    if (num_attrs < MAX_ACTION_ATTRS) {
> +        return false;
> +    }
> +
> +    /* Only support clone sub-actions for now, tnl-push specifically. */
> +    if (attrs[0].type != OVS_ACTION_ATTR_CLONE ||
> +        attrs[1].type != OVS_ACTION_ATTR_TUNNEL_PUSH ||
> +        attrs[2].type != OVS_ACTION_ATTR_OUTPUT) {
> +        return false;
> +    }
> +
> +    /* Egress partial-offload needs an output action at the end. */
> +    out_port = nl_attr_get_odp_port(attrs[2].action);
> +    if (out_port == ODPP_NONE) {
> +        return false;
> +    }
> +
> +    /* Support egress partial-offload only when out-port is offload capable. */
> +    out_netdev = netdev_ports_get(out_port, dpif_type_str);
> +    if (!out_netdev || !netdev_dpdk_flow_api_supported(out_netdev)) {
> +        return false;
> +    }
> +
> +    /* Flow can be egress partial-offloaded. */
> +    *egress_netdev = out_netdev;
> +    offload->flow->egress_offload_port = out_port;
> +    return true;
>   }
>   
>   static int
> @@ -2427,7 +2717,9 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
>       bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
>       struct offload_info info;
>       struct netdev *port;
> -    uint32_t mark;
> +    struct netdev *egress_port = NULL;
> +    bool alloc_mark = true;
> +    uint32_t mark = INVALID_FLOW_MARK;
>       int ret;
>   
>       if (flow->dead) {
> @@ -2439,11 +2731,36 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
>           return -1;
>       }
>   
> -    if (dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
> -            /* flow already offloaded */
> +    info.attr_egress = 0;
> +    info.partial_actions = 0;
> +
> +    if (unlikely(should_partial_offload_egress(port, offload, &egress_port))) {
> +        if (egress_port) {
> +            netdev_close(port);
> +            port = egress_port;
> +            info.attr_egress = 1;
> +            alloc_mark = false;
> +        }
> +        info.partial_actions = 1;
> +    }
> +
> +    if (unlikely(info.partial_actions && egress_port)) {
> +        if (map_ufid_to_egdev(&flow->mega_ufid, flow,
> +            flow->egress_offload_port) == EEXIST) {
> +            /* Partial action already offloaded for the ufid+egdev */
> +            flow->partial_actions_offloaded = true;
>               netdev_close(port);
> +            VLOG_DBG_RL("Partial offload exists, flow: %p pmd: %p id: %d\n",
> +                        flow, offload->pmd, flow->pmd_id);
>               return 0;
> +        }
> +    } else if (alloc_mark &&
> +               dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
> +            /* flow already offloaded */
> +        netdev_close(port);
> +        return 0;
>       }
> +
>       info.flow_mark = mark;
>   
>       /* Taking a global 'port_mutex' to fulfill thread safety restrictions for
> @@ -2460,17 +2777,29 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
>           goto err_free;
>       }
>   
> -    if (!modification) {
> +    if (unlikely(info.partial_actions && egress_port)) {
> +        flow->partial_actions_offloaded = true;
> +        VLOG_DBG_RL("Partial offloaded (egress) flow: %p pmd: %p id: %d\n",
> +                    flow, offload->pmd, flow->pmd_id);
> +    } else if (!modification) {
>           megaflow_to_mark_associate(&flow->mega_ufid, mark);
>           mark_to_flow_associate(mark, flow);
>       }
> +
>       return 0;
>   
>   err_free:
> -    if (!modification) {
> -        netdev_offload_flow_mark_free(mark);
> -    } else {
> -        mark_to_flow_disassociate(pmd, flow);
> +    if (unlikely(info.partial_actions) && egress_port) {
> +        VLOG_DBG_RL("Partial offload(egress) failed flow: %p pmd: %p id: %d\n",
> +                    flow, offload->pmd, flow->pmd_id);
> +        unmap_ufid_to_egdev(&flow->mega_ufid, offload->flow);
> +    }
> +    if (mark != INVALID_FLOW_MARK) {
> +        if (!modification) {
> +            netdev_offload_flow_mark_free(mark);
> +        } else {
> +            mark_to_flow_disassociate(pmd, flow);
> +        }
>       }
>       return -1;
>   }
> @@ -2585,7 +2914,8 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
>       ovs_assert(cls != NULL);
>       dpcls_remove(cls, &flow->cr);
>       cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
> -    if (flow->mark != INVALID_FLOW_MARK) {
> +    if (flow->mark != INVALID_FLOW_MARK || (flow->partial_actions_offloaded
> +        && flow->egress_offload_port != ODPP_NONE)) {
>           queue_netdev_flow_del(pmd, flow);
>       }
>       flow->dead = true;
> @@ -3343,6 +3673,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
>       flow->dead = false;
>       flow->batch = NULL;
>       flow->mark = INVALID_FLOW_MARK;
> +    flow->partial_actions_offloaded = false;
> +    flow->egress_offload_port = ODPP_NONE;
>       *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
>       *CONST_CAST(struct flow *, &flow->flow) = match->flow;
>       *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> index 7be504688..d61433e2b 100644
> --- a/lib/netdev-offload-dpdk.c
> +++ b/lib/netdev-offload-dpdk.c
> @@ -1698,7 +1698,8 @@ static int
>   parse_clone_actions(struct netdev *netdev,
>                       struct flow_actions *actions,
>                       const struct nlattr *clone_actions,
> -                    const size_t clone_actions_len)
> +                    const size_t clone_actions_len,
> +                    struct offload_info *info)
>   {
>       const struct nlattr *ca;
>       unsigned int cleft;
> @@ -1723,8 +1724,11 @@ parse_clone_actions(struct netdev *netdev,
>               add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
>                               raw_encap);
>           } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
> -            if (add_output_action(netdev, actions, ca)) {
> -                return -1;
> +            /* add output action only if full-offload */
> +            if (!info->partial_actions) {
> +                if (add_output_action(netdev, actions, ca)) {
> +                    return -1;
> +                }
Here, the outer encap headers are known. We can get access to the flow's
matches and check whether they would also match those outer headers; if they
do, fail the rule.
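
One possible shape of that check, as a rough sketch of my reading of the
suggestion (encap_matches_flow() is a made-up helper, it assumes the megaflow
'match' is made available to parse_clone_actions(), and it only looks at the
outer Ethernet/IPv4/UDP fields):

    /* Assumes the usual OVS headers: lib/flow.h, lib/packets.h,
     * lib/unaligned.h.  Populates a flow from the outer headers of the
     * tnl-push data and checks whether the rule's own match would also
     * cover the resulting encapsulated packet.  A complete check would
     * also need to handle IPv6 outer headers and the remaining match
     * fields. */
    static bool
    encap_matches_flow(const struct ovs_action_push_tnl *tnl_push,
                       const struct match *match)
    {
        const struct eth_header *eth = (const void *) tnl_push->header;
        const struct ip_header *ip = (const void *) (eth + 1);
        const struct udp_header *udp = (const void *) (ip + 1);
        struct flow outer;

        memset(&outer, 0, sizeof outer);
        outer.dl_dst = eth->eth_dst;
        outer.dl_src = eth->eth_src;
        outer.dl_type = eth->eth_type;
        outer.nw_src = get_16aligned_be32(&ip->ip_src);
        outer.nw_dst = get_16aligned_be32(&ip->ip_dst);
        outer.nw_proto = IPPROTO_UDP;
        outer.tp_src = udp->udp_src;
        outer.tp_dst = udp->udp_dst;

        /* True when every field the match cares about is identical in the
         * outer headers, i.e. the post-encap packet would hit this rule
         * too. */
        return flow_equal_except(&outer, &match->flow, &match->wc);
    }

parse_clone_actions() could then decline the partial offload (return -1) when
this returns true, before it skips the output action.
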
>               }
>           } else {
>               VLOG_DBG_RL(&rl,
> @@ -1788,7 +1792,8 @@ parse_flow_actions(struct netdev *netdev,
>                      struct flow_actions *actions,
>                      struct nlattr *nl_actions,
>                      size_t nl_actions_len,
> -                   struct act_resources *act_resources)
> +                   struct act_resources *act_resources,
> +                   struct offload_info *info)
>   {
>       struct nlattr *nla;
>       size_t left;
> @@ -1796,7 +1801,9 @@ parse_flow_actions(struct netdev *netdev,
>       if (!strcmp(netdev_get_type(netdev), "vxlan")) {
>           add_vxlan_decap_action(actions);
>       }
> -    add_count_action(actions);
> +    if (!info->partial_actions) {
> +        add_count_action(actions);
> +    }
>       NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
>           if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>               if (add_output_action(netdev, actions, nla)) {
> @@ -1821,7 +1828,7 @@ parse_flow_actions(struct netdev *netdev,
>               size_t clone_actions_len = nl_attr_get_size(nla);
>   
>               if (parse_clone_actions(netdev, actions, clone_actions,
> -                                    clone_actions_len)) {
> +                                    clone_actions_len, info)) {
>                   return -1;
>               }
>           } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) {
> @@ -1899,16 +1906,23 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
>                               size_t actions_len,
>                               const ovs_u128 *ufid,
>                               struct act_resources *act_resources,
> -                            struct flows_handle *flows)
> +                            struct flows_handle *flows,
> +                            struct offload_info *info)
>   {
> -    const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
> +    struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
>       struct flow_actions actions = { .actions = NULL, .cnt = 0 };
>       struct flow_item flow_item = { .devargs = NULL };
>       struct rte_flow_error error;
>       int ret;
>   
> +    if (info->attr_egress) {
> +        flow_attr.ingress = 0;
> +        flow_attr.egress = 1;
> +        flow_attr.transfer = 0;
> +    }
> +
>       ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len,
> -                             act_resources);
> +                             act_resources, info);
>       if (ret) {
>           goto out;
>       }
> @@ -1957,8 +1971,15 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
>   
>       ret = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
>                                         actions_len, ufid, &act_resources,
> -                                      &flows);
> -    if (ret) {
> +                                      &flows, info);
> +    if (!ret) {
> +        if (info->partial_actions && info->attr_egress) {
> +            /* actions_offloaded should be set to false with partial actions,
> +             * since it is still considered a partial offload and not
> +             * a full offload. */
> +            actions_offloaded = false;
> +        }
> +    } else if (!(info->partial_actions && info->attr_egress)) {
>           /* If we failed to offload the rule actions fallback to MARK+RSS
>            * actions.
>            */
> diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
> index d6dd98367..3c079cc9d 100644
> --- a/lib/netdev-offload.h
> +++ b/lib/netdev-offload.h
> @@ -67,6 +67,8 @@ struct offload_info {
>   
>       bool recirc_id_shared_with_tc;  /* Indicates whether tc chains will be in
>                                        * sync with datapath recirc ids. */
> +    uint8_t attr_egress;      /* Egress direction offload */
> +    uint8_t partial_actions;  /* Partial action offload; no forward action */
>   
>       /*
>        * The flow mark id assigned to the flow. If any pkts hit the flow,

