[ovs-dev] [RFC PATCH 2/4] dpif-netdev: Support partial-action-offload of VXLAN encap flow

Sriharsha Basavapatna sriharsha.basavapatna at broadcom.com
Wed May 13 09:34:12 UTC 2020


On Wed, May 6, 2020 at 5:56 PM Eli Britstein <elibr at mellanox.com> wrote:
>
>
> On 4/24/2020 11:23 AM, Sriharsha Basavapatna wrote:
>
> In this patch, we support offloading of VXLAN_ENCAP action for a vhost-user
> port (aka "partial-action-offload"). At the time of offloading the flow, we
> determine if the flow can be offloaded to an egress device, if the input
> port is not offload capable such as a vhost-user port. We then offload the
> flow with a VXLAN_ENCAP RTE action, to the egress device. We do not add
> the OUTPUT RTE action, which indicates to the PMD that is is a partial
> action offload request. Note that since the action is being offloaded in
> egress direction, classification is expected to be done by OVS SW datapath
> and hence there's no need to offload a MARK action.
>
> If offload succeeds, we save the information in 'dp_netdev_flow' so that we
> skip execution of the corresponding action (subsequent patch) during SW
> datapath processing.
>
> Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna at broadcom.com>
> ---
>  lib/dpif-netdev.c         | 172 ++++++++++++++++++++++++++++++++++++--
>  lib/netdev-dpdk.c         |   5 ++
>  lib/netdev-dpdk.h         |   1 +
>  lib/netdev-offload-dpdk.c |  45 +++++++---
>  lib/netdev-offload.h      |   2 +
>  5 files changed, 206 insertions(+), 19 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 781b233f4..a47230067 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -538,6 +538,14 @@ struct dp_netdev_flow {
>      bool dead;
>      uint32_t mark;               /* Unique flow mark assigned to a flow */
>
> +    /* The next two members are used to support partial offloading of
> +     * actions. The boolean flag tells if this flow has its actions partially
> +     * offloaded. The egress port# tells if the action should be offloaded
> +     * on the egress (output) port instead of the in-port for the flow.
> +     */
> +    bool partial_actions_offloaded;
> +    odp_port_t  egress_offload_port;
>
> A flow might have multiple egress ports, and also multiple egress ports in clone.
>
> For example:
>
> ufid:fef472c4-e796-4d0d-a655-f45f9d1767d7, skb_priority(0/0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(vm1),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label(0/0),packet_type(ns=0,id=0),eth(src=e4:11:22:33:44:50,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=4.4.4.10,tip=4.4.4.15,op=1/0xff,sha=e4:11:22:33:44:50/00:00:00:00:00:00,tha=00:00:00:00:00:00/00:00:00:00:00:00), packets:30, bytes:1560, used:0.432s, dp:ovs, actions:clone(tnl_push(tnl_port(vxlan_sys_4789),header(size=50,type=4,eth(dst=24:8a:07:ad:79:12,src=24:8a:07:ad:77:4a,dl_type=0x0800),ipv4(src=111.168.1.1,dst=111.168.1.2,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x64)),out_port(br-phy)),pf),br-int,vm2,clone(tnl_push(tnl_port(gre_sys),header(size=42,type=3,eth(dst=24:8a:07:ad:79:13,src=24:8a:07:ad:77:4b,dl_type=0x0800),ipv4(src=222.168.1.1,dst=222.168.1.2,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x2000,proto=0x6558),key=0xc8)),out_port(br-phy2)),pf2)
>
> Here, I configured 2 PFs (one in each br-phy bridge), and in br-int i configured VXLAN and GRE (but it can be also 2 VXLANs). How is such use case handled?

[Harsha] The current design goal is to keep this simple by supporting
only one output-action/egress-port. The code currently rejects the
flow for partial action offloading, if there are other actions (apart
from tunnel-push and output) within clone() action. But it does not
currently handle multiple multiple clone() actions and egress ports.
I'll fix this to reject under these conditions too.  I'll also update
comments on this.
>
> +
>      /* Statistics. */
>      struct dp_netdev_flow_stats stats;
>
> @@ -2351,12 +2359,141 @@ dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload)
>      ovs_mutex_unlock(&dp_flow_offload.mutex);
>  }
>
> +static int dp_netdev_egress_flow_del(struct dp_netdev_pmd_thread *pmd,
> +                                     struct dp_netdev_flow *flow)
> +{
> +    int ret;
> +    struct netdev *port;
> +    odp_port_t out_port = flow->egress_offload_port;
> +    const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
> +
> +    port = netdev_ports_get(out_port, dpif_type_str);
> +    if (!port) {
> +        return -1;
> +    }
> +
> +    /* Taking a global 'port_mutex' to fulfill thread safety
> +     * restrictions for the netdev-offload-dpdk module. */
> +    ovs_mutex_lock(&pmd->dp->port_mutex);
> +    ret = netdev_flow_del(port, &flow->mega_ufid, NULL);
> +    ovs_mutex_unlock(&pmd->dp->port_mutex);
> +    netdev_close(port);
> +
> +    return ret;
> +}
> +
>  static int
>  dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload)
>  {
> +    if (offload->flow->partial_actions_offloaded &&
> +        offload->flow->egress_offload_port != ODPP_NONE) {
> +        return dp_netdev_egress_flow_del(offload->pmd, offload->flow);
> +    }
>      return mark_to_flow_disassociate(offload->pmd, offload->flow);
>  }
>
> +/* This function determines if the given flow should be partially offloaded
> + * on the egress device, when the in-port is not offload-capable like a
> + * vhost-user port. The function currently supports offloading of only
> + * tunnel encap action.
> + */
> +static bool
> +should_partial_offload_egress(struct netdev *in_netdev,
> +                              struct dp_flow_offload_item *offload,
> +                              struct netdev **egress_netdev)
> +{
> +    const char *dpif_type_str =
> +        dpif_normalize_type(offload->pmd->dp->class->type);
> +    odp_port_t out_port = ODPP_NONE;
> +    struct nlattr *clone_actions;
> +    struct netdev *out_netdev;
> +    bool encap_action = false;
> +    size_t clone_actions_len;
> +    const struct nlattr *a;
> +    unsigned int left;
> +    int type;
> +
> +    /* Support egress partial-offload only when in-port is vhost-user. */
> +    if (!is_dpdk_vhost_netdev(in_netdev)) {
> +        return false;
> +    }
> +
> +    /* Only support clone sub-actions for now, tnl-push specifically. */
> +    type = nl_attr_type(offload->actions);
> +    if (type != OVS_ACTION_ATTR_CLONE) {
> +        return false;
> +    }
> +    clone_actions = nl_attr_get(offload->actions);
> +    clone_actions_len = nl_attr_get_size(offload->actions);
> +
> +    NL_ATTR_FOR_EACH (a, left, clone_actions, clone_actions_len) {
> +        type = nl_attr_type(a);
> +        bool last_action = (left <= NLA_ALIGN(a->nla_len));
> +
> +        if (type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
> +            encap_action = true;
> +        } else if (type == OVS_ACTION_ATTR_OUTPUT) {
> +            if (last_action) {
> +                    out_port = nl_attr_get_odp_port(a);
> +                    break;
> +            }
> +        } else {
> +            /* Reject if there are other actions */
> +            return false;
> +        }
> +    }
> +
> +    /* Support egress partial-offload only when there is an output action. */
> +    if (out_port == ODPP_NONE) {
> +        return false;
> +    }
> +
> +    /* Support only encap action for now. */
> +    if (!encap_action) {
> +        return false;
> +    }
> +
> +    /* Support egress partial-offload only when out-port is offload capable. */
> +    out_netdev = netdev_ports_get(out_port, dpif_type_str);
> +    if (!out_netdev || !netdev_dpdk_flow_api_supported(out_netdev)) {
> +        return false;
> +    }
> +
> +    /* Flow can be egress partial-offloaded. */
> +    *egress_netdev = out_netdev;
> +    offload->flow->egress_offload_port = out_port;
> +    return true;
> +}
> +
> +/* Place-holder function for partial action offload in ingress direction,
> + * with a vhost-user port as the output port for the flow.
> + */
>
> No need for "place-holder". If there is a need in the future, add it then.

[Harsha] Ok.
>
> +static bool
> +should_partial_offload_ingress(struct netdev *in_netdev OVS_UNUSED,
> +                               struct dp_flow_offload_item *offload OVS_UNUSED)
> +{
> +    return false;
> +}
> +
> +/* This function determines if the given flow actions can be partially
> + * offloaded. Partial action offload is attempted when either the in-port
> + * or the out-port for the flow is a vhost-user port.
> + */
> +static bool
> +dp_netdev_partial_offload_supported(struct netdev *in_netdev,
> +                                   struct dp_flow_offload_item *offload,
> +                                   struct netdev **egress_netdev)
> +{
> +    if (should_partial_offload_ingress(in_netdev, offload)) {
> +        return true;
> +    } else if (should_partial_offload_egress(in_netdev, offload,
> +               egress_netdev)) {
> +        return true;
> +    } else {
> +        return false;
> +    }
> +}
> +
>  static int
>  dp_netdev_alloc_flow_mark(struct dp_netdev_flow *flow, bool modification,
>                            uint32_t *markp)
> @@ -2415,7 +2552,9 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
>      bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
>      struct offload_info info;
>      struct netdev *port;
> -    uint32_t mark;
> +    struct netdev *egress_port = NULL;
> +    bool alloc_mark = true;
> +    uint32_t mark = INVALID_FLOW_MARK;
>      int ret;
>
>      if (flow->dead) {
> @@ -2427,7 +2566,20 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
>          return -1;
>      }
>
> -    if (dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
> +    info.attr_egress = 0;
> +    info.partial_actions = 0;
> +
> +    if (dp_netdev_partial_offload_supported(port, offload, &egress_port)) {
> +        if (egress_port) {
> +            netdev_close(port);
> +            port = egress_port;
> +            info.attr_egress = 1;
> +            alloc_mark = false;
> +        }
> +        info.partial_actions = 1;
> +    }
> +
> +    if (alloc_mark && dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
>              /* flow already offloaded */
>              netdev_close(port);
>              return 0;
> @@ -2448,17 +2600,21 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
>          goto err_free;
>      }
>
> -    if (!modification) {
> +    if (info.partial_actions) {
> +        flow->partial_actions_offloaded = true;
> +    } else if (!modification) {
>          megaflow_to_mark_associate(&flow->mega_ufid, mark);
>          mark_to_flow_associate(mark, flow);
>      }
>      return 0;
>
>  err_free:
> -    if (!modification) {
> -        netdev_offload_flow_mark_free(mark);
> -    } else {
> -        mark_to_flow_disassociate(pmd, flow);
> +    if (mark != INVALID_FLOW_MARK) {
> +        if (!modification) {
> +            netdev_offload_flow_mark_free(mark);
> +        } else {
> +            mark_to_flow_disassociate(pmd, flow);
> +        }
>      }
>      return -1;
>  }
> @@ -3325,6 +3481,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
>      flow->dead = false;
>      flow->batch = NULL;
>      flow->mark = INVALID_FLOW_MARK;
> +    flow->partial_actions_offloaded = false;
> +    flow->egress_offload_port = ODPP_NONE;
>      *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
>      *CONST_CAST(struct flow *, &flow->flow) = match->flow;
>      *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 944e4a152..ad880f3ff 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -559,6 +559,11 @@ is_dpdk_class(const struct netdev_class *class)
>             || class->destruct == netdev_dpdk_vhost_destruct;
>  }
>
> +bool is_dpdk_vhost_netdev(struct netdev *netdev)
> +{
> +    return netdev->netdev_class->destruct == netdev_dpdk_vhost_destruct;
> +}
> +
>
> Split this query function to a separate commit.

[Harsha] Ok.
>
>  /* DPDK NIC drivers allocate RX buffers at a particular granularity, typically
>   * aligned at 1k or less. If a declared mbuf size is not a multiple of this
>   * value, insufficient buffers are allocated to accomodate the packet in its
> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
> index 94e08e516..67e70d09f 100644
> --- a/lib/netdev-dpdk.h
> +++ b/lib/netdev-dpdk.h
> @@ -37,6 +37,7 @@ void netdev_dpdk_register(void);
>  void free_dpdk_buf(struct dp_packet *);
>
>  bool netdev_dpdk_flow_api_supported(struct netdev *);
> +bool is_dpdk_vhost_netdev(struct netdev *);
>
>  int
>  netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> index 7be504688..d87d8b1fa 100644
> --- a/lib/netdev-offload-dpdk.c
> +++ b/lib/netdev-offload-dpdk.c
> @@ -1698,7 +1698,8 @@ static int
>  parse_clone_actions(struct netdev *netdev,
>                      struct flow_actions *actions,
>                      const struct nlattr *clone_actions,
> -                    const size_t clone_actions_len)
> +                    const size_t clone_actions_len,
> +                    struct offload_info *info)
>  {
>      const struct nlattr *ca;
>      unsigned int cleft;
> @@ -1723,8 +1724,11 @@ parse_clone_actions(struct netdev *netdev,
>              add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
>                              raw_encap);
>          } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
> -            if (add_output_action(netdev, actions, ca)) {
> -                return -1;
> +            /* add output action only if full-offload */
> +            if (!info->partial_actions) {
> +                if (add_output_action(netdev, actions, ca)) {
> +                    return -1;
> +                }
>              }
>          } else {
>              VLOG_DBG_RL(&rl,
> @@ -1788,7 +1792,8 @@ parse_flow_actions(struct netdev *netdev,
>                     struct flow_actions *actions,
>                     struct nlattr *nl_actions,
>                     size_t nl_actions_len,
> -                   struct act_resources *act_resources)
> +                   struct act_resources *act_resources,
> +                   struct offload_info *info)
>  {
>      struct nlattr *nla;
>      size_t left;
> @@ -1799,8 +1804,11 @@ parse_flow_actions(struct netdev *netdev,
>      add_count_action(actions);
>      NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
>          if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
> -            if (add_output_action(netdev, actions, nla)) {
> -                return -1;
> +            /* add output action only if full-offload */
> +            if (!info->partial_actions) {
> +                if (add_output_action(netdev, actions, nla)) {
> +                    return -1;
> +                }
>              }
>          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) {
>              free_flow_actions(actions);
> @@ -1821,7 +1829,7 @@ parse_flow_actions(struct netdev *netdev,
>              size_t clone_actions_len = nl_attr_get_size(nla);
>
>              if (parse_clone_actions(netdev, actions, clone_actions,
> -                                    clone_actions_len)) {
> +                                    clone_actions_len, info)) {
>                  return -1;
>              }
>          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) {
> @@ -1899,16 +1907,22 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
>                              size_t actions_len,
>                              const ovs_u128 *ufid,
>                              struct act_resources *act_resources,
> -                            struct flows_handle *flows)
> +                            struct flows_handle *flows,
> +                            struct offload_info *info)
>  {
> -    const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
> +    struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
>      struct flow_actions actions = { .actions = NULL, .cnt = 0 };
>      struct flow_item flow_item = { .devargs = NULL };
>      struct rte_flow_error error;
>      int ret;
>
> +    if (info->attr_egress) {
> +        flow_attr.ingress = 0;
> +        flow_attr.egress = 1;
>
> Should also .transfer = 0;

[Harsha] agreed.

>
> +    }
> +
>      ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len,
> -                             act_resources);
> +                             act_resources, info);
>      if (ret) {
>          goto out;
>      }
> @@ -1957,8 +1971,15 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
>
>      ret = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
>                                        actions_len, ufid, &act_resources,
> -                                      &flows);
> -    if (ret) {
> +                                      &flows, info);
> +    if (!ret) {
> +        if (info->partial_actions && info->attr_egress) {
> +            /* actions_offloaded should be set to false with partial actions,
> +             * since it is still considered as partial-offload and not
> +             * full-offload. */
> +            actions_offloaded = false;
> +        }
> +    } else {
>          /* If we failed to offload the rule actions fallback to MARK+RSS
>           * actions.
>           */
> diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
> index d6dd98367..3c079cc9d 100644
> --- a/lib/netdev-offload.h
> +++ b/lib/netdev-offload.h
> @@ -67,6 +67,8 @@ struct offload_info {
>
>      bool recirc_id_shared_with_tc;  /* Indicates whever tc chains will be in
>                                       * sync with datapath recirc ids. */
> +    uint8_t attr_egress;      /* Egress direction offload */
> +    uint8_t partial_actions;  /* Partial action offload; no forward action */
>
>      /*
>       * The flow mark id assigened to the flow. If any pkts hit the flow,


More information about the dev mailing list