[ovs-dev] [RFC v3 PATCH 5/5] dpif-netdev: Support partial-action-offload of VXLAN encap flow

Sriharsha Basavapatna sriharsha.basavapatna at broadcom.com
Thu May 28 08:19:11 UTC 2020


In this patch, we support offloading of VXLAN_ENCAP action for a vhost-user
port (aka "partial-action-offload"). At the time of offloading the flow, we
determine if the flow can be offloaded to an egress device, if the input
port is not offload capable such as a vhost-user port. We then offload the
flow with a VXLAN_ENCAP RTE action, to the egress device. We do not add
the OUTPUT RTE action, which indicates to the PMD that it is a partial
action offload request. Note that since the action is being offloaded in
egress direction, classification is expected to be done by OVS SW datapath
and hence there's no need to offload a MARK action.

If offload succeeds, we save the information in 'dp_netdev_flow' so that
we skip execution of the corresponding action (previous patch) during SW
datapath processing.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna at broadcom.com>
---
 lib/dpif-netdev.c         | 352 ++++++++++++++++++++++++++++++++++++--
 lib/netdev-offload-dpdk.c |  43 +++--
 lib/netdev-offload.h      |   2 +
 3 files changed, 376 insertions(+), 21 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2383bfd41..48bd5df3b 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2363,10 +2363,300 @@ dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload)
     ovs_mutex_unlock(&dp_flow_offload.mutex);
 }
 
+/*
+ * Mapping structure to map ufid to a partial offload egress device.
+ * Synchronization: accessed only in the context of the offload thread.
+ */
+struct ufid_to_egdev_data {
+    const struct cmap_node node;   /* link to cmap */
+    ovs_u128 mega_ufid;            /* mega-ufid being mapped */
+    odp_port_t egress_port_num;    /* Port number mapped to */
+    struct dp_netdev_flow *flow;   /* flow that maps to this ufid */
+};
+
+/*
+ * A mapping from mega-ufid to partial-offload egress-device.
+ */
+static struct cmap ufid_to_egdev = CMAP_INITIALIZER;
+
+static uint32_t
+ufid_to_egdev_refcnt(const ovs_u128 *mega_ufid)
+{
+    size_t hash = hash_bytes(mega_ufid, sizeof *mega_ufid, 0);
+    struct ufid_to_egdev_data *data;
+    uint32_t refcnt = 0;
+
+    CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_egdev) {
+        if (ovs_u128_equals(*mega_ufid, data->mega_ufid)) {
+            refcnt++;
+        }
+    }
+
+    return refcnt;
+}
+
+/* Find egdev_data @(ufid, flow) */
+static struct ufid_to_egdev_data *
+ufid_to_egdev_data_find(const ovs_u128 *ufid,
+                        const struct dp_netdev_flow *flow)
+{
+    size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
+    struct ufid_to_egdev_data *data;
+
+    CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_egdev) {
+        if (data->flow == flow && ovs_u128_equals(*ufid, data->mega_ufid)) {
+            return data;
+        }
+    }
+
+    return NULL;
+}
+
+/* Map the given pair of mega-ufid and flow to the given port. Returns 0
+ * when the mapping is created initially in the context of a flow. For
+ * subsequent calls, if it is a new flow with the same mega-ufid, creates
+ * a mapping entry but returns EEXIST (i.e., at least one other flow with
+ * the same ufid exists in the table). If it is an already mapped mega-ufid
+ * & flow pair, returns EEXIST.
+ */
+static int
+map_ufid_to_egdev(const ovs_u128 *mega_ufid,
+                  const struct dp_netdev_flow *flow,
+                  odp_port_t egress_port_num)
+{
+    struct ufid_to_egdev_data *data;
+    size_t hash;
+
+    data = ufid_to_egdev_data_find(mega_ufid, flow);
+    if (data) {
+        /* mapping already exists for the given <mega-ufid,flow> pair */
+        VLOG_DBG_RL("ufid_to_egdev mapping already exists for flow: %p\n",
+                    flow);
+        return EEXIST;
+    }
+
+    data = xzalloc(sizeof *data);
+    data->mega_ufid = *mega_ufid;
+    data->egress_port_num = egress_port_num;
+    data->flow = flow;
+
+    hash = hash_bytes(mega_ufid, sizeof *mega_ufid, 0);
+    cmap_insert(&ufid_to_egdev,
+                CONST_CAST(struct cmap_node *, &data->node), hash);
+
+    if (ufid_to_egdev_refcnt(mega_ufid) > 1) {
+        /* at least one mapping exists for the mega_ufid */
+        VLOG_DBG_RL("At least one ufid_to_egdev mapping exists, flow: %p\n",
+                    flow);
+        return EEXIST;
+    }
+
+    /* first mapping created for the mega_ufid */
+    VLOG_DBG_RL("Created the first ufid_to_egdev mapping; flow: %p\n", flow);
+    return 0;
+}
+
+static uint32_t
+unmap_ufid_to_egdev(const ovs_u128 *mega_ufid,
+                    const struct dp_netdev_flow *flow)
+{
+    struct ufid_to_egdev_data *data;
+    uint32_t refcnt;
+    size_t hash;
+
+    data = ufid_to_egdev_data_find(mega_ufid, flow);
+    ovs_assert(data);
+
+    hash = hash_bytes(&data->mega_ufid, sizeof data->mega_ufid, 0);
+    cmap_remove(&ufid_to_egdev,
+                CONST_CAST(struct cmap_node *, &data->node), hash);
+    ovsrcu_postpone(free, data);
+
+    refcnt = ufid_to_egdev_refcnt(mega_ufid);
+    VLOG_DBG_RL("Unmapped ufid_to_egdev: flow: %p, refcnt: %d\n",
+                flow, refcnt);
+
+    return refcnt;
+}
+
+static int
+partial_offload_egress_flow_del(struct dp_flow_offload_item *offload)
+{
+    struct dp_netdev_pmd_thread *pmd = offload->pmd;
+    struct dp_netdev_flow *flow = offload->flow;
+    const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
+    struct netdev *port;
+    uint32_t refcnt;
+    int ret;
+
+    refcnt = unmap_ufid_to_egdev(&flow->mega_ufid, offload->flow);
+    if (refcnt) {
+        flow->egress_offload_port = NULL;
+        flow->partial_actions_offloaded = false;
+        return 0;
+    }
+
+    /* The egress dev is not referenced by any flow with the given ufid.
+     * We can now remove the partial-action egress-flow from hardware.
+     */
+    port = netdev_ports_get(flow->egress_offload_port, dpif_type_str);
+    if (!port) {
+        return -1;
+    }
+
+    /* Taking a global 'port_mutex' to fulfill thread safety
+     * restrictions for the netdev-offload-dpdk module. */
+    ovs_mutex_lock(&pmd->dp->port_mutex);
+    ret = netdev_flow_del(port, &flow->mega_ufid, NULL);
+    ovs_mutex_unlock(&pmd->dp->port_mutex);
+    netdev_close(port);
+
+    if (ret) {
+        return ret;
+    }
+
+    flow->egress_offload_port = NULL;
+    flow->partial_actions_offloaded = false;
+
+    VLOG_DBG_RL("Deleted partial_offloaded egress flow: %p pmd: %p id: %d\n",
+                flow, pmd, offload->flow->pmd_id);
+    return ret;
+}
+
 static int
 dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload)
 {
-    return mark_to_flow_disassociate(offload->pmd, offload->flow);
+    if (unlikely(offload->flow->partial_actions_offloaded &&
+        offload->flow->egress_offload_port != ODPP_NONE)) {
+        return partial_offload_egress_flow_del(offload);
+    } else {
+        return mark_to_flow_disassociate(offload->pmd, offload->flow);
+    }
+}
+
+/* Structure to hold a nl_parsed OVS action */
+struct action_attr {
+    int type;                /* OVS action type */
+    struct nlattr *action;   /* action attribute */
+};
+
+/*
+ * Maximum number of actions to be parsed while selecting a flow for partial
+ * action offload. This number is currently based on the minimum number of
+ * attributes seen with the tunnel encap action (clone, tunnel_push, output).
+ * This number includes output action to a single egress device (uplink) and
+ * supports neither multiple clone() actions nor multiple output actions.
+ * This number could change if and when we support other actions or
+ * combinations of actions for partial offload.
+ */
+#define MAX_ACTION_ATTRS    3 /* Max # action attributes supported */
+
+/*
+ * This function parses the list of OVS "actions" of length "actions_len",
+ * and returns them in an array of action "attrs", of size "max_attrs".
+ * The parsed number of actions is returned in "num_attrs". If the number
+ * of actions exceeds "max_attrs", parsing is stopped and E2BIG is returned.
+ * Otherwise, returns success (0).
+ */
+static int
+parse_nlattr_actions(struct nlattr *actions, size_t actions_len,
+                     struct action_attr *attrs, int max_attrs, int *num_attrs)
+{
+    const struct nlattr *a;
+    unsigned int left;
+    int num_actions = 0;
+    int n_attrs = 0;
+    int rc = 0;
+    int type;
+
+    *num_attrs = 0;
+
+    NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
+        type = nl_attr_type(a);
+
+        if (num_actions >= max_attrs) {
+            *num_attrs = num_actions;
+            return E2BIG;
+        }
+
+        attrs[num_actions].type = type;
+        attrs[num_actions].action = a;
+        num_actions++;
+        if (type == OVS_ACTION_ATTR_CLONE) {
+            rc = parse_nlattr_actions(nl_attr_get(a), nl_attr_get_size(a),
+                                      &attrs[num_actions],
+                                      (max_attrs - num_actions), &n_attrs);
+            num_actions += n_attrs;
+            if (rc == E2BIG) {
+                *num_attrs = num_actions;
+                return rc;
+            }
+        }
+    }
+
+    *num_attrs = num_actions;
+    return 0;
+}
+
+/* This function determines if the given flow should be partially offloaded
+ * on the egress device, when the in-port is not offload-capable like a
+ * vhost-user port. The function currently supports offloading of only
+ * tunnel encap action.
+ */
+static bool
+should_partial_offload_egress(struct netdev *in_netdev,
+                              struct dp_flow_offload_item *offload,
+                              struct netdev **egress_netdev)
+{
+    const char *dpif_type_str =
+        dpif_normalize_type(offload->pmd->dp->class->type);
+    struct action_attr attrs[MAX_ACTION_ATTRS];
+    odp_port_t out_port = ODPP_NONE;
+    struct netdev *out_netdev;
+    int num_attrs = 0;
+    int type;
+    int rc;
+
+    /* Support egress partial-offload only when in-port is vhost-user. */
+    if (!is_dpdk_vhost_netdev(in_netdev)) {
+        return false;
+    }
+
+    rc = parse_nlattr_actions(offload->actions, offload->actions_len, attrs,
+                              MAX_ACTION_ATTRS, &num_attrs);
+    if (rc == E2BIG) {
+        /* Action list too big; decline partial offload */
+        return false;
+    }
+
+    /* Number of attrs expected with tunnel encap action */
+    if (num_attrs < MAX_ACTION_ATTRS) {
+        return false;
+    }
+
+    /* Only support clone sub-actions for now, tnl-push specifically. */
+    if (attrs[0].type != OVS_ACTION_ATTR_CLONE ||
+        attrs[1].type != OVS_ACTION_ATTR_TUNNEL_PUSH ||
+        attrs[2].type != OVS_ACTION_ATTR_OUTPUT) {
+        return false;
+    }
+
+    /* Egress partial-offload needs an output action at the end. */
+    out_port = nl_attr_get_odp_port(attrs[2].action);
+    if (out_port == ODPP_NONE) {
+        return false;
+    }
+
+    /* Support egress partial-offload only when out-port is offload capable. */
+    out_netdev = netdev_ports_get(out_port, dpif_type_str);
+    if (!out_netdev || !netdev_dpdk_flow_api_supported(out_netdev)) {
+        return false;
+    }
+
+    /* Flow can be egress partial-offloaded. */
+    *egress_netdev = out_netdev;
+    offload->flow->egress_offload_port = out_port;
+    return true;
 }
 
 static int
@@ -2427,7 +2717,9 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
     bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
     struct offload_info info;
     struct netdev *port;
-    uint32_t mark;
+    struct netdev *egress_port = NULL;
+    bool alloc_mark = true;
+    uint32_t mark = INVALID_FLOW_MARK;
     int ret;
 
     if (flow->dead) {
@@ -2439,11 +2731,36 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
         return -1;
     }
 
-    if (dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
-            /* flow already offloaded */
+    info.attr_egress = 0;
+    info.partial_actions = 0;
+
+    if (unlikely(should_partial_offload_egress(port, offload, &egress_port))) {
+        if (egress_port) {
+            netdev_close(port);
+            port = egress_port;
+            info.attr_egress = 1;
+            alloc_mark = false;
+        }
+        info.partial_actions = 1;
+    }
+
+    if (unlikely(info.partial_actions && egress_port)) {
+        if (map_ufid_to_egdev(&flow->mega_ufid, flow,
+            flow->egress_offload_port) == EEXIST) {
+            /* Partial action already offloaded for the ufid+egdev */
+            flow->partial_actions_offloaded = true;
             netdev_close(port);
+            VLOG_DBG_RL("Partial offload exists, flow: %p pmd: %p id: %d\n",
+                        flow, offload->pmd, flow->pmd_id);
             return 0;
+        }
+    } else if (alloc_mark &&
+               dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
+            /* flow already offloaded */
+        netdev_close(port);
+        return 0;
     }
+
     info.flow_mark = mark;
 
     /* Taking a global 'port_mutex' to fulfill thread safety restrictions for
@@ -2460,17 +2777,29 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
         goto err_free;
     }
 
-    if (!modification) {
+    if (unlikely(info.partial_actions && egress_port)) {
+        flow->partial_actions_offloaded = true;
+        VLOG_DBG_RL("Partial offloaded (egress) flow: %p pmd: %p id: %d\n",
+                    flow, offload->pmd, flow->pmd_id);
+    } else if (!modification) {
         megaflow_to_mark_associate(&flow->mega_ufid, mark);
         mark_to_flow_associate(mark, flow);
     }
+
     return 0;
 
 err_free:
-    if (!modification) {
-        netdev_offload_flow_mark_free(mark);
-    } else {
-        mark_to_flow_disassociate(pmd, flow);
+    if (unlikely(info.partial_actions) && egress_port) {
+        VLOG_DBG_RL("Partial offload(egress) failed flow: %p pmd: %p id: %d\n",
+                    flow, offload->pmd, flow->pmd_id);
+        unmap_ufid_to_egdev(&flow->mega_ufid, offload->flow);
+    }
+    if (mark != INVALID_FLOW_MARK) {
+        if (!modification) {
+            netdev_offload_flow_mark_free(mark);
+        } else {
+            mark_to_flow_disassociate(pmd, flow);
+        }
     }
     return -1;
 }
@@ -2585,7 +2914,8 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
     ovs_assert(cls != NULL);
     dpcls_remove(cls, &flow->cr);
     cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
-    if (flow->mark != INVALID_FLOW_MARK) {
+    if (flow->mark != INVALID_FLOW_MARK || (flow->partial_actions_offloaded
+        && flow->egress_offload_port != ODPP_NONE)) {
         queue_netdev_flow_del(pmd, flow);
     }
     flow->dead = true;
@@ -3343,6 +3673,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
     flow->dead = false;
     flow->batch = NULL;
     flow->mark = INVALID_FLOW_MARK;
+    flow->partial_actions_offloaded = false;
+    flow->egress_offload_port = ODPP_NONE;
     *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
     *CONST_CAST(struct flow *, &flow->flow) = match->flow;
     *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 7be504688..d61433e2b 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1698,7 +1698,8 @@ static int
 parse_clone_actions(struct netdev *netdev,
                     struct flow_actions *actions,
                     const struct nlattr *clone_actions,
-                    const size_t clone_actions_len)
+                    const size_t clone_actions_len,
+                    struct offload_info *info)
 {
     const struct nlattr *ca;
     unsigned int cleft;
@@ -1723,8 +1724,11 @@ parse_clone_actions(struct netdev *netdev,
             add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
                             raw_encap);
         } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
-            if (add_output_action(netdev, actions, ca)) {
-                return -1;
+            /* add output action only if full-offload */
+            if (!info->partial_actions) {
+                if (add_output_action(netdev, actions, ca)) {
+                    return -1;
+                }
             }
         } else {
             VLOG_DBG_RL(&rl,
@@ -1788,7 +1792,8 @@ parse_flow_actions(struct netdev *netdev,
                    struct flow_actions *actions,
                    struct nlattr *nl_actions,
                    size_t nl_actions_len,
-                   struct act_resources *act_resources)
+                   struct act_resources *act_resources,
+                   struct offload_info *info)
 {
     struct nlattr *nla;
     size_t left;
@@ -1796,7 +1801,9 @@ parse_flow_actions(struct netdev *netdev,
     if (!strcmp(netdev_get_type(netdev), "vxlan")) {
         add_vxlan_decap_action(actions);
     }
-    add_count_action(actions);
+    if (!info->partial_actions) {
+        add_count_action(actions);
+    }
     NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
         if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
             if (add_output_action(netdev, actions, nla)) {
@@ -1821,7 +1828,7 @@ parse_flow_actions(struct netdev *netdev,
             size_t clone_actions_len = nl_attr_get_size(nla);
 
             if (parse_clone_actions(netdev, actions, clone_actions,
-                                    clone_actions_len)) {
+                                    clone_actions_len, info)) {
                 return -1;
             }
         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) {
@@ -1899,16 +1906,23 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
                             size_t actions_len,
                             const ovs_u128 *ufid,
                             struct act_resources *act_resources,
-                            struct flows_handle *flows)
+                            struct flows_handle *flows,
+                            struct offload_info *info)
 {
-    const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
+    struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
     struct flow_actions actions = { .actions = NULL, .cnt = 0 };
     struct flow_item flow_item = { .devargs = NULL };
     struct rte_flow_error error;
     int ret;
 
+    if (info->attr_egress) {
+        flow_attr.ingress = 0;
+        flow_attr.egress = 1;
+        flow_attr.transfer = 0;
+    }
+
     ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len,
-                             act_resources);
+                             act_resources, info);
     if (ret) {
         goto out;
     }
@@ -1957,8 +1971,15 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
 
     ret = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
                                       actions_len, ufid, &act_resources,
-                                      &flows);
-    if (ret) {
+                                      &flows, info);
+    if (!ret) {
+        if (info->partial_actions && info->attr_egress) {
+            /* actions_offloaded should be set to false with partial actions,
+             * since it is still considered as partial-offload and not
+             * full-offload. */
+            actions_offloaded = false;
+        }
+    } else if (!(info->partial_actions && info->attr_egress)) {
         /* If we failed to offload the rule actions fallback to MARK+RSS
          * actions.
          */
diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
index d6dd98367..3c079cc9d 100644
--- a/lib/netdev-offload.h
+++ b/lib/netdev-offload.h
@@ -67,6 +67,8 @@ struct offload_info {
 
     bool recirc_id_shared_with_tc;  /* Indicates whever tc chains will be in
                                      * sync with datapath recirc ids. */
+    uint8_t attr_egress;      /* Egress direction offload */
+    uint8_t partial_actions;  /* Partial action offload; no forward action */
 
     /*
      * The flow mark id assigened to the flow. If any pkts hit the flow,
-- 
2.25.0.rc2



More information about the dev mailing list