[ovs-dev] [RFC v2 PATCH 4/5] dpif-netdev: Skip encap action during datapath execution

Sriharsha Basavapatna sriharsha.basavapatna at broadcom.com
Mon May 18 19:27:30 UTC 2020


In this patch we check whether action processing (apart from the OUTPUT
action) should be skipped for a given dp_netdev_flow. Specifically, if
the action is TNL_PUSH and it has been offloaded to HW, we do not push
the tunnel header in SW; the datapath executes only the OUTPUT action,
and the packet is encapsulated in HW during transmit.
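
In outline, the tunnel-push handling in dp_execute_cb() becomes the
following (a simplified sketch of the change below, with locking and
the flow-api check omitted):

    if (dp_flow && dp_flow->partial_actions_offloaded) {
        /* TNL_PUSH already applied in HW; skip the SW encap. */
        COVERAGE_ADD(datapath_skip_tunnel_push, packet_count);
    } else if (push_tnl_action(pmd, a, packets_)) {
        COVERAGE_ADD(datapath_drop_tunnel_push_error, packet_count);
    }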

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna at broadcom.com>
---
 lib/dpif-netdev.c | 247 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 224 insertions(+), 23 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4315f237c..07d16ad61 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -112,6 +112,7 @@ COVERAGE_DEFINE(datapath_drop_recirc_error);
 COVERAGE_DEFINE(datapath_drop_invalid_port);
 COVERAGE_DEFINE(datapath_drop_invalid_tnl_port);
 COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
+COVERAGE_DEFINE(datapath_skip_tunnel_push);
 
 /* Protects against changes to 'dp_netdevs'. */
 static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
@@ -547,6 +548,7 @@ struct dp_netdev_flow {
      */
     bool partial_actions_offloaded;
     odp_port_t  egress_offload_port;
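+    /* Protects 'partial_actions_offloaded' and 'egress_offload_port',
+     * which are accessed by both PMD threads and the offload thread. */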
+    struct ovs_mutex partial_action_offload_mutex;
 
     /* Statistics. */
     struct dp_netdev_flow_stats stats;
@@ -801,7 +803,8 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
                                       bool should_steal,
                                       const struct flow *flow,
                                       const struct nlattr *actions,
-                                      size_t actions_len);
+                                      size_t actions_len,
+                                      const struct dp_netdev_flow *dp_flow);
 static void dp_netdev_input(struct dp_netdev_pmd_thread *,
                             struct dp_packet_batch *, odp_port_t port_no);
 static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
@@ -2361,17 +2364,159 @@ dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload)
     ovs_mutex_unlock(&dp_flow_offload.mutex);
 }
 
+/*
+ * Mapping structure to map ufid to a partial offload egress device.
+ * Synchronization: accessed only in the context of the offload thread.
+ */
+struct ufid_to_egdev_data {
+    const struct cmap_node node;   /* link to cmap */
+    ovs_u128 mega_ufid;            /* mega-ufid being mapped */
+    odp_port_t egress_port_num;    /* Port number mapped to */
+    const struct dp_netdev_flow *flow;   /* flow that maps to this ufid */
+};
+
+/*
+ * A mapping from mega-ufid to partial-offload egress-device.
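+ * Multiple entries may exist for the same mega-ufid, one per flow; the
+ * number of such entries serves as a reference count on the HW egress
+ * flow (see ufid_to_egdev_refcnt()).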
+ */
+static struct cmap ufid_to_egdev = CMAP_INITIALIZER;
+
+/* Rate limiter for the ufid-to-egdev debug messages below. */
+static struct vlog_rate_limit egdev_rl = VLOG_RATE_LIMIT_INIT(100, 5);
+
+static uint32_t
+ufid_to_egdev_refcnt(const ovs_u128 *mega_ufid)
+{
+    size_t hash = hash_bytes(mega_ufid, sizeof *mega_ufid, 0);
+    struct ufid_to_egdev_data *data;
+    uint32_t refcnt = 0;
+
+    CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_egdev) {
+        if (ovs_u128_equals(*mega_ufid, data->mega_ufid)) {
+            refcnt++;
+        }
+    }
+
+    return refcnt;
+}
+
+/* Find the egdev_data entry for the given (ufid, flow) pair. */
+static struct ufid_to_egdev_data *
+ufid_to_egdev_data_find(const ovs_u128 *ufid,
+                        const struct dp_netdev_flow *flow)
+{
+    size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
+    struct ufid_to_egdev_data *data;
+
+    CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_egdev) {
+        if (data->flow == flow && ovs_u128_equals(*ufid, data->mega_ufid)) {
+            return data;
+        }
+    }
+
+    return NULL;
+}
+
+/* Map the given mega-ufid and flow pair to the given port. Returns 0
+ * when the first mapping for the mega-ufid is created. On subsequent
+ * calls, a new flow with the same mega-ufid still gets a mapping entry,
+ * but EEXIST is returned (i.e., at least one other flow with the same
+ * mega-ufid already exists in the table). If the mega-ufid and flow
+ * pair is already mapped, returns EEXIST.
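+ *
+ * For example, for two (hypothetical) flows F1 and F2 that share
+ * mega-ufid U and egress port P:
+ *   map_ufid_to_egdev(U, F1, P) -> 0       (first mapping; offload to HW)
+ *   map_ufid_to_egdev(U, F2, P) -> EEXIST  (share the existing HW flow)
+ *   map_ufid_to_egdev(U, F1, P) -> EEXIST  (pair already mapped)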
+ */
 static int
-partial_offload_egress_flow_del(struct dp_netdev_pmd_thread *pmd,
-                                struct dp_netdev_flow *flow)
+map_ufid_to_egdev(const ovs_u128 *mega_ufid,
+                  const struct dp_netdev_flow *flow,
+                  odp_port_t egress_port_num)
+{
+    struct ufid_to_egdev_data *data;
+    size_t hash;
+
+    data = ufid_to_egdev_data_find(mega_ufid, flow);
+    if (data) {
+        /* mapping already exists for the given <mega-ufid,flow> pair */
+        VLOG_DBG_RL("ufid_to_egdev mapping already exists for flow: %p\n",
+                    flow);
+        return EEXIST;
+    }
+
+    data = xzalloc(sizeof *data);
+    data->mega_ufid = *mega_ufid;
+    data->egress_port_num = egress_port_num;
+    data->flow = flow;
+
+    hash = hash_bytes(mega_ufid, sizeof *mega_ufid, 0);
+    cmap_insert(&ufid_to_egdev,
+                CONST_CAST(struct cmap_node *, &data->node), hash);
+
+    if (ufid_to_egdev_refcnt(mega_ufid) > 1) {
+        /* at least one mapping exists for the mega_ufid */
+        VLOG_DBG_RL("At least one ufid_to_egdev mapping exists, flow: %p\n",
+                    flow);
+        return EEXIST;
+    }
+
+    /* first mapping created for the mega_ufid */
+    VLOG_DBG_RL("Created the first ufid_to_egdev mapping; flow: %p\n", flow);
+    return 0;
+}
+
+static uint32_t
+unmap_ufid_to_egdev(const ovs_u128 *mega_ufid,
+                    const struct dp_netdev_flow *flow)
 {
-    int ret;
-    struct netdev *port;
-    odp_port_t out_port = flow->egress_offload_port;
+    struct ufid_to_egdev_data *data;
+    uint32_t refcnt;
+    size_t hash;
+
+    data = ufid_to_egdev_data_find(mega_ufid, flow);
+    ovs_assert(data);
+
+    hash = hash_bytes(&data->mega_ufid, sizeof data->mega_ufid, 0);
+    cmap_remove(&ufid_to_egdev,
+                CONST_CAST(struct cmap_node *, &data->node), hash);
+    ovsrcu_postpone(free, data);
+
+    refcnt = ufid_to_egdev_refcnt(mega_ufid);
+    VLOG_DBG_RL("Unmapped ufid_to_egdev: flow: %p, refcnt: %d\n",
+                flow, refcnt);
+
+    return refcnt;
+}
+
+static inline void
+partial_action_offload_lock(struct dp_netdev_flow *flow)
+{
+    ovs_mutex_lock(&flow->partial_action_offload_mutex);
+}
+
+static inline void
+partial_action_offload_unlock(struct dp_netdev_flow *flow)
+{
+    ovs_mutex_unlock(&flow->partial_action_offload_mutex);
+}
+
+static int
+partial_offload_egress_flow_del(struct dp_flow_offload_item *offload)
+{
+    struct dp_netdev_pmd_thread *pmd = offload->pmd;
+    struct dp_netdev_flow *flow = offload->flow;
     const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
+    struct netdev *port;
+    uint32_t refcnt;
+    int ret;
+
+    partial_action_offload_lock(flow);
+    refcnt = unmap_ufid_to_egdev(&flow->mega_ufid, flow);
+    if (refcnt) {
+        /* Other flows with the same mega-ufid still reference the HW
+         * egress flow; just clear this flow's offload state. */
+        flow->egress_offload_port = ODPP_NONE;
+        flow->partial_actions_offloaded = false;
+        partial_action_offload_unlock(flow);
+        return 0;
+    }
 
-    port = netdev_ports_get(out_port, dpif_type_str);
+    /* The egress dev is not referenced by any flow with the given ufid.
+     * We can now remove the partial-action egress-flow from hardware.
+     */
+    port = netdev_ports_get(flow->egress_offload_port, dpif_type_str);
     if (!port) {
+        partial_action_offload_unlock(flow);
         return -1;
     }
 
@@ -2382,15 +2527,27 @@ partial_offload_egress_flow_del(struct dp_netdev_pmd_thread *pmd,
     ovs_mutex_unlock(&pmd->dp->port_mutex);
     netdev_close(port);
 
+    if (ret) {
+        partial_action_offload_unlock(flow);
+        return ret;
+    }
+
+    flow->egress_offload_port = ODPP_NONE;
+    flow->partial_actions_offloaded = false;
+
+    partial_action_offload_unlock(flow);
+
+    VLOG_DBG_RL(&egdev_rl,
+                "Deleted partial-offloaded egress flow: %p pmd: %p id: %u",
+                flow, pmd, flow->pmd_id);
     return ret;
 }
 
 static int
 dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload)
 {
-    if (offload->flow->partial_actions_offloaded &&
-        offload->flow->egress_offload_port != ODPP_NONE) {
-        return partial_offload_egress_flow_del(offload->pmd, offload->flow);
+    if (unlikely(offload->flow->partial_actions_offloaded &&
+                 offload->flow->egress_offload_port != ODPP_NONE)) {
+        return partial_offload_egress_flow_del(offload);
     } else {
         return mark_to_flow_disassociate(offload->pmd, offload->flow);
     }
@@ -2608,7 +2765,8 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
     info.attr_egress = 0;
     info.partial_actions = 0;
 
-    if (dp_netdev_partial_offload_supported(port, offload, &egress_port)) {
+    if (unlikely(dp_netdev_partial_offload_supported(port, offload,
+                                                     &egress_port))) {
         if (egress_port) {
             netdev_close(port);
             port = egress_port;
@@ -2618,11 +2776,25 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
         info.partial_actions = 1;
     }
 
-    if (alloc_mark && dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
-            /* flow already offloaded */
+    if (unlikely(info.partial_actions && egress_port)) {
+        partial_action_offload_lock(flow);
+        if (map_ufid_to_egdev(&flow->mega_ufid, flow,
+                              flow->egress_offload_port) == EEXIST) {
+            /* Partial action already offloaded for the ufid+egdev */
+            flow->partial_actions_offloaded = true;
+            partial_action_offload_unlock(flow);
             netdev_close(port);
+            VLOG_DBG_RL("Partial offload exists, flow: %p pmd: %p id: %d\n",
+                        flow, offload->pmd, flow->pmd_id);
             return 0;
+        }
+    } else if (alloc_mark &&
+               dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
+        /* Flow already offloaded. */
+        netdev_close(port);
+        return 0;
     }
+
     info.flow_mark = mark;
 
     /* Taking a global 'port_mutex' to fulfill thread safety restrictions for
@@ -2639,15 +2811,25 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
         goto err_free;
     }
 
-    if (info.partial_actions) {
+    if (unlikely(info.partial_actions && egress_port)) {
         flow->partial_actions_offloaded = true;
+        VLOG_DBG_RL("Partial offloaded (egress) flow: %p pmd: %p id: %d\n",
+                    flow, offload->pmd, flow->pmd_id);
+        partial_action_offload_unlock(flow);
     } else if (!modification) {
         megaflow_to_mark_associate(&flow->mega_ufid, mark);
         mark_to_flow_associate(mark, flow);
     }
+
     return 0;
 
 err_free:
+    if (unlikely(info.partial_actions && egress_port)) {
+        VLOG_DBG_RL(&egdev_rl,
+                    "Partial offload (egress) failed, flow: %p pmd: %p id: %u",
+                    flow, offload->pmd, flow->pmd_id);
+        unmap_ufid_to_egdev(&flow->mega_ufid, flow);
+        partial_action_offload_unlock(flow);
+    }
     if (mark != INVALID_FLOW_MARK) {
         if (!modification) {
             netdev_offload_flow_mark_free(mark);
@@ -3523,6 +3705,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
     flow->mark = INVALID_FLOW_MARK;
     flow->partial_actions_offloaded = false;
     flow->egress_offload_port = ODPP_NONE;
+    ovs_mutex_init(&flow->partial_action_offload_mutex);
     *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
     *CONST_CAST(struct flow *, &flow->flow) = match->flow;
     *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
@@ -4026,7 +4209,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
     dp_packet_batch_init_packet(&pp, execute->packet);
     pp.do_not_steal = true;
     dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
-                              execute->actions, execute->actions_len);
+                              execute->actions, execute->actions_len, NULL);
     dp_netdev_pmd_flush_output_packets(pmd, true);
 
     if (pmd->core_id == NON_PMD_CORE_ID) {
@@ -6654,7 +6837,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
     actions = dp_netdev_flow_get_actions(flow);
 
     dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow,
-                              actions->actions, actions->size);
+                              actions->actions, actions->size, flow);
 }
 
 static inline void
@@ -6962,7 +7145,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
      * we'll send the packet up twice. */
     dp_packet_batch_init_packet(&b, packet);
     dp_netdev_execute_actions(pmd, &b, true, &match.flow,
-                              actions->data, actions->size);
+                              actions->data, actions->size, NULL);
 
     add_actions = put_actions->size ? put_actions : actions;
     if (OVS_LIKELY(error != ENOSPC)) {
@@ -7197,6 +7380,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
 struct dp_netdev_execute_aux {
     struct dp_netdev_pmd_thread *pmd;
     const struct flow *flow;
+    const struct dp_netdev_flow *dp_flow;  /* For partial action offload. */
 };
 
 static void
@@ -7341,7 +7525,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
     if (!error || error == ENOSPC) {
         dp_packet_batch_init_packet(&b, packet);
         dp_netdev_execute_actions(pmd, &b, should_steal, flow,
-                                  actions->data, actions->size);
+                                  actions->data, actions->size, NULL);
     } else if (should_steal) {
         dp_packet_delete(packet);
         COVERAGE_INC(datapath_drop_userspace_action_error);
@@ -7360,6 +7544,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
     int type = nl_attr_type(a);
     struct tx_port *p;
     uint32_t packet_count, packets_dropped;
+    struct dp_netdev_flow *dp_flow
+        = CONST_CAST(struct dp_netdev_flow *, aux->dp_flow);
 
     switch ((enum ovs_action_attr)type) {
     case OVS_ACTION_ATTR_OUTPUT:
@@ -7417,9 +7602,24 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         }
         dp_packet_batch_apply_cutlen(packets_);
         packet_count = dp_packet_batch_size(packets_);
-        if (push_tnl_action(pmd, a, packets_)) {
-            COVERAGE_ADD(datapath_drop_tunnel_push_error,
-                         packet_count);
+        /* Execute tnl_push action in SW, only if it is not offloaded
+         * as a partial action in HW. Otherwise, HW pushes the tunnel
+         * header during output processing. */
+        if (likely(!netdev_is_flow_api_enabled() || !dp_flow)) {
+            if (push_tnl_action(pmd, a, packets_)) {
+                COVERAGE_ADD(datapath_drop_tunnel_push_error, packet_count);
+            }
+        } else { /* netdev_flow_api_enabled && dp_flow */
+            partial_action_offload_lock(dp_flow);
+            if (!dp_flow->partial_actions_offloaded) {
+                if (push_tnl_action(pmd, a, packets_)) {
+                    COVERAGE_ADD(datapath_drop_tunnel_push_error,
+                                 packet_count);
+                }
+            } else {
+                COVERAGE_ADD(datapath_skip_tunnel_push, packet_count);
+            }
+            partial_action_offload_unlock(dp_flow);
         }
         return;
 
@@ -7707,9 +7907,10 @@ static void
 dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
                           struct dp_packet_batch *packets,
                           bool should_steal, const struct flow *flow,
-                          const struct nlattr *actions, size_t actions_len)
+                          const struct nlattr *actions, size_t actions_len,
+                          const struct dp_netdev_flow *dp_flow)
 {
-    struct dp_netdev_execute_aux aux = { pmd, flow };
+    struct dp_netdev_execute_aux aux = { pmd, flow, dp_flow };
 
     odp_execute_actions(&aux, packets, should_steal, actions,
                         actions_len, dp_execute_cb);
-- 
2.25.0.rc2