[ovs-dev] [RFC PATCH 2/2] dpif-netdev: Avoid recirculation during vxlan decap.

Bhanuprakash Bodireddy bhanuprakash.bodireddy at intel.com
Fri Jan 12 17:45:33 UTC 2018


This commit avoids datapath recirculation during packet decapsulation
by combining actions at upcall handling.

This patch uses the PTYPEs feature provided by NICs to detect the
tunnel packets and combines the actions at upcall processing there by
skipping recirculation.

If PTYPEs aren't supported by a NIC on DUT or if actions can't be
combined due to complex flow rules, this patch falls back to previous
recirculation approach.

Test results: Decap alone test 118 byte packets.

  CFLAGS="-O3 -march=native"
    OvS Master(491e05c2)   5.336 Mpps
    OvS Master + PATCH     6.224 Mpps

  CFLAGS="-g -O2"
    OvS Master(491e05c2)   4.903 Mpps
    OvS Master + PATCH     5.824 Mpps

The performance gain is approximately 15%.

Suggested-by: Sugesh Chandran <sugesh.chandran at intel.com>
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy at intel.com>
---
 lib/dpif-netdev.c             | 193 +++++++++++++++++++++++++++++++++++-------
 lib/dpif.c                    |   3 +-
 lib/dpif.h                    |   2 +-
 lib/netdev-native-tnl.c       |   8 ++
 lib/netdev-native-tnl.h       |   3 +
 lib/odp-execute.c             |   2 +-
 lib/odp-execute.h             |   3 +-
 ofproto/ofproto-dpif-upcall.c |  16 ++--
 8 files changed, 190 insertions(+), 40 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a34a1c5..c786578 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -54,6 +54,7 @@
 #include "netdev.h"
 #include "netdev-vport.h"
 #include "netlink.h"
+#include "netdev-native-tnl.h"
 #include "odp-execute.h"
 #include "odp-util.h"
 #include "openvswitch/dynamic-string.h"
@@ -4849,7 +4850,8 @@ static int
 dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
                  struct flow *flow, struct flow_wildcards *wc, ovs_u128 *ufid,
                  enum dpif_upcall_type type, const struct nlattr *userdata,
-                 struct ofpbuf *actions, struct ofpbuf *put_actions)
+                 struct ofpbuf *actions, struct ofpbuf *put_actions,
+                 bool install_flow)
 {
     struct dp_netdev *dp = pmd->dp;
 
@@ -4883,7 +4885,8 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
     }
 
     return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
-                         actions, wc, put_actions, dp->upcall_aux);
+                         actions, wc, put_actions, dp->upcall_aux,
+                         install_flow);
 }
 
 static inline uint32_t
@@ -5096,6 +5099,109 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
     return dp_packet_batch_size(packets_);
 }
 
+static bool
+has_tunnel_pop_action(struct ofpbuf *actions, odp_port_t *portno)
+{
+    int pop_act_cnt = 0;
+    int act_num = 0;
+    int act_index = 0;
+
+    const struct nlattr *a;
+    size_t left;
+    NL_ATTR_FOR_EACH_UNSAFE (a, left, actions->data, actions->size) {
+        act_num++;
+        if (nl_attr_type(a) == OVS_ACTION_ATTR_TUNNEL_POP) {
+            *portno = nl_attr_get_odp_port(a);
+            act_index = act_num;
+            pop_act_cnt++;
+        }
+    }
+
+    /* Return true if actions has pop action and is the last action
+     * in the list. */
+    if ((pop_act_cnt == 1) && (act_index == act_num)) {
+        return true;
+    }
+
+    return false;
+}
+
+static inline int
+handle_packet_tnl_upcall(struct dp_netdev_pmd_thread *pmd,
+                         struct dp_packet *packet, struct match *match,
+                         ovs_u128 *ufid, const struct netdev_flow_key *key,
+                         struct ofpbuf *actions, struct ofpbuf *put_actions,
+                         int *lost_cnt)
+{
+    int error;
+
+    /* Process tnl header:
+     *   (a) Take temporary copy of packet.
+     *   (b) Reset tunnel ip_dst field so that miniflow extraction
+     *       would skip parsing pre-extracted tunnel info in metadata.
+     *       Tunnel header is popped in packet_mf_extract(), the
+     *       inner header and data is extracted now.
+     *   (c) Invoke upcall processing and store the actions in
+     *       tmp_actions, which later get copied in to actions.
+     */
+    struct dp_packet tmp_pkt;
+    memcpy(&tmp_pkt, packet, sizeof(struct dp_packet));
+
+    tmp_pkt.md.tunnel.ip_dst = 0;
+    struct netdev_flow_key tkey;
+    miniflow_extract(&tmp_pkt, &tkey.mf);
+    miniflow_expand(&tkey.mf, &match->flow);
+
+    uint64_t actions_stub[512 / 8];
+    struct ofpbuf tmp_actions;
+    ofpbuf_use_stub(&tmp_actions, actions_stub, sizeof actions_stub);
+    error = dp_netdev_upcall(pmd, packet, &match->flow, &match->wc,
+                         ufid, DPIF_UC_MISS, NULL, &tmp_actions,
+                         put_actions, false);
+    if (OVS_UNLIKELY(error && error != ENOSPC)) {
+        dp_packet_delete(packet);
+        (*lost_cnt)++;
+        return error;
+    } else {
+        /* Append the actions in tmp_actions to 'actions'. */
+        ofpbuf_put(actions, tmp_actions.data, tmp_actions.size);
+    }
+
+    odp_port_t tnl_portno = ODPP_NONE;
+    if (has_tunnel_pop_action(actions, &tnl_portno)) {
+        miniflow_expand(&key->mf, &match->flow);
+        dpif_flow_hash(pmd->dp->dpif, &match->flow, sizeof match->flow, ufid);
+        ofpbuf_use_stub(&tmp_actions, actions_stub, sizeof actions_stub);
+
+        if (tnl_portno != ODPP_NONE) {
+           /* Update the flow in_port to tunnel port to pretend as if packet
+              just arrived from tunnel port. */
+            match->flow.in_port.ofp_port = tnl_portno;
+
+            error = dp_netdev_upcall(pmd, packet, &match->flow, &match->wc,
+                         ufid, DPIF_UC_MISS, NULL, &tmp_actions,
+                         put_actions, true);
+            if (OVS_UNLIKELY(error && error != ENOSPC)) {
+                dp_packet_delete(packet);
+                (*lost_cnt)++;
+                return error;
+            } else {
+                /* Append the actions in tmp_actions to actions. */
+                ofpbuf_put(actions, tmp_actions.data, tmp_actions.size);
+            }
+
+            /* Reset the inport of the flow to the original port the packet
+               arrived. */
+            match->flow.in_port.odp_port = packet->md.in_port.odp_port;
+
+            /* Enable tunnel mask. */
+            memset(&match->wc.masks.tunnel, 0xff, sizeof(struct flow_tnl));
+        }
+    }
+
+    return error;
+}
+
 static inline void
 handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
                      struct dp_packet *packet,
@@ -5110,19 +5216,24 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
     int error;
 
     match.tun_md.valid = false;
-    miniflow_expand(&key->mf, &match.flow);
-
     ofpbuf_clear(actions);
     ofpbuf_clear(put_actions);
 
-    dpif_flow_hash(pmd->dp->dpif, &match.flow, sizeof match.flow, &ufid);
-    error = dp_netdev_upcall(pmd, packet, &match.flow, &match.wc,
+    if (dp_packet_is_tunnel(packet, NULL)) {
+        error = handle_packet_tnl_upcall(pmd, packet, &match, &ufid, key,
+                                         actions, put_actions, lost_cnt);
+    } else {
+        miniflow_expand(&key->mf, &match.flow);
+        dpif_flow_hash(pmd->dp->dpif, &match.flow, sizeof match.flow, &ufid);
+
+        error = dp_netdev_upcall(pmd, packet, &match.flow, &match.wc,
                              &ufid, DPIF_UC_MISS, NULL, actions,
-                             put_actions);
-    if (OVS_UNLIKELY(error && error != ENOSPC)) {
-        dp_packet_delete(packet);
-        (*lost_cnt)++;
-        return;
+                             put_actions, true);
+        if (OVS_UNLIKELY(error && error != ENOSPC)) {
+            dp_packet_delete(packet);
+            (*lost_cnt)++;
+            return;
+        }
     }
 
     /* The Netlink encoding of datapath flow keys cannot express
@@ -5470,7 +5581,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
 
     error = dp_netdev_upcall(pmd, packet, flow, NULL, ufid,
                              DPIF_UC_ACTION, userdata, actions,
-                             NULL);
+                             NULL, true);
     if (!error || error == ENOSPC) {
         dp_packet_batch_init_packet(&b, packet);
         dp_netdev_execute_actions(pmd, &b, may_steal, flow,
@@ -5482,7 +5593,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
 
 static void
 dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
-              const struct nlattr *a, bool may_steal)
+              const struct nlattr *a, bool may_steal, bool last_action)
     OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     struct dp_netdev_execute_aux *aux = aux_;
@@ -5544,30 +5655,50 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
 
             p = pmd_tnl_port_cache_lookup(pmd, portno);
             if (p) {
-                struct dp_packet_batch tnl_pkt;
+                if (last_action) {
+                    struct dp_packet_batch tnl_pkt;
 
-                if (!may_steal) {
-                    dp_packet_batch_clone(&tnl_pkt, packets_);
-                    packets_ = &tnl_pkt;
-                    dp_packet_batch_reset_cutlen(orig_packets_);
-                }
+                    if (!may_steal) {
+                        dp_packet_batch_clone(&tnl_pkt, packets_);
+                        packets_ = &tnl_pkt;
+                        dp_packet_batch_reset_cutlen(orig_packets_);
+                    }
 
-                dp_packet_batch_apply_cutlen(packets_);
+                    dp_packet_batch_apply_cutlen(packets_);
+
+                    netdev_pop_header(p->port->netdev, packets_);
+                    if (dp_packet_batch_is_empty(packets_)) {
+                        return;
+                    }
 
-                netdev_pop_header(p->port->netdev, packets_);
-                if (dp_packet_batch_is_empty(packets_)) {
+                    struct dp_packet *packet;
+                    DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+                        packet->md.in_port.odp_port = portno;
+                    }
+
+                    (*depth)++;
+                    dp_netdev_recirculate(pmd, packets_);
+                    (*depth)--;
                     return;
-                }
+                } else {
+                    struct dp_packet *packet;
+                    size_t i, size = dp_packet_batch_size(packets_);
 
-                struct dp_packet *packet;
-                DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
-                    packet->md.in_port.odp_port = portno;
-                }
+                    dp_packet_batch_apply_cutlen(packets_);
 
-                (*depth)++;
-                dp_netdev_recirculate(pmd, packets_);
-                (*depth)--;
-                return;
+                    DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet,
+                                                     packets_) {
+                        packet = netdev_vxlan_reset_header(packet);
+                        if (packet) {
+                            reset_dp_packet_checksum_ol_flags(packet);
+                            dp_packet_batch_refill(packets_, packet, i);
+                        }
+                    }
+
+                    if (dp_packet_batch_is_empty(packets_)) {
+                        return;
+                    }
+                }
             }
         }
         break;
diff --git a/lib/dpif.c b/lib/dpif.c
index ab2e232..9999f7a 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1163,7 +1163,8 @@ struct dpif_execute_helper_aux {
  * meaningful. */
 static void
 dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
-                       const struct nlattr *action, bool may_steal)
+                       const struct nlattr *action, bool may_steal,
+                       bool last_action OVS_UNUSED)
 {
     struct dpif_execute_helper_aux *aux = aux_;
     int type = nl_attr_type(action);
diff --git a/lib/dpif.h b/lib/dpif.h
index 2e739dc..1f72ba2 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -843,7 +843,7 @@ typedef int upcall_callback(const struct dp_packet *packet,
                             struct ofpbuf *actions,
                             struct flow_wildcards *wc,
                             struct ofpbuf *put_actions,
-                            void *aux);
+                            void *aux, bool install_flow);
 
 void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux);
 
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index fb5eab0..5f772aa 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -499,6 +499,14 @@ netdev_gre_build_header(const struct netdev *netdev,
 }
 
 struct dp_packet *
+netdev_vxlan_reset_header(struct dp_packet *packet)
+{
+    int hlen = packet->l4_ofs;
+    dp_packet_reset_packet(packet, hlen + VXLAN_HLEN);
+    return packet;
+}
+
+struct dp_packet *
 netdev_vxlan_pop_header(struct dp_packet *packet)
 {
     struct pkt_metadata *md = &packet->md;
diff --git a/lib/netdev-native-tnl.h b/lib/netdev-native-tnl.h
index a912ce9..8471d92 100644
--- a/lib/netdev-native-tnl.h
+++ b/lib/netdev-native-tnl.h
@@ -56,6 +56,9 @@ netdev_vxlan_build_header(const struct netdev *netdev,
                           const struct netdev_tnl_build_header_params *params);
 
 struct dp_packet *
+netdev_vxlan_reset_header(struct dp_packet *packet);
+
+struct dp_packet *
 netdev_vxlan_pop_header(struct dp_packet *packet);
 
 static inline bool
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index ebaea31..47ae64b 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -712,7 +712,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
                  * not need it any more. */
                 bool may_steal = steal && last_action;
 
-                dp_execute_action(dp, batch, a, may_steal);
+                dp_execute_action(dp, batch, a, may_steal, last_action);
 
                 if (last_action || batch->count == 0) {
                     /* We do not need to free the packets.
diff --git a/lib/odp-execute.h b/lib/odp-execute.h
index 7223fe8..206fdba 100644
--- a/lib/odp-execute.h
+++ b/lib/odp-execute.h
@@ -29,7 +29,8 @@ struct pkt_metadata;
 struct dp_packet_batch;
 
 typedef void (*odp_execute_cb)(void *dp, struct dp_packet_batch *batch,
-                               const struct nlattr *action, bool may_steal);
+                               const struct nlattr *action, bool may_steal,
+                               bool last_action);
 
 /* Actions that need to be executed in the context of a datapath are handed
  * to 'dp_execute_action', if non-NULL.  Currently this is called only for
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index cb5018d..58fc026 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -201,6 +201,7 @@ struct upcall {
     struct ofproto_dpif *ofproto;  /* Parent ofproto. */
     const struct recirc_id_node *recirc; /* Recirculation context. */
     bool have_recirc_ref;                /* Reference held on recirc ctx? */
+    bool install_flow;                   /* Install flow if set to true. */
 
     /* The flow and packet are only required to be constant when using
      * dpif-netdev.  If a modification is absolutely necessary, a const cast
@@ -383,7 +384,8 @@ static int upcall_receive(struct upcall *, const struct dpif_backer *,
                           const struct dp_packet *packet, enum dpif_upcall_type,
                           const struct nlattr *userdata, const struct flow *,
                           const unsigned int mru,
-                          const ovs_u128 *ufid, const unsigned pmd_id);
+                          const ovs_u128 *ufid, const unsigned pmd_id,
+                          bool install_flow);
 static void upcall_uninit(struct upcall *);
 
 static upcall_callback upcall_cb;
@@ -800,7 +802,7 @@ recv_upcalls(struct handler *handler)
 
         error = upcall_receive(upcall, udpif->backer, &dupcall->packet,
                                dupcall->type, dupcall->userdata, flow, mru,
-                               &dupcall->ufid, PMD_ID_NULL);
+                               &dupcall->ufid, PMD_ID_NULL, true);
         if (error) {
             if (error == ENODEV) {
                 /* Received packet on datapath port for which we couldn't
@@ -1073,7 +1075,7 @@ upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
                const struct dp_packet *packet, enum dpif_upcall_type type,
                const struct nlattr *userdata, const struct flow *flow,
                const unsigned int mru,
-               const ovs_u128 *ufid, const unsigned pmd_id)
+               const ovs_u128 *ufid, const unsigned pmd_id, bool install_flow)
 {
     int error;
 
@@ -1101,6 +1103,7 @@ upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
 
     upcall->recirc = NULL;
     upcall->have_recirc_ref = false;
+    upcall->install_flow = install_flow;
     upcall->flow = flow;
     upcall->packet = packet;
     upcall->ufid = ufid;
@@ -1230,6 +1233,8 @@ should_install_flow(struct udpif *udpif, struct upcall *upcall)
     } else if (upcall->recirc && !upcall->have_recirc_ref) {
         VLOG_DBG_RL(&rl, "upcall: no reference for recirc flow");
         return false;
+    } else if (!upcall->install_flow) {
+        return false;
     }
 
     atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
@@ -1245,7 +1250,8 @@ static int
 upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
           unsigned pmd_id, enum dpif_upcall_type type,
           const struct nlattr *userdata, struct ofpbuf *actions,
-          struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux)
+          struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux,
+          bool install_flow)
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
     struct udpif *udpif = aux;
@@ -1256,7 +1262,7 @@ upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufi
     atomic_read_relaxed(&enable_megaflows, &megaflow);
 
     error = upcall_receive(&upcall, udpif->backer, packet, type, userdata,
-                           flow, 0, ufid, pmd_id);
+                           flow, 0, ufid, pmd_id, install_flow);
     if (error) {
         return error;
     }
-- 
2.4.11



More information about the dev mailing list