[ovs-dev] [PATCH] tunneling: Combining actions to avoid packet recirculation in tunnel encapsulation.

Sugesh Chandran sugesh.chandran at intel.com
Fri Dec 30 15:36:32 UTC 2016


Currently the tunnel packets are recirculated in OVS after the tunnel
encapsulation. This patch combines the post tunnel actions with the tunnel
push to avoid unnecessary recirculation.
Two new datapath actions OVS_ATTR_COMBINE_START and OVS_ATTR_COMBINE_END are
added to implement this feature. These dummy new actions  used as a delimiters
for combined action subset.
In our testing this patch improves the VxLAN encapsulation performance upto 30%.

Signed-off-by: Sugesh Chandran <sugesh.chandran at intel.com>
Signed-off-by: Zoltán Balogh <zoltan.balogh at ericsson.com>
Co-authored-by: Zoltán Balogh <zoltan.balogh at ericsson.com>
---
 datapath/linux/compat/include/linux/openvswitch.h |  2 +
 lib/dpif-netdev.c                                 | 22 ++----
 lib/dpif.c                                        |  2 +
 lib/odp-execute.c                                 | 59 ++++++++++++++-
 lib/odp-util.c                                    | 68 ++++++++++++++++-
 lib/odp-util.h                                    |  6 ++
 ofproto/ofproto-dpif-sflow.c                      |  2 +
 ofproto/ofproto-dpif-xlate.c                      | 89 +++++++++++++++++++++++
 8 files changed, 228 insertions(+), 22 deletions(-)

diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index 12260d8..4e9c7f7 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -818,6 +818,8 @@ enum ovs_action_attr {
 #ifndef __KERNEL__
 	OVS_ACTION_ATTR_TUNNEL_PUSH,   /* struct ovs_action_push_tnl*/
 	OVS_ACTION_ATTR_TUNNEL_POP,    /* u32 port number. */
+	OVS_ACTION_ATTR_START_COMBINE,
+	OVS_ACTION_ATTR_END_COMBINE,
 #endif
 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
 				       * from userspace. */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 0b73056..2e59bd1 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4430,24 +4430,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
 
     case OVS_ACTION_ATTR_TUNNEL_PUSH:
         if (*depth < MAX_RECIRC_DEPTH) {
-            struct dp_packet_batch tnl_pkt;
-            struct dp_packet_batch *orig_packets_ = packets_;
-            int err;
-
-            if (!may_steal) {
-                dp_packet_batch_clone(&tnl_pkt, packets_);
-                packets_ = &tnl_pkt;
-                dp_packet_batch_reset_cutlen(orig_packets_);
-            }
-
             dp_packet_batch_apply_cutlen(packets_);
-
-            err = push_tnl_action(pmd, a, packets_);
-            if (!err) {
-                (*depth)++;
-                dp_netdev_recirculate(pmd, packets_);
-                (*depth)--;
-            }
+            push_tnl_action(pmd, a, packets_);
             return;
         }
         break;
@@ -4597,6 +4581,10 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         break;
     }
 
+    case OVS_ACTION_ATTR_START_COMBINE:
+        break;
+    case OVS_ACTION_ATTR_END_COMBINE:
+        break;
     case OVS_ACTION_ATTR_PUSH_VLAN:
     case OVS_ACTION_ATTR_POP_VLAN:
     case OVS_ACTION_ATTR_PUSH_MPLS:
diff --git a/lib/dpif.c b/lib/dpif.c
index 53958c5..6477f33 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1187,6 +1187,8 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
     case OVS_ACTION_ATTR_SAMPLE:
     case OVS_ACTION_ATTR_TRUNC:
     case OVS_ACTION_ATTR_UNSPEC:
+    case OVS_ACTION_ATTR_START_COMBINE:
+    case OVS_ACTION_ATTR_END_COMBINE:
     case __OVS_ACTION_ATTR_MAX:
         OVS_NOT_REACHED();
     }
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 4fcff16..c475f98 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -532,6 +532,8 @@ requires_datapath_assistance(const struct nlattr *a)
     case OVS_ACTION_ATTR_USERSPACE:
     case OVS_ACTION_ATTR_RECIRC:
     case OVS_ACTION_ATTR_CT:
+    case OVS_ACTION_ATTR_START_COMBINE:
+    case OVS_ACTION_ATTR_END_COMBINE:
         return true;
 
     case OVS_ACTION_ATTR_SET:
@@ -553,6 +555,29 @@ requires_datapath_assistance(const struct nlattr *a)
     return false;
 }
 
+static inline size_t
+find_combine_action_end(const struct nlattr **actions_, size_t *actions_len,
+                        const struct nlattr **comb_start,
+                        bool *comb_last_action)
+{
+    const struct nlattr *a;
+    const struct nlattr *actions = *actions_;
+    unsigned int left;
+    bool last_action;
+    size_t comb_size = nl_attr_get_u32(actions); /* Size of combined actions */
+    *actions_len -= NLA_ALIGN(actions->nla_len);
+    actions = nl_attr_next(actions);
+
+    *comb_start = actions;
+    a = (void *) ((uint8_t *) actions + NLA_ALIGN(comb_size));
+    left = (*actions_len) - comb_size;
+    last_action = (left <= NLA_ALIGN(a->nla_len));
+    *actions_ = a;
+    *comb_last_action = last_action ? true : false;
+    *actions_len = left;
+    return comb_size;
+}
+
 void
 odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
                     const struct nlattr *actions, size_t actions_len,
@@ -561,7 +586,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
     struct dp_packet **packets = batch->packets;
     int cnt = batch->count;
     const struct nlattr *a;
-    unsigned int left;
+    size_t left;
     int i;
 
     NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
@@ -573,7 +598,35 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
                 /* Allow 'dp_execute_action' to steal the packet data if we do
                  * not need it any more. */
                 bool may_steal = steal && last_action;
-
+                if ((enum ovs_action_attr) type ==
+                                           OVS_ACTION_ATTR_START_COMBINE) {
+                    size_t comb_action_len;
+                    const struct nlattr *comb_actions = NULL;
+                    bool comb_last_action = true;
+                    comb_action_len = find_combine_action_end(&a, &left,
+                                                            &comb_actions,
+                                                            &comb_last_action);
+                    if (OVS_UNLIKELY(!comb_action_len)) {
+                        /* No combine present, execute current action */
+                        goto execute;
+                    }
+                    if (OVS_LIKELY(comb_last_action)) {
+                        odp_execute_actions(dp, batch, 1, comb_actions,
+                                        comb_action_len, dp_execute_action);
+                        return;
+                    }
+                    else  {
+                        /* Packets to be duplicated for the sub action */
+                        struct dp_packet_batch comb_pkt_batch;
+                        dp_packet_batch_clone(&comb_pkt_batch, batch);
+                        dp_packet_batch_reset_cutlen(batch);
+                        odp_execute_actions(dp, &comb_pkt_batch, steal,
+                                        comb_actions, comb_action_len,
+                                        dp_execute_action);
+                    }
+                    continue;
+                }
+execute:
                 dp_execute_action(dp, batch, a, may_steal);
 
                 if (last_action) {
@@ -683,6 +736,8 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
         case OVS_ACTION_ATTR_RECIRC:
         case OVS_ACTION_ATTR_CT:
         case OVS_ACTION_ATTR_UNSPEC:
+        case OVS_ACTION_ATTR_START_COMBINE:
+        case OVS_ACTION_ATTR_END_COMBINE:
         case __OVS_ACTION_ATTR_MAX:
             OVS_NOT_REACHED();
         }
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 427dd65..9cb8629 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -121,6 +121,8 @@ odp_action_len(uint16_t type)
     case OVS_ACTION_ATTR_SET_MASKED: return ATTR_LEN_VARIABLE;
     case OVS_ACTION_ATTR_SAMPLE: return ATTR_LEN_VARIABLE;
     case OVS_ACTION_ATTR_CT: return ATTR_LEN_VARIABLE;
+    case OVS_ACTION_ATTR_START_COMBINE: return sizeof(uint32_t);
+    case OVS_ACTION_ATTR_END_COMBINE: return 0;
 
     case OVS_ACTION_ATTR_UNSPEC:
     case __OVS_ACTION_ATTR_MAX:
@@ -509,7 +511,7 @@ format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
                       gnh->oam ? "oam," : "",
                       gnh->critical ? "crit," : "",
                       ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
- 
+
         if (gnh->opt_len) {
             ds_put_cstr(ds, ",options(");
             format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,
@@ -865,6 +867,17 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
     case OVS_ACTION_ATTR_CT:
         format_odp_conntrack_action(ds, a);
         break;
+    case OVS_ACTION_ATTR_START_COMBINE: {
+        uint32_t comb_len;
+        comb_len = nl_attr_get_u32(a);
+        if (comb_len) {
+            ds_put_cstr(ds,"{");
+        }
+        break;
+    }
+    case OVS_ACTION_ATTR_END_COMBINE:
+        ds_put_cstr(ds,"}");
+        break;
     case OVS_ACTION_ATTR_UNSPEC:
     case __OVS_ACTION_ATTR_MAX:
     default:
@@ -873,6 +886,34 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
     }
 }
 
+static bool
+is_action_combined(const struct nlattr *a, size_t len)
+{
+    bool last_action = false;
+    last_action = (len <= NLA_ALIGN(a->nla_len));
+    enum ovs_action_attr type = nl_attr_type(a);
+    if (type ==  OVS_ACTION_ATTR_START_COMBINE) {
+        uint32_t comb_len;
+        comb_len = nl_attr_get_u32(a);
+        if (comb_len) {
+            return true;
+        }
+    }
+    else if (type == OVS_ACTION_ATTR_END_COMBINE) {
+        return true;
+    }
+    if (!last_action) {
+        type =  nl_attr_type(nl_attr_next(a));
+        if (type == OVS_ACTION_ATTR_END_COMBINE) {
+            return true;
+        }
+    }
+    else {
+        return true;
+    }
+    return false;
+}
+
 void
 format_odp_actions(struct ds *ds, const struct nlattr *actions,
                    size_t actions_len)
@@ -882,10 +923,10 @@ format_odp_actions(struct ds *ds, const struct nlattr *actions,
         unsigned int left;
 
         NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
-            if (a != actions) {
+            format_odp_action(ds, a);
+            if (!is_action_combined(a, left)) {
                 ds_put_char(ds, ',');
             }
-            format_odp_action(ds, a);
         }
         if (left) {
             int i;
@@ -5338,6 +5379,27 @@ odp_put_tunnel_action(const struct flow_tnl *tunnel,
 }
 
 void
+odp_put_combine_start_action(struct ofpbuf *odp_actions, uint32_t action_len)
+{
+    nl_msg_put_u32(odp_actions, OVS_ACTION_ATTR_START_COMBINE, action_len);
+}
+
+void
+odp_update_combine_start_action(struct nlattr *start_combine,
+                                uint32_t combine_size)
+{
+    uint32_t *combine_len;
+    combine_len = (uint32_t *)(start_combine + 1);
+    *combine_len = combine_size;
+}
+
+void
+odp_put_combine_end_action(struct ofpbuf *odp_actions)
+{
+    nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_END_COMBINE);
+}
+
+void
 odp_put_tnl_push_action(struct ofpbuf *odp_actions,
                         struct ovs_action_push_tnl *data)
 {
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 42011bc..b77faee 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -308,4 +308,10 @@ void odp_put_tunnel_action(const struct flow_tnl *tunnel,
 
 void odp_put_tnl_push_action(struct ofpbuf *odp_actions,
                              struct ovs_action_push_tnl *data);
+void odp_put_combine_end_action(struct ofpbuf *odp_actions);
+void odp_put_combine_start_action(struct ofpbuf *odp_actions,
+                                  uint32_t action_len);
+void
+odp_update_combine_start_action(struct nlattr *start_combine,
+                                uint32_t combine_size);
 #endif /* odp-util.h */
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index 37992b4..893c1a5 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -1163,6 +1163,8 @@ dpif_sflow_read_actions(const struct flow *flow,
 	}
 	case OVS_ACTION_ATTR_SAMPLE:
 	case OVS_ACTION_ATTR_UNSPEC:
+    case OVS_ACTION_ATTR_START_COMBINE:
+    case OVS_ACTION_ATTR_END_COMBINE:
 	case __OVS_ACTION_ATTR_MAX:
 	default:
 	    break;
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 2977be5..f26896f 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -410,6 +410,9 @@ const char *xlate_strerror(enum xlate_error error)
 static void xlate_action_set(struct xlate_ctx *ctx);
 static void xlate_commit_actions(struct xlate_ctx *ctx);
 
+static int
+combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev);
+
 static void
 ctx_trigger_freeze(struct xlate_ctx *ctx)
 {
@@ -2865,7 +2868,22 @@ build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
     }
     tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
     tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
+
+    struct nlattr *start_combine;
+    uint32_t action_size = 0;
+    uint32_t push_action_size = 0;
+    start_combine = ofpbuf_tail(ctx->odp_actions);
+    odp_put_combine_start_action(ctx->odp_actions, 0);
+    action_size = ctx->odp_actions->size;
     odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);
+    push_action_size = ctx->odp_actions->size;
+    combine_tunnel_actions(ctx, out_dev);
+    if (ctx->odp_actions->size > push_action_size) {
+        /* Update the combine action only when combined */
+        odp_update_combine_start_action(start_combine,
+                                    (ctx->odp_actions->size - action_size));
+        odp_put_combine_end_action(ctx->odp_actions);
+    }
     return 0;
 }
 
@@ -2903,6 +2921,77 @@ xlate_flow_is_protected(const struct xlate_ctx *ctx, const struct flow *flow, co
             xport_in->xbundle->protected && xport_out->xbundle->protected);
 }
 
+static int
+combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev)
+{
+    struct flow *flow = &ctx->xin->flow;
+    struct flow old_flow = ctx->xin->flow;
+    bool old_conntrack = ctx->conntracked;
+    bool old_was_mpls = ctx->was_mpls;
+    ovs_version_t old_version = ctx->xin->tables_version;
+    struct ofpbuf old_stack = ctx->stack;
+    union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
+    struct ofpbuf old_action_set = ctx->action_set;
+    uint64_t actset_stub[1024 / 8];
+    const struct xbridge *xbridge_old = ctx->xbridge;
+
+    ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);
+    ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub);
+
+    struct xbridge *xbridge = NULL;
+
+    xbridge = out_dev->xbridge;
+
+    ctx->xbridge = xbridge;   /* peer bridge replaced*/
+    flow->in_port.ofp_port = out_dev->ofp_port; /* peer port replaced */
+    flow->metadata = htonll(0);
+    memset(&flow->tunnel, 0, sizeof flow->tunnel);
+    memset(flow->regs, 0, sizeof flow->regs);
+    flow->actset_output = OFPP_UNSET;
+    ctx->conntracked = false;
+    clear_conntrack(flow);
+
+    /* The bridge is now known so obtain its table version. */
+    ctx->xin->tables_version
+        = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto);
+
+    xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
+    if (!ctx->freezing) {
+        xlate_action_set(ctx);
+    }
+    if (ctx->freezing) {
+        finish_freezing(ctx);
+    }
+
+    ctx->xin->flow = old_flow;
+    ctx->xbridge = xbridge_old;
+    ofpbuf_uninit(&ctx->action_set);
+    ctx->action_set = old_action_set;
+    ofpbuf_uninit(&ctx->stack);
+    ctx->stack = old_stack;
+
+    /* Restore calling bridge's lookup version. */
+    ctx->xin->tables_version = old_version;
+
+    /* The peer bridge popping MPLS should have no effect on the original
+     * bridge. */
+    ctx->was_mpls = old_was_mpls;
+
+    /* The peer bridge's conntrack execution should have no effect on the
+     * original bridge. */
+    ctx->conntracked = old_conntrack;
+
+    /* The fact that the peer bridge exits (for any reason) does not mean
+     * that the original bridge should exit.  Specifically, if the peer
+     * bridge freezes translation, the original bridge must continue
+     * processing with the original, not the frozen packet! */
+    ctx->exit = false;
+
+    /* Peer bridge errors do not propagate back. */
+    ctx->error = XLATE_OK;
+
+    return 0;
+}
 
 static void
 compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
-- 
2.7.4



More information about the dev mailing list