[ovs-dev] [PATCH v5] tunneling: Avoid recirculation on datapath by computing the recirculate actions at translate time.

Zoltán Balogh zoltan.balogh at ericsson.com
Tue May 9 11:46:37 UTC 2017


Hi,

Thank you for your comments. Actually, I was wrong in my previous e-mail when I stated that packet is truncated only at the end of the pipeline, when output is applied. The packet size is set according max_len set by truncate when tunnel_push is applied. So the size of packet is correct.

The root cause of the problem, why stats in underlay bridge is wrong, is that statistics will be incremented by the packet number and packet size set by the upcall_xlate(). 

static void
upcall_xlate(struct udpif *udpif, struct upcall *upcall,
             struct ofpbuf *odp_actions, struct flow_wildcards *wc)
{
    struct dpif_flow_stats stats;
    struct xlate_in xin;

    stats.n_packets = 1;
    stats.n_bytes = dp_packet_size(upcall->packet);
    stats.used = time_msec();
    stats.tcp_flags = ntohs(upcall->flow->tcp_flags);
...
}

Since we excluded recirculation in the "Avoid recirculation" patch, there is no second upcall when packet enters tunnel, but stats created by "first" and only upcall are used for statistics of both integrate and underlay bridges. And that's not correct.

I completed my old patch to solve this problem by adding two new members to struct rule_dpif and modify stats with their values.

Here comes the patch:


Since tunneling and forwarding via patch port uses clone action, the tunneled
packet will not be parsed by miniflow_extract() and flow data will not be
updated within clone. So when a tunnel header is added to the packet, the MAC
and IP data of struct flow will not be updated according to the newly created
tunnel header.

This patch stores MAC and IP data of flow before calling
apply_nested_clone_actions() in build_tunnel_send(), then restores the data
after apply_nested_clone_actions() has returned.

Furthermore, it introduces truncate_packet_size and tunnel_header_size struct
rule_dpif members in order to correct n_bytes statistics of OF flows.
---
 ofproto/ofproto-dpif-xlate.c | 92 ++++++++++++++++++++++++++++++++++++++++++++
 ofproto/ofproto-dpif.c       | 14 ++++++-
 ofproto/ofproto-dpif.h       |  5 +++
 tests/ofproto-dpif.at        | 11 +++++-
 4 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index b308f21..55015d7 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -3141,6 +3141,33 @@ tnl_send_arp_request(struct xlate_ctx *ctx, const struct xport *out_dev,
     dp_packet_uninit(&packet);
 }
 
+/*
+ * Copy IP data of 'flow->tunnel' into 'flow' and 'base_flow'.
+ */
+static void
+propagate_tunnel_data_to_flow(struct flow *flow, struct flow *base_flow)
+{
+    flow->nw_dst = flow->tunnel.ip_dst;
+    flow->nw_src = flow->tunnel.ip_src;
+    flow->ipv6_dst = flow->tunnel.ipv6_dst;
+    flow->ipv6_src = flow->tunnel.ipv6_src;
+
+    flow->nw_tos = flow->tunnel.ip_tos;
+    flow->nw_ttl = flow->tunnel.ip_ttl;
+    flow->tp_dst = flow->tunnel.tp_dst;
+    flow->tp_src = flow->tunnel.tp_src;
+
+    base_flow->nw_dst = flow->nw_dst;
+    base_flow->nw_src = flow->nw_src;
+    base_flow->ipv6_dst = flow->ipv6_dst;
+    base_flow->ipv6_src = flow->ipv6_src;
+
+    base_flow->nw_tos = flow->nw_tos;
+    base_flow->nw_ttl = flow->nw_ttl;
+    base_flow->tp_dst = flow->tp_dst;
+    base_flow->tp_src = flow->tp_src;
+}
+
 static int
 build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
                   const struct flow *flow, odp_port_t tunnel_odp_port)
@@ -3156,6 +3183,11 @@ build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
     int err;
     char buf_sip6[INET6_ADDRSTRLEN];
     char buf_dip6[INET6_ADDRSTRLEN];
+    /* Structures to backup Ethernet and IP data of flow and base_flow. */
+    struct flow old_flow = ctx->xin->flow;
+    struct flow old_base_flow = ctx->base_flow;
+    /* Variable to backup actual tunnel header size. */
+    uint64_t old_ths = 0;
 
     err = tnl_route_lookup_flow(flow, &d_ip6, &s_ip6, &out_dev);
     if (err) {
@@ -3216,16 +3248,57 @@ build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
     tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
     tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
 
+    /* After tunnel header has been added, MAC and IP data of flow and
+     * base_flow need to be set properly, since there is not recirculation
+     * any more when sending packet to tunnel. */
+    if (!eth_addr_is_zero(ctx->xin->flow.dl_dst)) {
+        ctx->xin->flow.dl_dst = dmac;
+        ctx->base_flow.dl_dst = ctx->xin->flow.dl_dst;
+    }
+
+    ctx->xin->flow.dl_src = smac;
+    ctx->base_flow.dl_src = ctx->xin->flow.dl_src;
+
+    propagate_tunnel_data_to_flow(&ctx->xin->flow, &ctx->base_flow);
+
+    if (!tnl_params.is_ipv6) {
+        ctx->xin->flow.dl_type = htons(ETH_TYPE_IP);
+    } else {
+        ctx->xin->flow.dl_type = htons(ETH_TYPE_IPV6);
+    }
+
+    if (tnl_push_data.tnl_type == OVS_VPORT_TYPE_GRE) {
+        ctx->xin->flow.nw_proto = IPPROTO_GRE;
+    } else {
+        ctx->xin->flow.nw_proto = IPPROTO_UDP;
+    }
+    ctx->base_flow.nw_proto = ctx->xin->flow.nw_proto;
+
     size_t push_action_size = 0;
     size_t clone_ofs = nl_msg_start_nested(ctx->odp_actions,
                                            OVS_ACTION_ATTR_CLONE);
     odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);
     push_action_size = ctx->odp_actions->size;
+
+    if (ctx->rule) {
+        old_ths = ctx->rule->tunnel_header_size;
+        ctx->rule->tunnel_header_size = tnl_push_data.header_len;
+    }
+
     apply_nested_clone_actions(ctx, xport, out_dev);
     if (ctx->odp_actions->size > push_action_size) {
         /* Update the CLONE action only when combined */
         nl_msg_end_nested(ctx->odp_actions, clone_ofs);
     }
+
+    if (ctx->rule) {
+        ctx->rule->tunnel_header_size = old_ths;
+    }
+
+    /* Restore flow and base_flow data. */
+    ctx->xin->flow = old_flow;
+    ctx->base_flow = old_base_flow;
+
     return 0;
 }
 
@@ -3728,6 +3801,10 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
         }
 
         if (rule) {
+            if (ctx->rule) {
+                rule->truncated_packet_size = ctx->rule->truncated_packet_size;
+                rule->tunnel_header_size = ctx->rule->tunnel_header_size;
+            }
             /* Fill in the cache entry here instead of xlate_recursively
              * to make the reference counting more explicit.  We take a
              * reference in the lookups above if we are going to cache the
@@ -4602,6 +4679,8 @@ xlate_output_trunc_action(struct xlate_ctx *ctx,
     bool support_trunc = ctx->xbridge->support.trunc;
     struct ovs_action_trunc *trunc;
     char name[OFP10_MAX_PORT_NAME_LEN];
+    /* Variable to backup truncate max len. */
+    uint64_t old_tps = 0;
 
     switch (port) {
     case OFPP_TABLE:
@@ -4634,7 +4713,20 @@ xlate_output_trunc_action(struct xlate_ctx *ctx,
                                 OVS_ACTION_ATTR_TRUNC,
                                 sizeof *trunc);
             trunc->max_len = max_len;
+
+            /* Update truncate correction. */
+            if (ctx->rule) {
+                old_tps = ctx->rule->truncated_packet_size;
+                ctx->rule->truncated_packet_size = max_len;
+            }
+
             xlate_output_action(ctx, port, max_len, false);
+
+            /* Restore truncate correction. */
+            if (ctx->rule) {
+                ctx->rule->truncated_packet_size = old_tps;
+            }
+
             if (!support_trunc) {
                 ctx->xout->slow |= SLOW_ACTION;
             }
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index dc5f004..800d0f6 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -3959,8 +3959,18 @@ rule_dpif_credit_stats__(struct rule_dpif *rule,
     OVS_REQUIRES(rule->stats_mutex)
 {
     if (credit_counts) {
+        uint64_t stats_n_bytes = 0;
+
+        if (rule->truncated_packet_size) {
+            stats_n_bytes = MIN(rule->truncated_packet_size, stats->n_bytes);
+        } else {
+            stats_n_bytes = stats->n_bytes;
+        }
+
+        stats_n_bytes += rule->tunnel_header_size;
+
         rule->stats.n_packets += stats->n_packets;
-        rule->stats.n_bytes += stats->n_bytes;
+        rule->stats.n_bytes += stats_n_bytes;
     }
     rule->stats.used = MAX(rule->stats.used, stats->used);
 }
@@ -4153,6 +4163,8 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
             entry->table.match = (rule != NULL);
         }
         if (rule) {
+            rule->truncated_packet_size = 0;
+            rule->tunnel_header_size = 0;
             goto out;   /* Match. */
         }
         if (honor_table_miss) {
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index a3a6f1f..748df4c 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -93,6 +93,11 @@ struct rule_dpif {
      * The recirculation id and associated internal flow should
      * be freed when the rule is freed */
     uint32_t recirc_id;
+
+    /* Variables to be used to correct statistic when packet is sent to tunnel
+     * or truncated. */
+    uint64_t truncated_packet_size;
+    uint64_t tunnel_header_size;
 };
 
 struct rule_dpif *rule_dpif_lookup_from_table(struct ofproto_dpif *,
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index e52ab32..1f6cd84 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -6257,6 +6257,15 @@ HEADER
 	dgramSeqNo=1
 	ds=127.0.0.1>2:1000
 	fsSeqNo=1
+	tunnel4_out_length=0
+	tunnel4_out_protocol=47
+	tunnel4_out_src=1.1.2.88
+	tunnel4_out_dst=1.1.2.92
+	tunnel4_out_src_port=0
+	tunnel4_out_dst_port=0
+	tunnel4_out_tcp_flags=0
+	tunnel4_out_tos=0
+	tunnel_out_vni=456
 	in_vlan=0
 	in_priority=0
 	out_vlan=0
@@ -6266,7 +6275,7 @@ HEADER
 	dropEvents=0
 	in_ifindex=2011
 	in_format=0
-	out_ifindex=2
+	out_ifindex=1
 	out_format=2
 	hdr_prot=1
 	pkt_len=46
-- 
2.7.4


More information about the dev mailing list