[ovs-dev] [PATCH v4 17/17] conntrack: Add 'dl_type' parameter to conntrack_execute().
Daniele Di Proietto
diproiettod at vmware.com
Fri Jun 10 22:47:43 UTC 2016
Now that dpif_execute has a 'flow' member, it's pretty easy to access
the flow (or the matching megaflow) in dp_execute_cb().
This means that it's no longer necessary for the connection tracker to
re-extract 'dl_type' from the packet; it can be passed as a parameter.
This change means that we have to slightly complicate test-conntrack to
group the packets by dl_type before passing them to the connection
tracker.
Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
---
lib/conntrack.c | 47 ++++++++++++++++++---------------------------
lib/conntrack.h | 3 ++-
lib/dpif-netdev.c | 21 ++++++++++----------
tests/test-conntrack.c | 52 +++++++++++++++++++++++++++++++++++++++++++-------
4 files changed, 77 insertions(+), 46 deletions(-)
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 93d12c3..49d9fb8 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -52,7 +52,8 @@ struct conn_lookup_ctx {
};
static bool conn_key_extract(struct conntrack *, struct dp_packet *,
- struct conn_lookup_ctx *, uint16_t zone);
+ ovs_be16 dl_type, struct conn_lookup_ctx *,
+ uint16_t zone);
static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis);
static void conn_key_reverse(struct conn_key *);
static void conn_key_lookup(struct conntrack_bucket *ctb,
@@ -264,7 +265,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
* 'setlabel' behaves similarly for the connection label.*/
int
conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
- bool commit, uint16_t zone, const uint32_t *setmark,
+ ovs_be16 dl_type, bool commit, uint16_t zone,
+ const uint32_t *setmark,
const struct ovs_key_ct_labels *setlabel,
const char *helper)
{
@@ -298,7 +300,7 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
for (i = 0; i < cnt; i++) {
unsigned bucket;
- if (!conn_key_extract(ct, pkts[i], &ctxs[i], zone)) {
+ if (!conn_key_extract(ct, pkts[i], dl_type, &ctxs[i], zone)) {
write_ct_md(pkts[i], CS_INVALID, zone, 0, OVS_U128_ZERO);
continue;
}
@@ -915,7 +917,7 @@ extract_l4(struct conn_key *key, const void *data, size_t size, bool *related,
}
static bool
-conn_key_extract(struct conntrack *ct, struct dp_packet *pkt,
+conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
struct conn_lookup_ctx *ctx, uint16_t zone)
{
const struct eth_header *l2 = dp_packet_l2(pkt);
@@ -939,43 +941,32 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt,
* We already have the l3 and l4 headers' pointers. Extracting
* the l3 addresses and the l4 ports is really cheap, since they
* can be found at fixed locations.
- * 2) To extract the l3 and l4 types.
- * Extracting the l3 and l4 types (especially the l3[1]) on the
- * other hand is quite expensive, because they're not at a
- * fixed location.
+ * 2) To extract the l4 type.
+ * Extracting the l4 types, for IPv6 can be quite expensive, because
+ * it's not at a fixed location.
*
* Here's a way to avoid (2) with the help of the datapath.
- * The datapath doesn't keep the packet's extracted flow[2], so
+ * The datapath doesn't keep the packet's extracted flow[1], so
* using that is not an option. We could use the packet's matching
- * megaflow for l3 type (it's always unwildcarded), and for l4 type
- * (we have to unwildcard it first). This means either:
+ * megaflow, but we have to make sure that the l4 type (nw_proto)
+ * is unwildcarded. This means either:
*
- * a) dpif-netdev passes the matching megaflow to dp_execute_cb(), which
- * is used to extract the l3 type. Unfortunately, dp_execute_cb() is
- * used also in dpif_netdev_execute(), which doesn't have a matching
- * megaflow.
+ * a) dpif-netdev unwildcards the l4 type when a new flow is installed
+ * if the actions contains ct().
*
- * b) We define an alternative OVS_ACTION_ATTR_CT, used only by the
- * userspace datapath, which includes l3 (and l4) type. The
- * alternative action could be generated by ofproto-dpif specifically
- * for the userspace datapath. Having a different interface for
- * userspace and kernel doesn't seem very clean, though.
+ * b) ofproto-dpif-xlate unwildcards the l4 type when translating a ct()
+ * action. This is already done in different actions, but it's
+ * unnecessary for the kernel.
*
* ---
- * [1] A simple benchmark (running only the connection tracker
- * over and over on the same packets) shows that if the
- * l3 type is already provided we are 15% faster (running the
- * connection tracker over a couple of DPDK devices with a
- * stream of UDP 64-bytes packets shows that we are 4% faster).
- *
- * [2] The reasons for this are that keeping the flow increases
+ * [1] The reasons for this are that keeping the flow increases
* (slightly) the cache footprint and increases computation
* time as we move the packet around. Most importantly, the flow
* should be updated by the actions and this can be slow, as
* we use a sparse representation (miniflow).
*
*/
- ctx->key.dl_type = parse_dl_type(l2, (char *) l3 - (char *) l2);
+ ctx->key.dl_type = dl_type;
if (ctx->key.dl_type == htons(ETH_TYPE_IP)) {
ok = extract_l3_ipv4(&ctx->key, l3, tail - (char *) l3, NULL, true);
} else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
diff --git a/lib/conntrack.h b/lib/conntrack.h
index d1f58ba..f15880b 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -65,7 +65,8 @@ void conntrack_init(struct conntrack *);
void conntrack_destroy(struct conntrack *);
int conntrack_execute(struct conntrack *, struct dp_packet_batch *,
- bool commit, uint16_t zone, const uint32_t *setmark,
+ ovs_be16 dl_type, bool commit,
+ uint16_t zone, const uint32_t *setmark,
const struct ovs_key_ct_labels *setlabel,
const char *helper);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 3165bd3..e8dc8a7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -506,7 +506,7 @@ static int dpif_netdev_open(const struct dpif_class *, const char *name,
bool create, struct dpif **);
static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *,
- bool may_steal,
+ bool may_steal, const struct flow *flow,
const struct nlattr *actions,
size_t actions_len);
static void dp_netdev_input(struct dp_netdev_pmd_thread *,
@@ -2477,8 +2477,8 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
}
packet_batch_init_packet(&pp, execute->packet);
- dp_netdev_execute_actions(pmd, &pp, false, execute->actions,
- execute->actions_len);
+ dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
+ execute->actions, execute->actions_len);
if (pmd->core_id == NON_PMD_CORE_ID) {
ovs_mutex_unlock(&dp->non_pmd_mutex);
@@ -3662,7 +3662,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
actions = dp_netdev_flow_get_actions(flow);
- dp_netdev_execute_actions(pmd, &batch->array, true,
+ dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow,
actions->actions, actions->size);
}
@@ -3789,7 +3789,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet,
* the actions. Otherwise, if there are any slow path actions,
* we'll send the packet up twice. */
packet_batch_init_packet(&b, packet);
- dp_netdev_execute_actions(pmd, &b, true,
+ dp_netdev_execute_actions(pmd, &b, true, &match.flow,
actions->data, actions->size);
add_actions = put_actions->size ? put_actions : actions;
@@ -3959,6 +3959,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev_execute_aux {
struct dp_netdev_pmd_thread *pmd;
+ const struct flow *flow;
};
static void
@@ -4028,7 +4029,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
NULL);
if (!error || error == ENOSPC) {
packet_batch_init_packet(&b, packet);
- dp_netdev_execute_actions(pmd, &b, may_steal,
+ dp_netdev_execute_actions(pmd, &b, may_steal, flow,
actions->data, actions->size);
} else if (may_steal) {
dp_packet_delete(packet);
@@ -4195,8 +4196,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
}
}
- conntrack_execute(&dp->conntrack, packets_, commit, zone,
- setmark, setlabel, helper);
+ conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, commit,
+ zone, setmark, setlabel, helper);
break;
}
@@ -4219,10 +4220,10 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
static void
dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets,
- bool may_steal,
+ bool may_steal, const struct flow *flow,
const struct nlattr *actions, size_t actions_len)
{
- struct dp_netdev_execute_aux aux = { pmd };
+ struct dp_netdev_execute_aux aux = {pmd, flow};
odp_execute_actions(&aux, packets, may_steal, actions,
actions_len, dp_execute_cb);
diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
index 84ee676..de0106f 100644
--- a/tests/test-conntrack.c
+++ b/tests/test-conntrack.c
@@ -30,7 +30,7 @@ static const char payload[] = "50540000000a50540000000908004500001c0000000000"
"11a4cd0a0101010a0101020001000200080000";
static struct dp_packet_batch *
-prepare_packets(size_t n, bool change, unsigned tid)
+prepare_packets(size_t n, bool change, unsigned tid, ovs_be16 *dl_type)
{
struct dp_packet_batch *pkt_batch = xzalloc(sizeof *pkt_batch);
struct flow flow;
@@ -56,8 +56,10 @@ prepare_packets(size_t n, bool change, unsigned tid)
}
pkt_batch->packets[i] = pkt;
+ *dl_type = flow.dl_type;
}
+
return pkt_batch;
}
@@ -83,12 +85,13 @@ ct_thread_main(void *aux_)
{
struct thread_aux *aux = aux_;
struct dp_packet_batch *pkt_batch;
+ ovs_be16 dl_type;
size_t i;
- pkt_batch = prepare_packets(batch_size, change_conn, aux->tid);
+ pkt_batch = prepare_packets(batch_size, change_conn, aux->tid, &dl_type);
pthread_barrier_wait(&barrier);
for (i = 0; i < n_pkts; i += batch_size) {
- conntrack_execute(&ct, pkt_batch, true, 0, NULL, NULL, NULL);
+ conntrack_execute(&ct, pkt_batch, dl_type, true, 0, NULL, NULL, NULL);
}
pthread_barrier_wait(&barrier);
destroy_packets(pkt_batch);
@@ -148,6 +151,44 @@ test_benchmark(struct ovs_cmdl_context *ctx)
}
static void
+pcap_batch_execute_conntrack(struct conntrack *ct,
+ struct dp_packet_batch *pkt_batch)
+{
+ size_t i;
+ struct dp_packet_batch new_batch;
+ ovs_be16 dl_type = htons(0);
+
+ dp_packet_batch_init(&new_batch);
+
+ /* pkt_batch contains packets with different 'dl_type'. We have to
+ * call conntrack_execute() on packets with the same 'dl_type'. */
+
+ for (i = 0; i < pkt_batch->count; i++) {
+ struct dp_packet *pkt = pkt_batch->packets[i];
+ struct flow flow;
+
+ /* This also initializes the l3 and l4 pointers. */
+ flow_extract(pkt, &flow);
+
+ if (!new_batch.count) {
+ dl_type = flow.dl_type;
+ }
+
+ if (flow.dl_type != dl_type) {
+ conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL,
+ NULL);
+ dp_packet_batch_init(&new_batch);
+ }
+ new_batch.packets[new_batch.count++] = pkt;
+ }
+
+ if (new_batch.count) {
+ conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL, NULL);
+ }
+
+}
+
+static void
test_pcap(struct ovs_cmdl_context *ctx)
{
size_t total_count, i, batch_size;
@@ -179,13 +220,10 @@ test_pcap(struct ovs_cmdl_context *ctx)
dp_packet_batch_init(&pkt_batch);
for (i = 0; i < batch_size; i++) {
- struct flow dummy_flow;
-
err = ovs_pcap_read(pcap, &pkt_batch.packets[i], NULL);
if (err) {
break;
}
- flow_extract(pkt_batch.packets[i], &dummy_flow);
}
pkt_batch.count = i;
@@ -193,7 +231,7 @@ test_pcap(struct ovs_cmdl_context *ctx)
break;
}
- conntrack_execute(&ct, &pkt_batch, true, 0, NULL, NULL, NULL);
+ pcap_batch_execute_conntrack(&ct, &pkt_batch);
for (i = 0; i < pkt_batch.count; i++) {
struct ds ds = DS_EMPTY_INITIALIZER;
--
2.8.1
More information about the dev
mailing list