[ovs-dev] [PATCH v4 17/17] conntrack: Add 'dl_type' parameter to conntrack_execute().
Daniele Di Proietto
diproiettod at vmware.com
Fri Jun 10 22:47:43 UTC 2016
Now that dpif_execute has a 'flow' member, it's pretty easy to access
the flow (or the matching megaflow) in dp_execute_cb().
This means that it's no longer necessary for the connection tracker to
re-extract 'dl_type' from the packet; it can be passed as a parameter.
This change means that we have to slightly complicate test-conntrack to
group the packets by dl_type before passing them to the connection
tracker.
Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
---
lib/conntrack.c | 47 ++++++++++++++++++---------------------------
lib/conntrack.h | 3 ++-
lib/dpif-netdev.c | 21 ++++++++++----------
tests/test-conntrack.c | 52 +++++++++++++++++++++++++++++++++++++++++++-------
4 files changed, 77 insertions(+), 46 deletions(-)
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 93d12c3..49d9fb8 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -52,7 +52,8 @@ struct conn_lookup_ctx {
};
static bool conn_key_extract(struct conntrack *, struct dp_packet *,
- struct conn_lookup_ctx *, uint16_t zone);
+ ovs_be16 dl_type, struct conn_lookup_ctx *,
+ uint16_t zone);
static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis);
static void conn_key_reverse(struct conn_key *);
static void conn_key_lookup(struct conntrack_bucket *ctb,
@@ -264,7 +265,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
* 'setlabel' behaves similarly for the connection label.*/
int
conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
- bool commit, uint16_t zone, const uint32_t *setmark,
+ ovs_be16 dl_type, bool commit, uint16_t zone,
+ const uint32_t *setmark,
const struct ovs_key_ct_labels *setlabel,
const char *helper)
{
@@ -298,7 +300,7 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
for (i = 0; i < cnt; i++) {
unsigned bucket;
- if (!conn_key_extract(ct, pkts[i], &ctxs[i], zone)) {
+ if (!conn_key_extract(ct, pkts[i], dl_type, &ctxs[i], zone)) {
write_ct_md(pkts[i], CS_INVALID, zone, 0, OVS_U128_ZERO);
continue;
}
@@ -915,7 +917,7 @@ extract_l4(struct conn_key *key, const void *data, size_t size, bool *related,
}
static bool
-conn_key_extract(struct conntrack *ct, struct dp_packet *pkt,
+conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
struct conn_lookup_ctx *ctx, uint16_t zone)
{
const struct eth_header *l2 = dp_packet_l2(pkt);
@@ -939,43 +941,32 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt,
* We already have the l3 and l4 headers' pointers. Extracting
* the l3 addresses and the l4 ports is really cheap, since they
* can be found at fixed locations.
- * 2) To extract the l3 and l4 types.
- * Extracting the l3 and l4 types (especially the l3[1]) on the
- * other hand is quite expensive, because they're not at a
- * fixed location.
+ * 2) To extract the l4 type.
+ * Extracting the l4 types, for IPv6 can be quite expensive, because
+ * it's not at a fixed location.
*
* Here's a way to avoid (2) with the help of the datapath.
- * The datapath doesn't keep the packet's extracted flow[2], so
+ * The datapath doesn't keep the packet's extracted flow[1], so
* using that is not an option. We could use the packet's matching
- * megaflow for l3 type (it's always unwildcarded), and for l4 type
- * (we have to unwildcard it first). This means either:
+ * megaflow, but we have to make sure that the l4 type (nw_proto)
+ * is unwildcarded. This means either:
*
- * a) dpif-netdev passes the matching megaflow to dp_execute_cb(), which
- * is used to extract the l3 type. Unfortunately, dp_execute_cb() is
- * used also in dpif_netdev_execute(), which doesn't have a matching
- * megaflow.
+ * a) dpif-netdev unwildcards the l4 type when a new flow is installed
+ * if the actions contains ct().
*
- * b) We define an alternative OVS_ACTION_ATTR_CT, used only by the
- * userspace datapath, which includes l3 (and l4) type. The
- * alternative action could be generated by ofproto-dpif specifically
- * for the userspace datapath. Having a different interface for
- * userspace and kernel doesn't seem very clean, though.
+ * b) ofproto-dpif-xlate unwildcards the l4 type when translating a ct()
+ * action. This is already done in different actions, but it's
+ * unnecessary for the kernel.
*
* ---
- * [1] A simple benchmark (running only the connection tracker
- * over and over on the same packets) shows that if the
- * l3 type is already provided we are 15% faster (running the
- * connection tracker over a couple of DPDK devices with a
- * stream of UDP 64-bytes packets shows that we are 4% faster).
- *
- * [2] The reasons for this are that keeping the flow increases
+ * [1] The reasons for this are that keeping the flow increases
* (slightly) the cache footprint and increases computation
* time as we move the packet around. Most importantly, the flow
* should be updated by the actions and this can be slow, as
* we use a sparse representation (miniflow).
*
*/
- ctx->key.dl_type = parse_dl_type(l2, (char *) l3 - (char *) l2);
+ ctx->key.dl_type = dl_type;
if (ctx->key.dl_type == htons(ETH_TYPE_IP)) {
ok = extract_l3_ipv4(&ctx->key, l3, tail - (char *) l3, NULL, true);
} else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
diff --git a/lib/conntrack.h b/lib/conntrack.h
index d1f58ba..f15880b 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -65,7 +65,8 @@ void conntrack_init(struct conntrack *);
void conntrack_destroy(struct conntrack *);
int conntrack_execute(struct conntrack *, struct dp_packet_batch *,
- bool commit, uint16_t zone, const uint32_t *setmark,
+ ovs_be16 dl_type, bool commit,
+ uint16_t zone, const uint32_t *setmark,
const struct ovs_key_ct_labels *setlabel,
const char *helper);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 3165bd3..e8dc8a7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -506,7 +506,7 @@ static int dpif_netdev_open(const struct dpif_class *, const char *name,
bool create, struct dpif **);
static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *,
- bool may_steal,
+ bool may_steal, const struct flow *flow,
const struct nlattr *actions,
size_t actions_len);
static void dp_netdev_input(struct dp_netdev_pmd_thread *,
@@ -2477,8 +2477,8 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
}
packet_batch_init_packet(&pp, execute->packet);
- dp_netdev_execute_actions(pmd, &pp, false, execute->actions,
- execute->actions_len);
+ dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
+ execute->actions, execute->actions_len);
if (pmd->core_id == NON_PMD_CORE_ID) {
ovs_mutex_unlock(&dp->non_pmd_mutex);
@@ -3662,7 +3662,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
actions = dp_netdev_flow_get_actions(flow);
- dp_netdev_execute_actions(pmd, &batch->array, true,
+ dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow,
actions->actions, actions->size);
}
@@ -3789,7 +3789,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet,
* the actions. Otherwise, if there are any slow path actions,
* we'll send the packet up twice. */
packet_batch_init_packet(&b, packet);
- dp_netdev_execute_actions(pmd, &b, true,
+ dp_netdev_execute_actions(pmd, &b, true, &match.flow,
actions->data, actions->size);
add_actions = put_actions->size ? put_actions : actions;
@@ -3959,6 +3959,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev_execute_aux {
struct dp_netdev_pmd_thread *pmd;
+ const struct flow *flow;
};
static void
@@ -4028,7 +4029,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
NULL);
if (!error || error == ENOSPC) {
packet_batch_init_packet(&b, packet);
- dp_netdev_execute_actions(pmd, &b, may_steal,
+ dp_netdev_execute_actions(pmd, &b, may_steal, flow,
actions->data, actions->size);
} else if (may_steal) {
dp_packet_delete(packet);
@@ -4195,8 +4196,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
}
}
- conntrack_execute(&dp->conntrack, packets_, commit, zone,
- setmark, setlabel, helper);
+ conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, commit,
+ zone, setmark, setlabel, helper);
break;
}
@@ -4219,10 +4220,10 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
static void
dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets,
- bool may_steal,
+ bool may_steal, const struct flow *flow,
const struct nlattr *actions, size_t actions_len)
{
- struct dp_netdev_execute_aux aux = { pmd };
+ struct dp_netdev_execute_aux aux = {pmd, flow};
odp_execute_actions(&aux, packets, may_steal, actions,
actions_len, dp_execute_cb);
diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
index 84ee676..de0106f 100644
--- a/tests/test-conntrack.c
+++ b/tests/test-conntrack.c
@@ -30,7 +30,7 @@ static const char payload[] = "50540000000a50540000000908004500001c0000000000"
"11a4cd0a0101010a0101020001000200080000";
static struct dp_packet_batch *
-prepare_packets(size_t n, bool change, unsigned tid)
+prepare_packets(size_t n, bool change, unsigned tid, ovs_be16 *dl_type)
{
struct dp_packet_batch *pkt_batch = xzalloc(sizeof *pkt_batch);
struct flow flow;
@@ -56,8 +56,10 @@ prepare_packets(size_t n, bool change, unsigned tid)
}
pkt_batch->packets[i] = pkt;
+ *dl_type = flow.dl_type;
}
+
return pkt_batch;
}
@@ -83,12 +85,13 @@ ct_thread_main(void *aux_)
{
struct thread_aux *aux = aux_;
struct dp_packet_batch *pkt_batch;
+ ovs_be16 dl_type;
size_t i;
- pkt_batch = prepare_packets(batch_size, change_conn, aux->tid);
+ pkt_batch = prepare_packets(batch_size, change_conn, aux->tid, &dl_type);
pthread_barrier_wait(&barrier);
for (i = 0; i < n_pkts; i += batch_size) {
- conntrack_execute(&ct, pkt_batch, true, 0, NULL, NULL, NULL);
+ conntrack_execute(&ct, pkt_batch, dl_type, true, 0, NULL, NULL, NULL);
}
pthread_barrier_wait(&barrier);
destroy_packets(pkt_batch);
@@ -148,6 +151,44 @@ test_benchmark(struct ovs_cmdl_context *ctx)
}
static void
+pcap_batch_execute_conntrack(struct conntrack *ct,
+ struct dp_packet_batch *pkt_batch)
+{
+ size_t i;
+ struct dp_packet_batch new_batch;
+ ovs_be16 dl_type = htons(0);
+
+ dp_packet_batch_init(&new_batch);
+
+ /* pkt_batch contains packets with different 'dl_type'. We have to
+ * call conntrack_execute() on packets with the same 'dl_type'. */
+
+ for (i = 0; i < pkt_batch->count; i++) {
+ struct dp_packet *pkt = pkt_batch->packets[i];
+ struct flow flow;
+
+ /* This also initializes the l3 and l4 pointers. */
+ flow_extract(pkt, &flow);
+
+ if (!new_batch.count) {
+ dl_type = flow.dl_type;
+ }
+
+ if (flow.dl_type != dl_type) {
+ conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL,
+ NULL);
+ dp_packet_batch_init(&new_batch);
+ }
+ new_batch.packets[new_batch.count++] = pkt;
+ }
+
+ if (new_batch.count) {
+ conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL, NULL);
+ }
+
+}
+
+static void
test_pcap(struct ovs_cmdl_context *ctx)
{
size_t total_count, i, batch_size;
@@ -179,13 +220,10 @@ test_pcap(struct ovs_cmdl_context *ctx)
dp_packet_batch_init(&pkt_batch);
for (i = 0; i < batch_size; i++) {
- struct flow dummy_flow;
-
err = ovs_pcap_read(pcap, &pkt_batch.packets[i], NULL);
if (err) {
break;
}
- flow_extract(pkt_batch.packets[i], &dummy_flow);
}
pkt_batch.count = i;
@@ -193,7 +231,7 @@ test_pcap(struct ovs_cmdl_context *ctx)
break;
}
- conntrack_execute(&ct, &pkt_batch, true, 0, NULL, NULL, NULL);
+ pcap_batch_execute_conntrack(&ct, &pkt_batch);
for (i = 0; i < pkt_batch.count; i++) {
struct ds ds = DS_EMPTY_INITIALIZER;
--
2.8.1
More information about the dev
mailing list