[ovs-dev] [megaflow v2 2/3] datapath: Mega flow implementation

Andy Zhou azhou at nicira.com
Fri Jun 7 20:35:21 UTC 2013


Add mega flow support in kernel datapath.

Pravin has made significant contributions to this patch, including
the mega flow ID lookup scheme, API cleanups, and bug fixes.

Co-authored-by: Pravin B Shelar <pshelar at nicira.com>
Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
Signed-off-by: Andy Zhou <azhou at nicira.com>

---
v1->v2
     No change
---
 datapath/datapath.c |  116 ++++--
 datapath/datapath.h |    1 +
 datapath/flow.c     | 1093 ++++++++++++++++++++++++++++++++++-----------------
 datapath/flow.h     |   97 ++++-
 4 files changed, 910 insertions(+), 397 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 42af315..98d78a8 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -246,7 +246,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	}
 
 	/* Look up flow. */
-	flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
+	flow = ovs_flow_lookup(rcu_dereference(dp->table), &key, key_len);
 	if (unlikely(!flow)) {
 		struct dp_upcall_info upcall;
 
@@ -372,13 +372,13 @@ static size_t key_attr_size(void)
 {
 	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
 		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
-		  + nla_total_size(8)   /* OVS_TUNNEL_KEY_ATTR_ID */
-		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
-		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
-		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TOS */
-		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
-		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
-		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
+		+ nla_total_size(8)   /* OVS_TUNNEL_KEY_ATTR_ID */
+		+ nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
+		+ nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+		+ nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TOS */
+		+ nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
+		+ nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+		+ nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
 		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
@@ -443,7 +443,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 	upcall->dp_ifindex = dp_ifindex;
 
 	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
-	ovs_flow_to_nlattrs(upcall_info->key, user_skb);
+	ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb);
 	nla_nest_end(user_skb, nla);
 
 	if (upcall_info->userdata)
@@ -619,10 +619,12 @@ static int validate_tp_port(const struct sw_flow_key *flow_key)
 static int validate_and_copy_set_tun(const struct nlattr *attr,
 				     struct sw_flow_actions **sfa)
 {
-	struct ovs_key_ipv4_tunnel tun_key;
+	struct sw_flow_match match;
+	struct sw_flow_key key;
 	int err, start;
 
-	err = ipv4_tun_from_nlattr(nla_data(attr), &tun_key);
+	ovs_match_init(&match, &key, NULL);
+	err = ipv4_tun_from_nlattr(nla_data(attr), &match, false);
 	if (err)
 		return err;
 
@@ -630,7 +632,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	if (start < 0)
 		return start;
 
-	err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key));
+	err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
+                        sizeof(match.key->tun_key));
 	add_nested_action_end(*sfa, start);
 
 	return err;
@@ -1061,7 +1064,8 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 		if (!start)
 			return -EMSGSIZE;
 
-		err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key));
+		err = ipv4_tun_to_nlattr(skb,
+				nla_data(ovs_key), nla_data(ovs_key));
 		if (err)
 			return err;
 		nla_nest_end(skb, start);
@@ -1138,14 +1142,27 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
 
 	ovs_header->dp_ifindex = get_dpifindex(dp);
 
+	/* Fill flow key. */
 	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
 	if (!nla)
 		goto nla_put_failure;
-	err = ovs_flow_to_nlattrs(&flow->key, skb);
+
+	err = ovs_flow_to_nlattrs(&flow->user_id, &flow->user_id, skb);
 	if (err)
 		goto error;
 	nla_nest_end(skb, nla);
 
+    nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
+    if (!nla)
+        goto nla_put_failure;
+
+    err = ovs_flow_to_nlattrs(&flow->key,
+                              &flow->mfm->key, skb);
+    if (err)
+        goto error;
+
+    nla_nest_end(skb, nla);
+
 	spin_lock_bh(&flow->lock);
 	used = flow->used;
 	stats.n_packets = flow->packet_count;
@@ -1229,11 +1246,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 	struct nlattr **a = info->attrs;
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
-	struct sw_flow *flow;
+	struct sw_flow *flow = NULL;
+	struct sw_flow_mask *mfm;
+	struct sw_flow_mask *existing_mfm;
 	struct sk_buff *reply;
 	struct datapath *dp;
 	struct flow_table *table;
 	struct sw_flow_actions *acts = NULL;
+	struct sw_flow_match match;
 	int error;
 	int key_len;
 
@@ -1241,23 +1261,33 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 	error = -EINVAL;
 	if (!a[OVS_FLOW_ATTR_KEY])
 		goto error;
-	error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
-	if (error)
-		goto error;
+
+	mfm = sw_flow_mask_alloc();
+	if (mfm == NULL)
+		return -ENOMEM;
+
+	ovs_match_init(&match, &key, mfm);
+	error = ovs_match_from_nlattrs(&match,
+			a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+	if (error) {
+		goto err_kfree;
+	}
+
+	key_len = match.key_desc.end;
 
 	/* Validate actions. */
 	if (a[OVS_FLOW_ATTR_ACTIONS]) {
 		acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
 		error = PTR_ERR(acts);
 		if (IS_ERR(acts))
-			goto error;
+			goto err_kfree;
 
 		error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0, &acts);
 		if (error)
 			goto err_kfree;
 	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
 		error = -EINVAL;
-		goto error;
+		goto err_kfree;
 	}
 
 	ovs_lock();
@@ -1267,7 +1297,23 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		goto err_unlock_ovs;
 
 	table = ovsl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+
+	/* Deduplicate the mega flow mask. Note ovs_mutex is held. */
+	existing_mfm = sw_flow_mask_find(mfm);
+	if (existing_mfm) {
+		flow = ovs_flow_tbl_lookup(table, &key, existing_mfm, key_len);
+		if (flow) {
+			if (!ovs_flow_cmp_id(flow, &key, key_len)) {
+				error = -EINVAL;
+				goto err_unlock_ovs;
+			}
+		}
+
+		sw_flow_mask_add_ref(existing_mfm);
+		sw_flow_mask_del_ref(mfm);
+		mfm = existing_mfm;
+	}
+
 	if (!flow) {
 		/* Bail out if we're not allowed to create a new flow. */
 		error = -ENOENT;
@@ -1294,14 +1340,17 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		}
 		clear_stats(flow);
 
+		rcu_assign_pointer(flow->mfm, mfm);
 		rcu_assign_pointer(flow->sf_acts, acts);
 
 		/* Put flow in bucket. */
 		ovs_flow_tbl_insert(table, flow, &key, key_len);
 
+		if (existing_mfm == NULL)
+			sw_flow_mask_insert(mfm);
+
 		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
-						info->snd_seq,
-						OVS_FLOW_CMD_NEW);
+						info->snd_seq, OVS_FLOW_CMD_NEW);
 	} else {
 		/* We found a matching flow. */
 		struct sw_flow_actions *old_acts;
@@ -1345,6 +1394,7 @@ err_unlock_ovs:
 	ovs_unlock();
 err_kfree:
 	kfree(acts);
+	sw_flow_mask_del_ref(mfm);
 error:
 	return error;
 }
@@ -1358,15 +1408,20 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow *flow;
 	struct datapath *dp;
 	struct flow_table *table;
+	struct sw_flow_match match;
 	int err;
 	int key_len;
 
 	if (!a[OVS_FLOW_ATTR_KEY])
 		return -EINVAL;
-	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+
+	ovs_match_init(&match, &key, NULL);
+	err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
 	if (err)
 		return err;
 
+	key_len = match.key_desc.end;
+
 	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 	if (!dp) {
@@ -1375,7 +1430,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	table = ovsl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	flow = ovs_flow_tbl_lookup_id(table, &key, key_len);
 	if (!flow) {
 		err = -ENOENT;
 		goto unlock;
@@ -1404,6 +1459,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow *flow;
 	struct datapath *dp;
 	struct flow_table *table;
+	struct sw_flow_match match;
 	int err;
 	int key_len;
 
@@ -1418,12 +1474,16 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		err = flush_flows(dp);
 		goto unlock;
 	}
-	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+
+	ovs_match_init(&match, &key, NULL);
+	err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
 	if (err)
 		goto unlock;
 
+	key_len = match.key_desc.end;
+
 	table = ovsl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	flow = ovs_flow_tbl_lookup_id(table, &key, key_len);
 	if (!flow) {
 		err = -ENOENT;
 		goto unlock;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index ad59a3a..cc9c034 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -202,4 +202,5 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 portid, u32 seq,
 
 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
 void ovs_dp_notify_wq(struct work_struct *work);
+
 #endif /* datapath.h */
diff --git a/datapath/flow.c b/datapath/flow.c
index 7f897bd..fc8fb86 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2011 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -46,6 +46,156 @@
 #include "vlan.h"
 
 static struct kmem_cache *flow_cache;
+static LIST_HEAD(mask_list);
+
+/* Widen the byte range recorded for the key (or, if is_mask, the mask) to
+ * cover [offset, offset + size); no-op for a mask without mask storage. */
+static void update_desc__(struct sw_flow_match *match,
+			  size_t offset, size_t size, bool is_mask)
+{
+	const size_t first = offset;
+	const size_t last = offset + size;
+	struct sw_flow_desc *range;
+
+	if (is_mask)
+		range = match->mfm ? &match->mfm->desc : NULL;
+	else
+		range = &match->key_desc;
+	if (!range)
+		return;
+
+	if (range->start == range->end) {
+		/* Equal bounds are treated as "no range recorded yet". */
+		range->start = first;
+		range->end = last;
+	} else {
+		if (first < range->start)
+			range->start = first;
+		if (last > range->end)
+			range->end = last;
+	}
+}
+
+/* Mask writes go to mfm only; with no mfm they are dropped, not put in key. */
+#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
+	do { \
+		update_desc__(match, offsetof(struct sw_flow_key, field),   \
+			      sizeof((match)->key->field), is_mask);        \
+		if (!(is_mask))                                             \
+			(match)->key->field = (value);                      \
+		else if ((match)->mfm != NULL)                              \
+			(match)->mfm->key.field = (value);                  \
+	} while (0)
+
+/* Copy len bytes to the key, or to the mask key if is_mask (dropped if no mfm). */
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+	do { \
+		update_desc__(match, offsetof(struct sw_flow_key, field),   \
+			      len, is_mask);                                \
+		if (!(is_mask))                                             \
+			memcpy(&(match)->key->field, value_p, len);         \
+		else if ((match)->mfm != NULL)                              \
+			memcpy(&(match)->mfm->key.field, value_p, len);     \
+	} while (0)
+
+/* Point match at key and optional mfm; zero the key, mask key and mask range. */
+void ovs_match_init(struct sw_flow_match *match,
+		    struct sw_flow_key *key,
+		    struct sw_flow_mask *mfm)
+{
+	memset(key, 0, sizeof(*key));
+	if (mfm != NULL) {
+		mfm->desc.start = 0;
+		mfm->desc.end = 0;
+		memset(&mfm->key, 0, sizeof(mfm->key));
+	}
+	memset(match, 0, sizeof(*match));
+	match->key = key;
+	match->mfm = mfm;
+}
+
+/* Validate a parsed match: the key must carry every attribute implied by its
+ * EtherType and IP protocol, and masked L3/L4 attributes need exact-match
+ * EtherType/IP protocol masks (so they are rejected when mfm is NULL). */
+bool ovs_match_validate(const struct sw_flow_match *match,
+		u64 key_attrs, u64 mask_attrs)
+{
+	u64 expected = 0;
+
+	/* Mask attributes should be a subset of key attributes. */
+	if ((key_attrs | mask_attrs) != key_attrs)
+		return false;
+
+	/* Check key attributes. */
+	if (match->key->eth.type == htons(ETH_P_ARP) ||
+	    match->key->eth.type == htons(ETH_P_RARP))
+		expected |= 1ULL << OVS_KEY_ATTR_ARP;
+
+	if (match->key->eth.type == htons(ETH_P_IP)) {
+		expected |= 1ULL << OVS_KEY_ATTR_IPV4;
+
+		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+			if (match->key->ip.proto == IPPROTO_UDP)
+				expected |= 1ULL << OVS_KEY_ATTR_UDP;
+
+			if (match->key->ip.proto == IPPROTO_TCP)
+				expected |= 1ULL << OVS_KEY_ATTR_TCP;
+
+			if (match->key->ip.proto == IPPROTO_ICMP)
+				expected |= 1ULL << OVS_KEY_ATTR_ICMP;
+		}
+	}
+
+	if (match->key->eth.type == htons(ETH_P_IPV6)) {
+		expected |= 1ULL << OVS_KEY_ATTR_IPV6;
+
+		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+			if (match->key->ip.proto == IPPROTO_UDP)
+				expected |= 1ULL << OVS_KEY_ATTR_UDP;
+
+			if (match->key->ip.proto == IPPROTO_TCP)
+				expected |= 1ULL << OVS_KEY_ATTR_TCP;
+
+			if (match->key->ip.proto == IPPROTO_ICMPV6) {
+				expected |= 1ULL << OVS_KEY_ATTR_ICMPV6;
+
+				if (match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION))
+					expected |= 1ULL << OVS_KEY_ATTR_ND;
+
+				if (match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
+					expected |= 1ULL << OVS_KEY_ATTR_ND;
+			}
+		}
+	}
+
+	if ((key_attrs & expected) != expected)
+		/* Key attributes check failed. */
+		return false;
+
+	/* Check mask attributes. */
+	if ((mask_attrs & 1ULL << OVS_KEY_ATTR_IPV4)
+		|| (mask_attrs & 1ULL << OVS_KEY_ATTR_IPV6)
+		|| (mask_attrs & 1ULL << OVS_KEY_ATTR_ARP)) {
+		/* EtherType can not be wildcarded. */
+		if (!match->mfm || match->mfm->key.eth.type != htons(0xffff))
+			return false;
+	}
+
+	if ((mask_attrs & 1ULL << OVS_KEY_ATTR_ICMP)
+		|| (mask_attrs & 1ULL << OVS_KEY_ATTR_ICMPV6)
+		|| (mask_attrs & 1ULL << OVS_KEY_ATTR_UDP)
+		|| (mask_attrs & 1ULL << OVS_KEY_ATTR_TCP)) {
+		/* IP header can not be wildcarded. */
+		if (!(mask_attrs & (1ULL << OVS_KEY_ATTR_IPV4
+				    | 1ULL << OVS_KEY_ATTR_IPV6)))
+			return false;
+
+		if (!match->mfm || match->mfm->key.ip.proto != 0xff)
+			return false;
+	}
+
+	return true;
+}
 
 static int check_header(struct sk_buff *skb, int len)
 {
@@ -177,6 +327,22 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
 				  sizeof(struct icmp6hdr));
 }
 
+/* dst = src & mask for sw_flow_mask_roundup_size(mfm) bytes starting at
+ * mfm->desc.start; every other byte of dst is zeroed. */
+static void flow_key_mask(struct sw_flow_key *dst,
+			  const struct sw_flow_key *src,
+			  const struct sw_flow_mask *mfm)
+{
+	const u8 *m = (const u8 *)&mfm->key + mfm->desc.start;
+	const u8 *s = (const u8 *)src + mfm->desc.start;
+	u8 *d = (u8 *)dst + mfm->desc.start;
+	int i;
+
+	memset(dst, 0, sizeof(*dst));
+	for (i = 0; i < sw_flow_mask_roundup_size(mfm); i++)
+		d[i] = s[i] & m[i];
+}
+
 #define TCP_FLAGS_OFFSET 13
 #define TCP_FLAG_MASK 0x3f
 
@@ -225,6 +391,7 @@ struct sw_flow *ovs_flow_alloc(void)
 
 	spin_lock_init(&flow->lock);
 	flow->sf_acts = NULL;
+	flow->mfm = NULL;
 
 	return flow;
 }
@@ -236,6 +403,12 @@ static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
 				(hash & (table->n_buckets - 1)));
 }
 
+static struct hlist_head *find_bucket_user_id(struct flow_table *table, u32 hash)
+{
+	/* Mask the hash with n_buckets - 1 to pick a slot in buckets_ids. */
+	return flex_array_get(table->buckets_ids, hash & (table->n_buckets - 1));
+}
+
 static struct flex_array *alloc_buckets(unsigned int n_buckets)
 {
 	struct flex_array *buckets;
@@ -277,6 +450,15 @@ struct flow_table *ovs_flow_tbl_alloc(int new_size)
 		kfree(table);
 		return NULL;
 	}
+
+	table->buckets_ids = alloc_buckets(new_size);
+
+	if (!table->buckets_ids) {
+		free_buckets(table->buckets);
+		kfree(table);
+		return NULL;
+	}
+
 	table->n_buckets = new_size;
 	table->count = 0;
 	table->node_ver = 0;
@@ -304,12 +486,14 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
 
 		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
 			hlist_del_rcu(&flow->hash_node[ver]);
+			hlist_del_rcu(&flow->hash_node_id[ver]);
 			ovs_flow_free(flow);
 		}
 	}
 
 skip_flows:
 	free_buckets(table->buckets);
+	free_buckets(table->buckets_ids);
 	kfree(table);
 }
 
@@ -338,8 +522,8 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
 	ver = table->node_ver;
 	while (*bucket < table->n_buckets) {
 		i = 0;
-		head = flex_array_get(table->buckets, *bucket);
-		hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+		head = flex_array_get(table->buckets_ids, *bucket);
+		hlist_for_each_entry_rcu(flow, head, hash_node_id[ver]) {
 			if (i < *last) {
 				i++;
 				continue;
@@ -354,11 +538,16 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
 	return NULL;
 }
 
-static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
+static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
 {
 	struct hlist_head *head;
+
 	head = find_bucket(table, flow->hash);
 	hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
+
+	head = find_bucket_user_id(table, flow->id_hash);
+	hlist_add_head_rcu(&flow->hash_node_id[table->node_ver], head);
+
 	table->count++;
 }
 
@@ -378,7 +567,7 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new
 		head = flex_array_get(old->buckets, i);
 
 		hlist_for_each_entry(flow, head, hash_node[old_ver])
-			__flow_tbl_insert(new, flow);
+			__tbl_insert(new, flow);
 	}
 	old->keep_flows = true;
 }
@@ -411,6 +600,7 @@ void ovs_flow_free(struct sw_flow *flow)
 	if (unlikely(!flow))
 		return;
 
+	sw_flow_mask_del_ref(flow->mfm);
 	kfree((struct sf_flow_acts __force *)flow->sf_acts);
 	kmem_cache_free(flow_cache, flow);
 }
@@ -783,7 +973,7 @@ static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_l
 		      DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0);
 }
 
-static int flow_key_start(struct sw_flow_key *key)
+static int flow_key_start(const struct sw_flow_key *key)
 {
 	if (key->tun_key.ipv4_dst)
 		return 0;
@@ -791,8 +981,39 @@ static int flow_key_start(struct sw_flow_key *key)
 		return offsetof(struct sw_flow_key, phy);
 }
 
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
-				struct sw_flow_key *key, int key_len)
+
+static bool __flow_cmp_key(struct sw_flow *flow, u32 hash, u8 *key,
+			   int key_start, int key_len)
+{
+	return (flow->id_hash == hash &&
+		!memcmp((u8 *)&flow->user_id + key_start, key, (key_len - key_start)));
+}
+
+static bool __flow_cmp_key_mask(struct sw_flow *flow, u32 hash, u8 *key,
+				int key_start, int key_len,
+				struct sw_flow_mask *mfm)
+{
+	return (flow->hash == hash && (flow->mfm == mfm) &&
+		!memcmp((u8 *)&flow->key + key_start, key, (key_len - key_start)));
+}
+
+/* Return true if flow's user_id key matches key.  The comparison covers
+ * bytes [flow_key_start(key), key_len) and the hash computed over that
+ * same range. */
+bool ovs_flow_cmp_id(struct sw_flow *flow,
+		  struct sw_flow_key *key, int key_len)
+{
+	const int first = flow_key_start(key);
+	const u32 id_hash = ovs_flow_hash(key, first, key_len);
+
+	/* __flow_cmp_key() expects a pointer already advanced by key_start. */
+	return __flow_cmp_key(flow, id_hash, (u8 *)key + first,
+			      first, key_len);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_id(struct flow_table *table,
+				       struct sw_flow_key *key, int key_len)
+
 {
 	struct sw_flow *flow;
 	struct hlist_head *head;
@@ -804,29 +1025,62 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
 	hash = ovs_flow_hash(key, key_start, key_len);
 
 	_key = (u8 *) key + key_start;
+	head = find_bucket_user_id(table, hash);
+	hlist_for_each_entry_rcu(flow, head, hash_node_id[table->node_ver]) {
+		if (__flow_cmp_key(flow, hash, _key, key_start, key_len))
+			return flow;
+	}
+	return NULL;
+}
+
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
+				    const struct sw_flow_key *flow_key,
+				    struct sw_flow_mask *mfm,
+				    int key_len)
+{
+	const struct sw_flow_key *key;
+	struct sw_flow *flow;
+	struct hlist_head *head;
+	u8 *_key;
+	int key_start = mfm->desc.start;
+	u32 hash;
+	struct sw_flow_key masked_key;
+	int end_roundup = mfm->desc.start
+			+ sw_flow_mask_roundup_size(mfm);
+
+	if (end_roundup < key_len)
+		key_len = end_roundup;
+
+	flow_key_mask(&masked_key, flow_key, mfm);
+	key = &masked_key;
+	hash = ovs_flow_hash(key, key_start, key_len);
+	_key = (u8 *) key + key_start;
 	head = find_bucket(table, hash);
 	hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
-
-		if (flow->hash == hash &&
-		    !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) {
+		if (__flow_cmp_key_mask(flow, hash, _key, key_start, key_len, mfm))
 			return flow;
-		}
+
 	}
 	return NULL;
 }
 
+
 void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
-			 struct sw_flow_key *key, int key_len)
+			 const struct sw_flow_key *key, int key_len)
 {
-	flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len);
-	memcpy(&flow->key, key, sizeof(flow->key));
-	__flow_tbl_insert(table, flow);
+	flow->user_id = *key;
+	flow->id_hash = ovs_flow_hash(key, flow_key_start(key), key_len);
+	flow_key_mask(&flow->key, &flow->user_id, flow->mfm);
+	flow->hash = ovs_flow_hash(&flow->key, flow->mfm->desc.start,
+			flow->mfm->desc.end);
+	__tbl_insert(table, flow);
 }
 
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 {
 	BUG_ON(table->count == 0);
 	hlist_del_rcu(&flow->hash_node[table->node_ver]);
+	hlist_del_rcu(&flow->hash_node_id[table->node_ver]);
 	table->count--;
 }
 
@@ -850,112 +1104,6 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_TUNNEL] = -1,
 };
 
-static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
-				  const struct nlattr *a[], u64 *attrs)
-{
-	const struct ovs_key_icmp *icmp_key;
-	const struct ovs_key_tcp *tcp_key;
-	const struct ovs_key_udp *udp_key;
-
-	switch (swkey->ip.proto) {
-	case IPPROTO_TCP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_TCP);
-
-		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
-		swkey->ipv4.tp.src = tcp_key->tcp_src;
-		swkey->ipv4.tp.dst = tcp_key->tcp_dst;
-		break;
-
-	case IPPROTO_UDP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_UDP);
-
-		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
-		swkey->ipv4.tp.src = udp_key->udp_src;
-		swkey->ipv4.tp.dst = udp_key->udp_dst;
-		break;
-
-	case IPPROTO_ICMP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
-
-		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
-		swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
-		swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
-		break;
-	}
-
-	return 0;
-}
-
-static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
-				  const struct nlattr *a[], u64 *attrs)
-{
-	const struct ovs_key_icmpv6 *icmpv6_key;
-	const struct ovs_key_tcp *tcp_key;
-	const struct ovs_key_udp *udp_key;
-
-	switch (swkey->ip.proto) {
-	case IPPROTO_TCP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_TCP);
-
-		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
-		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
-		swkey->ipv6.tp.src = tcp_key->tcp_src;
-		swkey->ipv6.tp.dst = tcp_key->tcp_dst;
-		break;
-
-	case IPPROTO_UDP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_UDP);
-
-		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
-		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
-		swkey->ipv6.tp.src = udp_key->udp_src;
-		swkey->ipv6.tp.dst = udp_key->udp_dst;
-		break;
-
-	case IPPROTO_ICMPV6:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
-
-		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
-		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
-		swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type);
-		swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code);
-
-		if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
-		    swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
-			const struct ovs_key_nd *nd_key;
-
-			if (!(*attrs & (1 << OVS_KEY_ATTR_ND)))
-				return -EINVAL;
-			*attrs &= ~(1 << OVS_KEY_ATTR_ND);
-
-			*key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
-			nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
-			memcpy(&swkey->ipv6.nd.target, nd_key->nd_target,
-			       sizeof(swkey->ipv6.nd.target));
-			memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
-			memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
-		}
-		break;
-	}
-
-	return 0;
-}
-
 static int parse_flow_nlattrs(const struct nlattr *attr,
 			      const struct nlattr *a[], u64 *attrsp)
 {
@@ -963,7 +1111,7 @@ static int parse_flow_nlattrs(const struct nlattr *attr,
 	u64 attrs;
 	int rem;
 
-	attrs = 0;
+	attrs = *attrsp;
 	nla_for_each_nested(nla, attr, rem) {
 		u16 type = nla_type(nla);
 		int expected_len;
@@ -986,13 +1134,12 @@ static int parse_flow_nlattrs(const struct nlattr *attr,
 }
 
 int ipv4_tun_from_nlattr(const struct nlattr *attr,
-			 struct ovs_key_ipv4_tunnel *tun_key)
+			 struct sw_flow_match *match, bool is_mask)
 {
 	struct nlattr *a;
 	int rem;
 	bool ttl = false;
-
-	memset(tun_key, 0, sizeof(*tun_key));
+	u16 tun_flags = 0;
 
 	nla_for_each_nested(a, attr, rem) {
 		int type = nla_type(a);
@@ -1012,37 +1159,48 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
 
 		switch (type) {
 		case OVS_TUNNEL_KEY_ATTR_ID:
-			tun_key->tun_id = nla_get_be64(a);
-			tun_key->tun_flags |= OVS_TNL_F_KEY;
+			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
+					nla_get_be64(a), is_mask);
+			tun_flags |= OVS_TNL_F_KEY;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
-			tun_key->ipv4_src = nla_get_be32(a);
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
+					nla_get_be32(a), is_mask);
 			break;
 		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
-			tun_key->ipv4_dst = nla_get_be32(a);
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
+					nla_get_be32(a), is_mask);
 			break;
 		case OVS_TUNNEL_KEY_ATTR_TOS:
-			tun_key->ipv4_tos = nla_get_u8(a);
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
+					nla_get_u8(a), is_mask);
 			break;
 		case OVS_TUNNEL_KEY_ATTR_TTL:
-			tun_key->ipv4_ttl = nla_get_u8(a);
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
+					nla_get_u8(a), is_mask);
 			ttl = true;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
-			tun_key->tun_flags |= OVS_TNL_F_DONT_FRAGMENT;
+			tun_flags |= OVS_TNL_F_DONT_FRAGMENT;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_CSUM:
-			tun_key->tun_flags |= OVS_TNL_F_CSUM;
+			tun_flags |= OVS_TNL_F_CSUM;
 			break;
 		default:
 			return -EINVAL;
 
 		}
 	}
+	if (is_mask) {
+		SW_FLOW_KEY_PUT(match, tun_key.tun_flags, 0xffff, is_mask);
+	}else {
+		SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
+	}
+
 	if (rem > 0)
 		return -EINVAL;
 
-	if (!tun_key->ipv4_dst)
+	if (!match->key->tun_key.ipv4_dst)
 		return -EINVAL;
 
 	if (!ttl)
@@ -1052,7 +1210,8 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
 }
 
 int ipv4_tun_to_nlattr(struct sk_buff *skb,
-			const struct ovs_key_ipv4_tunnel *tun_key)
+			const struct ovs_key_ipv4_tunnel *tun_key,
+			const struct ovs_key_ipv4_tunnel *output)
 {
 	struct nlattr *nla;
 
@@ -1061,204 +1220,325 @@ int ipv4_tun_to_nlattr(struct sk_buff *skb,
 		return -EMSGSIZE;
 
 	if (tun_key->tun_flags & OVS_TNL_F_KEY &&
-	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id))
+	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
 		return -EMSGSIZE;
 	if (tun_key->ipv4_src &&
-	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src))
+	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
 		return -EMSGSIZE;
-	if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst))
+	if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
 		return -EMSGSIZE;
 	if (tun_key->ipv4_tos &&
-	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos))
-		return -EMSGSIZE;
-	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl))
-		return -EMSGSIZE;
-	if ((tun_key->tun_flags & OVS_TNL_F_DONT_FRAGMENT) &&
-		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
+	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
 		return -EMSGSIZE;
-	if ((tun_key->tun_flags & OVS_TNL_F_CSUM) &&
-		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
+	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
 		return -EMSGSIZE;
+	if (tun_key == output) {
+		if ((tun_key->tun_flags & OVS_TNL_F_DONT_FRAGMENT) &&
+			nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
+			return -EMSGSIZE;
+		if ((tun_key->tun_flags & OVS_TNL_F_CSUM) &&
+			nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
+			return -EMSGSIZE;
+	}
 
 	nla_nest_end(skb, nla);
 	return 0;
 }
 
-/**
- * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key.
- * @swkey: receives the extracted flow key.
- * @key_lenp: number of bytes used in @swkey.
- * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
- * sequence.
- */
-int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
-		      const struct nlattr *attr)
-{
-	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
-	const struct ovs_key_ethernet *eth_key;
-	int key_len;
-	u64 attrs;
-	int err;
-
-	memset(swkey, 0, sizeof(struct sw_flow_key));
-	key_len = SW_FLOW_KEY_OFFSET(eth);
 
-	err = parse_flow_nlattrs(attr, a, &attrs);
-	if (err)
-		return err;
-
-	/* Metadata attributes. */
-	if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
-		swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]);
-		attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
+static int __metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
+		const struct nlattr **a, bool is_mask)
+{
+	if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
+		SW_FLOW_KEY_PUT(match, phy.priority,
+			  nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
+		*attrs &= ~(1ULL << OVS_KEY_ATTR_PRIORITY);
 	}
-	if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
+
+	if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
 		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
-		if (in_port >= DP_MAX_PORTS)
+
+		if (!is_mask && in_port >= DP_MAX_PORTS)
 			return -EINVAL;
-		swkey->phy.in_port = in_port;
-		attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
-	} else {
-		swkey->phy.in_port = DP_MAX_PORTS;
+		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
+		*attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
 	}
-	if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
+
+	if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) {
 		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) && !defined(CONFIG_NETFILTER)
-		if (mark != 0)
+		if (!is_mask && mark != 0)
 			return -EINVAL;
 #endif
-		swkey->phy.skb_mark = mark;
-		attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
+		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
+		*attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK);
+	}
+	if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
+		if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+					is_mask))
+			return -EINVAL;
+		*attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
 	}
+	return 0;
+}
 
-	if (attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
-		err = ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key);
-		if (err)
-			return err;
+static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
+		const struct nlattr **a, bool is_mask)
+{
+	int err;
 
-		attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
-	}
+	err = __metadata_from_nlattrs(match, &attrs, a, is_mask);
+	if (err)
+		return err;
 
-	/* Data attributes. */
-	if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
-		return -EINVAL;
-	attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+	if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) {
+		const struct ovs_key_ethernet *eth_key;
 
-	eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
-	memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
-	memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);
+		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+		SW_FLOW_KEY_MEMCPY(match, eth.src,
+				eth_key->eth_src, ETH_ALEN, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, eth.dst,
+				eth_key->eth_dst, ETH_ALEN, is_mask);
+		attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
+	}
 
-	if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) &&
-	    nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
-		const struct nlattr *encap;
+	if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
 		__be16 tci;
 
-		if (attrs != ((1 << OVS_KEY_ATTR_VLAN) |
-			      (1 << OVS_KEY_ATTR_ETHERTYPE) |
-			      (1 << OVS_KEY_ATTR_ENCAP)))
-			return -EINVAL;
-
-		encap = a[OVS_KEY_ATTR_ENCAP];
 		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-		if (tci & htons(VLAN_TAG_PRESENT)) {
-			swkey->eth.tci = tci;
+		SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
 
-			err = parse_flow_nlattrs(encap, a, &attrs);
-			if (err)
-				return err;
-		} else if (!tci) {
-			/* Corner case for truncated 802.1Q header. */
-			if (nla_len(encap))
-				return -EINVAL;
-
-			swkey->eth.type = htons(ETH_P_8021Q);
-			*key_lenp = key_len;
-			return 0;
-		} else {
-			return -EINVAL;
-		}
+		attrs &= ~(1ULL << OVS_KEY_ATTR_VLAN);
 	}
 
-	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
-		swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-		if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN)
+	if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
+		__be16 eth_type;
+
+		eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+		if (!is_mask && ntohs(eth_type) < ETH_P_802_3_MIN)
 			return -EINVAL;
-		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
-	} else {
-		swkey->eth.type = htons(ETH_P_802_2);
+
+		SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+		attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
 	}
 
-	if (swkey->eth.type == htons(ETH_P_IP)) {
+	if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
 		const struct ovs_key_ipv4 *ipv4_key;
 
-		if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
-			return -EINVAL;
-		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
-
-		key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
 		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
 		if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX)
 			return -EINVAL;
-		swkey->ip.proto = ipv4_key->ipv4_proto;
-		swkey->ip.tos = ipv4_key->ipv4_tos;
-		swkey->ip.ttl = ipv4_key->ipv4_ttl;
-		swkey->ip.frag = ipv4_key->ipv4_frag;
-		swkey->ipv4.addr.src = ipv4_key->ipv4_src;
-		swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
-
-		if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
-			err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs);
-			if (err)
-				return err;
-		}
-	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-		const struct ovs_key_ipv6 *ipv6_key;
+		SW_FLOW_KEY_PUT(match, ip.proto,
+				ipv4_key->ipv4_proto, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.tos,
+				ipv4_key->ipv4_tos, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.ttl,
+				ipv4_key->ipv4_ttl, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.frag,
+				ipv4_key->ipv4_frag, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+				ipv4_key->ipv4_src, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+				ipv4_key->ipv4_dst, is_mask);
+		attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4);
+	}
 
-		if (!(attrs & (1 << OVS_KEY_ATTR_IPV6)))
-			return -EINVAL;
-		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
+	if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
+		const struct ovs_key_ipv6 *ipv6_key;
 
-		key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
 		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
 		if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX)
 			return -EINVAL;
-		swkey->ipv6.label = ipv6_key->ipv6_label;
-		swkey->ip.proto = ipv6_key->ipv6_proto;
-		swkey->ip.tos = ipv6_key->ipv6_tclass;
-		swkey->ip.ttl = ipv6_key->ipv6_hlimit;
-		swkey->ip.frag = ipv6_key->ipv6_frag;
-		memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
-		       sizeof(swkey->ipv6.addr.src));
-		memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
-		       sizeof(swkey->ipv6.addr.dst));
-
-		if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
-			err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs);
+		SW_FLOW_KEY_PUT(match, ipv6.label,
+				ipv6_key->ipv6_label, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.proto,
+				ipv6_key->ipv6_proto, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.tos,
+				ipv6_key->ipv6_tclass, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.ttl,
+				ipv6_key->ipv6_hlimit, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.frag,
+				ipv6_key->ipv6_frag, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
+				ipv6_key->ipv6_src,
+				sizeof(match->key->ipv6.addr.src),
+				is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
+				ipv6_key->ipv6_dst,
+				sizeof(match->key->ipv6.addr.dst),
+				is_mask);
+
+		attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6);
+	}
+
+	if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) {
+		const struct ovs_key_arp *arp_key;
+
+		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
+		if (!is_mask && (arp_key->arp_op & htons(0xff00)))
+			return -EINVAL;
+
+		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+				arp_key->arp_sip, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+			arp_key->arp_tip, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.proto,
+				ntohs(arp_key->arp_op), is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
+				arp_key->arp_sha, ETH_ALEN, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
+				arp_key->arp_tha, ETH_ALEN, is_mask);
+
+		attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
+	}
+
+	if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
+		const struct ovs_key_tcp *tcp_key;
+
+		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+		SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+				tcp_key->tcp_src, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+				tcp_key->tcp_dst, is_mask);
+		attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
+	}
+
+	if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) {
+		const struct ovs_key_udp *udp_key;
+
+		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+		SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+				udp_key->udp_src, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+				udp_key->udp_dst, is_mask);
+		attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
+	}
+
+	if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) {
+		const struct ovs_key_icmp *icmp_key;
+
+		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
+		SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+				htons(icmp_key->icmp_type), is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+				htons(icmp_key->icmp_code), is_mask);
+		attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
+	}
+
+	if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) {
+		const struct ovs_key_icmpv6 *icmpv6_key;
+
+		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
+		SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+				htons(icmpv6_key->icmpv6_type), is_mask);
+		SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+				htons(icmpv6_key->icmpv6_code), is_mask);
+		attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
+	}
+
+	if (attrs & (1ULL << OVS_KEY_ATTR_ND)) {
+		const struct ovs_key_nd *nd_key;
+
+		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
+			nd_key->nd_target,
+			sizeof(match->key->ipv6.nd.target),
+			is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
+			nd_key->nd_sll, ETH_ALEN, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
+				nd_key->nd_tll, ETH_ALEN, is_mask);
+		attrs &= ~(1ULL << OVS_KEY_ATTR_ND);
+	}
+
+	if (attrs != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+
+/**
+ * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and
+ * mask. In case the mask is not specified (mask == NULL), the flow is
+ * treated as a Micro flow.  Otherwise, it is a Mega flow.
+ * @match: receives the extracted flow match information.
+ * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence. The fields should be those of the packet that triggered the
+ * creation of this mega (or micro) flow.
+ * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
+ * attribute sequence that carries the mega flow mask fields.
+ */
+int ovs_match_from_nlattrs(struct sw_flow_match *match,
+			   const struct nlattr *key,
+			   const struct nlattr *mask)
+{
+	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+	const struct nlattr *m[OVS_KEY_ATTR_MAX + 1];
+	const struct nlattr *encap;
+	u64 key_attrs = 0;
+	u64 mask_attrs = 0;
+	bool encap_valid = false;
+	int err;
+
+	err = parse_flow_nlattrs(key, a, &key_attrs);
+	if (err)
+		return err;
+
+	if (key_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
+		encap = a[OVS_KEY_ATTR_ENCAP];
+		key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
+		if (nla_len(encap)) {
+			__be16 tci = 0;
+			__be16 eth_type = 0; /* ETH_P_8021Q */
+
+			if (a[OVS_KEY_ATTR_ETHERTYPE])
+				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+			if (a[OVS_KEY_ATTR_VLAN])
+				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+			if  ( (eth_type == htons(ETH_P_8021Q))
+					&& (tci & htons(VLAN_TAG_PRESENT)) ) {
+				encap_valid = true;
+				err = parse_flow_nlattrs(encap, a, &key_attrs);
+			}
+			else
+				err = -EINVAL;
+
 			if (err)
 				return err;
 		}
-	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
-		   swkey->eth.type == htons(ETH_P_RARP)) {
-		const struct ovs_key_arp *arp_key;
+	}
 
-		if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
-			return -EINVAL;
-		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
+	err = ovs_key_from_nlattrs(match, key_attrs, a, false);
+	if (err)
+		return err;
 
-		key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
-		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
-		swkey->ipv4.addr.src = arp_key->arp_sip;
-		swkey->ipv4.addr.dst = arp_key->arp_tip;
-		if (arp_key->arp_op & htons(0xff00))
-			return -EINVAL;
-		swkey->ip.proto = ntohs(arp_key->arp_op);
-		memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
-		memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
+	if (mask) {
+		err = parse_flow_nlattrs(mask, m, &mask_attrs);
+		if (err)
+			return err;
+
+		if ((mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) && encap_valid) {
+			mask_attrs &= ~(1ULL <<OVS_KEY_ATTR_ETHERTYPE);
+			encap = m[OVS_KEY_ATTR_ENCAP];
+			err = parse_flow_nlattrs(encap, m, &mask_attrs);
+			if (err)
+				return err;
+		}
+
+		err = ovs_key_from_nlattrs(match,  mask_attrs, m, true);
+		if (err)
+			return err;
+	} else {
+		/* Populate Micro flow's key mask. */
+		if (match->mfm)
+			sw_flow_mask_set(match->mfm, &match->key_desc, 0xff);
 	}
 
-	if (attrs)
+	if (ovs_match_validate(match, key_attrs, mask_attrs) == false)
 		return -EINVAL;
-	*key_lenp = key_len;
 
 	return 0;
 }
@@ -1276,55 +1556,29 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
  * extracted from the packet itself.
  */
 
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const struct nlattr *attr)
+int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len,
+				   const struct nlattr *attr)
 {
 	struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
-	const struct nlattr *nla;
-	int rem;
+	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+	u64 attrs = 0;
+	int err;
+	struct sw_flow_match match;
 
 	flow->key.phy.in_port = DP_MAX_PORTS;
 	flow->key.phy.priority = 0;
 	flow->key.phy.skb_mark = 0;
 	memset(tun_key, 0, sizeof(flow->key.tun_key));
 
-	nla_for_each_nested(nla, attr, rem) {
-		int type = nla_type(nla);
-
-		if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) {
-			int err;
-
-			if (nla_len(nla) != ovs_key_lens[type])
-				return -EINVAL;
-
-			switch (type) {
-			case OVS_KEY_ATTR_PRIORITY:
-				flow->key.phy.priority = nla_get_u32(nla);
-				break;
-
-			case OVS_KEY_ATTR_TUNNEL:
-				err = ipv4_tun_from_nlattr(nla, tun_key);
-				if (err)
-					return err;
-				break;
+	err = parse_flow_nlattrs(attr, a, &attrs);
+	if (err)
+		return -EINVAL;
 
-			case OVS_KEY_ATTR_IN_PORT:
-				if (nla_get_u32(nla) >= DP_MAX_PORTS)
-					return -EINVAL;
-				flow->key.phy.in_port = nla_get_u32(nla);
-				break;
+	ovs_match_init(&match, &flow->key, NULL);
 
-			case OVS_KEY_ATTR_SKB_MARK:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) && !defined(CONFIG_NETFILTER)
-				if (nla_get_u32(nla) != 0)
-					return -EINVAL;
-#endif
-				flow->key.phy.skb_mark = nla_get_u32(nla);
-				break;
-			}
-		}
-	}
-	if (rem)
-		return -EINVAL;
+	err = __metadata_from_nlattrs(&match, &attrs, a, false);
+	if (err)
+		return err;
 
 	flow->hash = ovs_flow_hash(&flow->key,
 				   flow_key_start(&flow->key), key_len);
@@ -1332,37 +1586,40 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const stru
 	return 0;
 }
 
-int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
+int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
+		const struct sw_flow_key *output, struct sk_buff *skb)
 {
 	struct ovs_key_ethernet *eth_key;
 	struct nlattr *nla, *encap;
 
 	if (swkey->phy.priority &&
-	    nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
+	    nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
 		goto nla_put_failure;
 
 	if (swkey->tun_key.ipv4_dst &&
-	    ipv4_tun_to_nlattr(skb, &swkey->tun_key))
+	    ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
 		goto nla_put_failure;
 
 	if (swkey->phy.in_port != DP_MAX_PORTS &&
-	    nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
+	    nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, output->phy.in_port))
 		goto nla_put_failure;
 
 	if (swkey->phy.skb_mark &&
-	    nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark))
+	    nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
 		goto nla_put_failure;
 
 	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
 	if (!nla)
 		goto nla_put_failure;
 	eth_key = nla_data(nla);
-	memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN);
-	memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN);
+	memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
+	memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
 
 	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
-		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)) ||
-		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci))
+		__be16 eth_type;
+		eth_type = (swkey == output) ? htons(ETH_P_8021Q) : 0 ;
+		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
+		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
 			goto nla_put_failure;
 		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
 		if (!swkey->eth.tci)
@@ -1374,7 +1631,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 	if (swkey->eth.type == htons(ETH_P_802_2))
 		goto unencap;
 
-	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type))
+	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
 		goto nla_put_failure;
 
 	if (swkey->eth.type == htons(ETH_P_IP)) {
@@ -1384,12 +1641,12 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 		if (!nla)
 			goto nla_put_failure;
 		ipv4_key = nla_data(nla);
-		ipv4_key->ipv4_src = swkey->ipv4.addr.src;
-		ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
-		ipv4_key->ipv4_proto = swkey->ip.proto;
-		ipv4_key->ipv4_tos = swkey->ip.tos;
-		ipv4_key->ipv4_ttl = swkey->ip.ttl;
-		ipv4_key->ipv4_frag = swkey->ip.frag;
+		ipv4_key->ipv4_src = output->ipv4.addr.src;
+		ipv4_key->ipv4_dst = output->ipv4.addr.dst;
+		ipv4_key->ipv4_proto = output->ip.proto;
+		ipv4_key->ipv4_tos = output->ip.tos;
+		ipv4_key->ipv4_ttl = output->ip.ttl;
+		ipv4_key->ipv4_frag = output->ip.frag;
 	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
 		struct ovs_key_ipv6 *ipv6_key;
 
@@ -1397,15 +1654,15 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 		if (!nla)
 			goto nla_put_failure;
 		ipv6_key = nla_data(nla);
-		memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src,
+		memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
 				sizeof(ipv6_key->ipv6_src));
-		memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst,
+		memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
 				sizeof(ipv6_key->ipv6_dst));
-		ipv6_key->ipv6_label = swkey->ipv6.label;
-		ipv6_key->ipv6_proto = swkey->ip.proto;
-		ipv6_key->ipv6_tclass = swkey->ip.tos;
-		ipv6_key->ipv6_hlimit = swkey->ip.ttl;
-		ipv6_key->ipv6_frag = swkey->ip.frag;
+		ipv6_key->ipv6_label = output->ipv6.label;
+		ipv6_key->ipv6_proto = output->ip.proto;
+		ipv6_key->ipv6_tclass = output->ip.tos;
+		ipv6_key->ipv6_hlimit = output->ip.ttl;
+		ipv6_key->ipv6_frag = output->ip.frag;
 	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
 		   swkey->eth.type == htons(ETH_P_RARP)) {
 		struct ovs_key_arp *arp_key;
@@ -1415,11 +1672,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 			goto nla_put_failure;
 		arp_key = nla_data(nla);
 		memset(arp_key, 0, sizeof(struct ovs_key_arp));
-		arp_key->arp_sip = swkey->ipv4.addr.src;
-		arp_key->arp_tip = swkey->ipv4.addr.dst;
-		arp_key->arp_op = htons(swkey->ip.proto);
-		memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
-		memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
+		arp_key->arp_sip = output->ipv4.addr.src;
+		arp_key->arp_tip = output->ipv4.addr.dst;
+		arp_key->arp_op = htons(output->ip.proto);
+		memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
+		memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
 	}
 
 	if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1434,11 +1691,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 				goto nla_put_failure;
 			tcp_key = nla_data(nla);
 			if (swkey->eth.type == htons(ETH_P_IP)) {
-				tcp_key->tcp_src = swkey->ipv4.tp.src;
-				tcp_key->tcp_dst = swkey->ipv4.tp.dst;
+				tcp_key->tcp_src = output->ipv4.tp.src;
+				tcp_key->tcp_dst = output->ipv4.tp.dst;
 			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-				tcp_key->tcp_src = swkey->ipv6.tp.src;
-				tcp_key->tcp_dst = swkey->ipv6.tp.dst;
+				tcp_key->tcp_src = output->ipv6.tp.src;
+				tcp_key->tcp_dst = output->ipv6.tp.dst;
 			}
 		} else if (swkey->ip.proto == IPPROTO_UDP) {
 			struct ovs_key_udp *udp_key;
@@ -1448,11 +1705,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 				goto nla_put_failure;
 			udp_key = nla_data(nla);
 			if (swkey->eth.type == htons(ETH_P_IP)) {
-				udp_key->udp_src = swkey->ipv4.tp.src;
-				udp_key->udp_dst = swkey->ipv4.tp.dst;
+				udp_key->udp_src = output->ipv4.tp.src;
+				udp_key->udp_dst = output->ipv4.tp.dst;
 			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-				udp_key->udp_src = swkey->ipv6.tp.src;
-				udp_key->udp_dst = swkey->ipv6.tp.dst;
+				udp_key->udp_src = output->ipv6.tp.src;
+				udp_key->udp_dst = output->ipv6.tp.dst;
 			}
 		} else if (swkey->eth.type == htons(ETH_P_IP) &&
 			   swkey->ip.proto == IPPROTO_ICMP) {
@@ -1462,8 +1719,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 			if (!nla)
 				goto nla_put_failure;
 			icmp_key = nla_data(nla);
-			icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src);
-			icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst);
+			icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
+			icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
 		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
 			   swkey->ip.proto == IPPROTO_ICMPV6) {
 			struct ovs_key_icmpv6 *icmpv6_key;
@@ -1473,8 +1730,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 			if (!nla)
 				goto nla_put_failure;
 			icmpv6_key = nla_data(nla);
-			icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src);
-			icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst);
+			icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
+			icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
 
 			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
 			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
@@ -1484,10 +1741,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 				if (!nla)
 					goto nla_put_failure;
 				nd_key = nla_data(nla);
-				memcpy(nd_key->nd_target, &swkey->ipv6.nd.target,
+				memcpy(nd_key->nd_target, &output->ipv6.nd.target,
 							sizeof(nd_key->nd_target));
-				memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN);
-				memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN);
+				memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
+				memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
 			}
 		}
 	}
@@ -1519,3 +1776,115 @@ void ovs_flow_exit(void)
 {
 	kmem_cache_destroy(flow_cache);
 }
+
+struct sw_flow *ovs_flow_lookup(struct flow_table *tbl,
+				const struct sw_flow_key *key, int key_len)
+{
+	struct sw_flow *flow = NULL;
+	struct sw_flow_mask *mfm;
+
+	list_for_each_entry_rcu(mfm, &mask_list, list) {
+		/* Shorter key will not match, skip it. */
+		if (key_len < mfm->desc.end)
+			continue;
+
+		flow = ovs_flow_tbl_lookup(tbl, key, mfm, mfm->desc.end);
+		if (flow)  /* Found */
+			break;
+	}
+
+	return flow;
+}
+
+struct sw_flow_mask *sw_flow_mask_alloc(void)
+{
+	struct sw_flow_mask *mfm;
+
+	mfm = kmalloc(sizeof *mfm, GFP_KERNEL);
+	if (mfm) {
+		kref_init(&mfm->kref);
+		INIT_LIST_HEAD(&mfm->list);
+	}
+
+	return mfm;
+}
+
+void sw_flow_mask_add_ref(struct sw_flow_mask *mfm)
+{
+	kref_get(&mfm->kref);
+}
+
+static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu)
+{
+	struct sw_flow_mask *mfm = container_of(rcu, struct sw_flow_mask, rcu);
+
+	kfree(mfm);
+}
+
+static void mask_release__(struct kref *kref)
+{
+	struct sw_flow_mask *mfm;
+
+	mfm = container_of(kref, struct sw_flow_mask, kref);
+	list_del_rcu(&mfm->list);
+	call_rcu(&mfm->rcu, rcu_free_sw_flow_mask_cb);
+}
+
+int sw_flow_mask_del_ref(struct sw_flow_mask *mfm)
+{
+	int ret = 0;
+	if (mfm)
+		ret = kref_sub(&mfm->kref, 1, mask_release__);
+
+	return ret;
+}
+
+static bool sw_flow_mask_equal(const struct sw_flow_mask *a,
+			       const struct sw_flow_mask *b)
+{
+	u8 *a_ = (u8 *)&a->key + a->desc.start;
+	u8 *b_ = (u8 *)&b->key + b->desc.start;
+
+	return  (a->desc.end == b->desc.end)
+		&& (a->desc.start == b->desc.start)
+		&& (memcmp(a_, b_, sw_flow_mask_actual_size(a)) == 0);
+}
+
+struct sw_flow_mask *sw_flow_mask_find(const struct sw_flow_mask *mfm)
+{
+	struct list_head *ml;
+
+	list_for_each(ml, &mask_list) {
+		struct sw_flow_mask *m;
+		m = container_of(ml, struct sw_flow_mask, list);
+		if (sw_flow_mask_equal(mfm, m))
+			return m;
+	}
+
+	return NULL;
+}
+
+/**
+ * Add a new mask 'mfm' to the mask list.
+ * The caller needs to make sure that 'mfm' is not the same
+ * as any mask that is already on the list.
+ *
+ * 'mfm' should have a ref count of 1.
+ */
+void sw_flow_mask_insert(struct sw_flow_mask *mfm)
+{
+	list_add_rcu(&mfm->list, &mask_list);
+}
+
+/**
+ * Set the 'desc' fields in the mask to the value of 'val'.
+ */
+void sw_flow_mask_set(struct sw_flow_mask *mfm,
+		struct sw_flow_desc *desc, u8 val)
+{
+	u8 *m = (u8*)&mfm->key + desc->start;
+	int s = sw_flow_desc_roundup_size(desc);
+
+	mfm->desc = *desc;
+	memset(m, val, s);
+}
diff --git a/datapath/flow.h b/datapath/flow.h
index dba66cf..9fb5a1c 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -33,6 +33,8 @@
 #include <net/inet_ecn.h>
 
 struct sk_buff;
+struct sw_flow_mask;
+struct flow_table;
 
 struct sw_flow_actions {
 	struct rcu_head rcu;
@@ -117,9 +119,13 @@ struct sw_flow_key {
 struct sw_flow {
 	struct rcu_head rcu;
 	struct hlist_node hash_node[2];
+	struct hlist_node hash_node_id[2];
 	u32 hash;
+	u32 id_hash;
 
 	struct sw_flow_key key;
+	struct sw_flow_key user_id;
+	struct sw_flow_mask *mfm;
 	struct sw_flow_actions __rcu *sf_acts;
 
 	spinlock_t lock;	/* Lock for values below. */
@@ -129,6 +135,39 @@ struct sw_flow {
 	u8 tcp_flags;		/* Union of seen TCP flags. */
 };
 
+struct sw_flow_desc {
+	size_t start;
+	size_t end;
+};
+
+static inline u16 sw_flow_desc_actual_size(const struct sw_flow_desc *desc)
+{
+	return desc->end - desc->start;
+}
+
+static inline u16 sw_flow_desc_roundup_size(const struct sw_flow_desc *desc)
+{
+	u16 actual = sw_flow_desc_actual_size(desc);
+	u16 n32s = actual / sizeof(u32);
+	u16 roundup;
+
+	roundup = (actual & 0x3) ? (n32s + 1) * sizeof(u32) : actual;
+
+	return roundup;
+}
+
+struct sw_flow_match {
+	struct sw_flow_key *key;
+	struct sw_flow_desc key_desc;
+	struct sw_flow_mask *mfm;
+};
+
+void ovs_match_init(struct sw_flow_match *match,
+		struct sw_flow_key *key, struct sw_flow_mask *mask);
+
+bool ovs_match_validate(const struct sw_flow_match *match,
+		u64 key_attrs, u64 mask_attrs);
+
 struct arp_eth_header {
 	__be16      ar_hrd;	/* format of hardware address   */
 	__be16      ar_pro;	/* format of protocol address   */
@@ -157,9 +196,10 @@ int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
 		     int *key_lenp);
 void ovs_flow_used(struct sw_flow *, struct sk_buff *);
 u64 ovs_flow_used_time(unsigned long flow_jiffies);
-
-int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
-int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+int ovs_flow_to_nlattrs( const struct sw_flow_key *,
+		const struct sw_flow_key *, struct sk_buff *);
+int ovs_match_from_nlattrs(struct sw_flow_match *match,
+		      const struct nlattr *,
 		      const struct nlattr *);
 int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len,
 				   const struct nlattr *attr);
@@ -169,6 +209,7 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len,
 
 struct flow_table {
 	struct flex_array *buckets;
+	struct flex_array *buckets_ids;
 	unsigned int count, n_buckets;
 	struct rcu_head rcu;
 	int node_ver;
@@ -186,22 +227,64 @@ static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
 	return (table->count > table->n_buckets);
 }
 
+struct sw_flow *ovs_flow_lookup(struct flow_table *,
+				const struct sw_flow_key *, int len);
 struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
-				    struct sw_flow_key *key, int len);
+				    const struct sw_flow_key *key,
+				    struct sw_flow_mask *mask,
+				    int len);
+struct sw_flow *ovs_flow_tbl_lookup_id(struct flow_table *table,
+				    struct sw_flow_key *key,
+				    int len);
+
 void ovs_flow_tbl_destroy(struct flow_table *table);
 void ovs_flow_tbl_deferred_destroy(struct flow_table *table);
 struct flow_table *ovs_flow_tbl_alloc(int new_size);
 struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
 struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
 void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
-			 struct sw_flow_key *key, int key_len);
+			 const struct sw_flow_key *key, int key_len);
+
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
 
 struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx);
 extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
 int ipv4_tun_from_nlattr(const struct nlattr *attr,
-			 struct ovs_key_ipv4_tunnel *tun_key);
+			 struct sw_flow_match *match, bool is_mask);
 int ipv4_tun_to_nlattr(struct sk_buff *skb,
-			const struct ovs_key_ipv4_tunnel *tun_key);
+			const struct ovs_key_ipv4_tunnel *tun_key,
+			const struct ovs_key_ipv4_tunnel *output);
+
+bool ovs_flow_cmp_id(struct sw_flow *flow,
+		  struct sw_flow_key *key, int key_len);
+
+struct sw_flow_mask {
+	struct sw_flow_desc desc;	/* Range read by the mask size helpers. */
+	struct sw_flow_key key;	/* Mask value, stored in flow-key layout. */
+	struct list_head list;	/* NOTE(review): presumably a mask-list link. */
+	struct rcu_head rcu;	/* NOTE(review): presumably for deferred free. */
+	struct kref kref;	/* Refcount; presumably via *_add_ref/_del_ref. */
+};
+
+/* Unpadded size of the range described by mask->desc. */
+static inline u16 sw_flow_mask_actual_size(const struct sw_flow_mask *mask)
+{
+	return sw_flow_desc_actual_size(&mask->desc);
+}
+
+/* Size of mask->desc's range, rounded up to a multiple of sizeof(u32). */
+static inline u16 sw_flow_mask_roundup_size(const struct sw_flow_mask *mask)
+{
+	return sw_flow_desc_roundup_size(&mask->desc);
+}
 
+struct sw_flow_mask *sw_flow_mask_alloc(void);
+void sw_flow_mask_add_ref(struct sw_flow_mask *);
+int sw_flow_mask_del_ref(struct sw_flow_mask *);
+void sw_flow_mask_insert(struct sw_flow_mask *);
+void sw_flow_mask_set(struct sw_flow_mask *, struct sw_flow_desc *, u8 val);
+struct sw_flow_mask *sw_flow_mask_find(const struct sw_flow_mask *);
+int ovs_mega_flow_from_nlattrs(struct sw_flow_key *key,
+			       struct sw_flow_mask *mask,
+			       const struct nlattr *);
 #endif /* flow.h */
-- 
1.7.9.5




More information about the dev mailing list