[ovs-dev] [netlink flows v2 11/12] datapath: Convert odp_flow_key to use Netlink attributes instead.

Ben Pfaff blp at nicira.com
Tue Dec 28 22:33:06 UTC 2010


One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 datapath/actions.c                                 |   20 +-
 datapath/actions.h                                 |    4 +-
 datapath/datapath.c                                |   77 +++-
 datapath/flow.c                                    |  297 ++++++++++++-
 datapath/flow.h                                    |   29 ++-
 .../linux-2.6/compat-2.6/include/net/netlink.h     |   47 ++
 datapath/odp-compat.h                              |    3 +-
 datapath/tunnel.c                                  |    2 +-
 include/openvswitch/datapath-protocol.h            |   94 +++--
 lib/dpif-netdev.c                                  |   65 +++-
 lib/dpif.c                                         |   38 ++-
 lib/flow.c                                         |    6 +-
 lib/flow.h                                         |    4 +
 lib/nx-match.c                                     |    7 +-
 lib/odp-util.c                                     |  454 +++++++++++++++++---
 lib/odp-util.h                                     |   28 +-
 lib/ofp-util.c                                     |   25 +-
 lib/ofp-util.h                                     |    4 +
 lib/packets.h                                      |    4 +
 ofproto/ofproto.c                                  |  104 ++++--
 utilities/ovs-dpctl.c                              |    7 +-
 21 files changed, 1119 insertions(+), 200 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 5e16143..511937e 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -27,7 +27,7 @@
 #include "vport.h"
 
 static int do_execute_actions(struct datapath *, struct sk_buff *,
-			      const struct odp_flow_key *,
+			      const struct sw_flow_key *,
 			      const struct nlattr *actions, u32 actions_len);
 
 static struct sk_buff *make_writable(struct sk_buff *skb, unsigned min_headroom)
@@ -76,7 +76,7 @@ static struct sk_buff *vlan_pull_tag(struct sk_buff *skb)
 }
 
 static struct sk_buff *modify_vlan_tci(struct datapath *dp, struct sk_buff *skb,
-				       const struct odp_flow_key *key,
+				       const struct sw_flow_key *key,
 				       const struct nlattr *a, u32 actions_len)
 {
 	__be16 tci = nla_get_be16(a);
@@ -207,13 +207,13 @@ static struct sk_buff *strip_vlan(struct sk_buff *skb)
 	return skb;
 }
 
-static bool is_ip(struct sk_buff *skb, const struct odp_flow_key *key)
+static bool is_ip(struct sk_buff *skb, const struct sw_flow_key *key)
 {
 	return (key->dl_type == htons(ETH_P_IP) &&
 		skb->transport_header > skb->network_header);
 }
 
-static __sum16 *get_l4_checksum(struct sk_buff *skb, const struct odp_flow_key *key)
+static __sum16 *get_l4_checksum(struct sk_buff *skb, const struct sw_flow_key *key)
 {
 	int transport_len = skb->len - skb_transport_offset(skb);
 	if (key->nw_proto == IPPROTO_TCP) {
@@ -227,7 +227,7 @@ static __sum16 *get_l4_checksum(struct sk_buff *skb, const struct odp_flow_key *
 }
 
 static struct sk_buff *set_nw_addr(struct sk_buff *skb,
-				   const struct odp_flow_key *key,
+				   const struct sw_flow_key *key,
 				   const struct nlattr *a)
 {
 	__be32 new_nwaddr = nla_get_be32(a);
@@ -256,7 +256,7 @@ static struct sk_buff *set_nw_addr(struct sk_buff *skb,
 }
 
 static struct sk_buff *set_nw_tos(struct sk_buff *skb,
-				  const struct odp_flow_key *key,
+				  const struct sw_flow_key *key,
 				  u8 nw_tos)
 {
 	if (unlikely(!is_ip(skb, key)))
@@ -279,7 +279,7 @@ static struct sk_buff *set_nw_tos(struct sk_buff *skb,
 }
 
 static struct sk_buff *set_tp_port(struct sk_buff *skb,
-				   const struct odp_flow_key *key,
+				   const struct sw_flow_key *key,
 				   const struct nlattr *a)
 {
 	struct udphdr *th;
@@ -324,7 +324,7 @@ static struct sk_buff *set_tp_port(struct sk_buff *skb,
  * or truncated header fields or one whose inner and outer Ethernet address
  * differ.
  */
-static bool is_spoofed_arp(struct sk_buff *skb, const struct odp_flow_key *key)
+static bool is_spoofed_arp(struct sk_buff *skb, const struct sw_flow_key *key)
 {
 	struct arp_eth_header *arp;
 
@@ -370,7 +370,7 @@ static int output_control(struct datapath *dp, struct sk_buff *skb, u64 arg)
 
 /* Execute a list of actions against 'skb'. */
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
-			      const struct odp_flow_key *key,
+			      const struct sw_flow_key *key,
 			      const struct nlattr *actions, u32 actions_len)
 {
 	/* Every output action needs a separate clone of 'skb', but the common
@@ -490,7 +490,7 @@ static void sflow_sample(struct datapath *dp, struct sk_buff *skb,
 
 /* Execute a list of actions against 'skb'. */
 int execute_actions(struct datapath *dp, struct sk_buff *skb,
-		    const struct odp_flow_key *key,
+		    const struct sw_flow_key *key,
 		    const struct nlattr *actions, u32 actions_len)
 {
 	if (dp->sflow_probability) {
diff --git a/datapath/actions.h b/datapath/actions.h
index 3067bf7..5ad322f 100644
--- a/datapath/actions.h
+++ b/datapath/actions.h
@@ -14,10 +14,10 @@
 
 struct datapath;
 struct sk_buff;
-struct odp_flow_key;
+struct sw_flow_key;
 
 int execute_actions(struct datapath *dp, struct sk_buff *skb,
-		    const struct odp_flow_key *key,
+		    const struct sw_flow_key *,
 		    const struct nlattr *, u32 actions_len);
 
 #endif /* actions.h */
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 4b65325..d253e90 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -41,6 +41,7 @@
 #include <linux/rculist.h>
 #include <linux/dmi.h>
 #include <net/inet_ecn.h>
+#include <net/genetlink.h>
 #include <linux/compat.h>
 
 #include "openvswitch/datapath-protocol.h"
@@ -481,7 +482,7 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	OVS_CB(skb)->vport = p;
 
 	if (!OVS_CB(skb)->flow) {
-		struct odp_flow_key key;
+		struct sw_flow_key key;
 		struct tbl_node *flow_node;
 		bool is_frag;
 
@@ -814,14 +815,19 @@ static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf,
 		       struct odp_flow_stats *stats)
 {
 	struct tbl_node *flow_node;
+	struct sw_flow_key key;
 	struct sw_flow *flow;
 	struct tbl *table;
 	int error;
 	u32 hash;
 
-	hash = flow_hash(&uf->flow.key);
+	error = flow_copy_from_user(&key, (const struct nlattr __force __user*)uf->flow.key, uf->flow.key_len);
+	if (error)
+		return error;
+
+	hash = flow_hash(&key);
 	table = get_table_protected(dp);
-	flow_node = tbl_lookup(table, &uf->flow.key, hash, flow_cmp);
+	flow_node = tbl_lookup(table, &key, hash, flow_cmp);
 	if (!flow_node) {
 		/* No such flow. */
 		struct sw_flow_actions *acts;
@@ -844,7 +850,7 @@ static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf,
 			error = PTR_ERR(flow);
 			goto error;
 		}
-		flow->key = uf->flow.key;
+		flow->key = key;
 		clear_stats(flow);
 
 		/* Obtain actions. */
@@ -973,13 +979,18 @@ static int answer_query(struct datapath *dp, struct sw_flow *flow,
 			       &ufp->stats, actions, &ufp->actions_len);
 }
 
-static struct sw_flow *do_del_flow(struct datapath *dp, struct odp_flow_key *key)
+static struct sw_flow *do_del_flow(struct datapath *dp, const struct nlattr __user *key, u32 key_len)
 {
 	struct tbl *table = get_table_protected(dp);
 	struct tbl_node *flow_node;
+	struct sw_flow_key swkey;
 	int error;
 
-	flow_node = tbl_lookup(table, key, flow_hash(key), flow_cmp);
+	error = flow_copy_from_user(&swkey, key, key_len);
+	if (error)
+		return ERR_PTR(error);
+
+	flow_node = tbl_lookup(table, &swkey, flow_hash(&swkey), flow_cmp);
 	if (!flow_node)
 		return ERR_PTR(-ENOENT);
 
@@ -1003,7 +1014,7 @@ static int del_flow(struct datapath *dp, struct odp_flow __user *ufp)
 	if (copy_from_user(&uf, ufp, sizeof uf))
 		return -EFAULT;
 
-	flow = do_del_flow(dp, &uf.key);
+	flow = do_del_flow(dp, (const struct nlattr __force __user *)uf.key, uf.key_len);
 	if (IS_ERR(flow))
 		return PTR_ERR(flow);
 
@@ -1019,6 +1030,7 @@ static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec
 
 	for (i = 0; i < flowvec->n_flows; i++) {
 		struct odp_flow __user *ufp = (struct odp_flow __user __force *)&flowvec->flows[i];
+		struct sw_flow_key key;
 		struct odp_flow uf;
 		struct tbl_node *flow_node;
 		int error;
@@ -1026,7 +1038,11 @@ static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec
 		if (copy_from_user(&uf, ufp, sizeof uf))
 			return -EFAULT;
 
-		flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp);
+		error = flow_copy_from_user(&key, (const struct nlattr __force __user *)uf.key, uf.key_len);
+		if (error)
+			return error;
+
+		flow_node = tbl_lookup(table, &uf.key, flow_hash(&key), flow_cmp);
 		if (!flow_node)
 			error = put_user(ENOENT, &ufp->stats.error);
 		else
@@ -1078,7 +1094,9 @@ static struct sw_flow *do_dump_flow(struct datapath *dp, u32 __user *state)
 static int dump_flow(struct datapath *dp, struct odp_flow_dump __user *udumpp)
 {
 	struct odp_flow __user *uflowp;
+	struct nlattr __user *ukey;
 	struct sw_flow *flow;
+	u32 key_len;
 
 	flow = do_dump_flow(dp, udumpp->state);
 	if (IS_ERR(flow))
@@ -1090,15 +1108,23 @@ static int dump_flow(struct datapath *dp, struct odp_flow_dump __user *udumpp)
 	if (!flow)
 		return put_user(ODPFF_EOF, &uflowp->flags);
 
-	if (copy_to_user(&uflowp->key, &flow->key, sizeof(struct odp_flow_key)) ||
-	    put_user(0, &uflowp->flags))
+	if (put_user(0, &uflowp->flags) ||
+	    get_user(ukey, (struct nlattr __user * __user*)&uflowp->key) ||
+	    get_user(key_len, &uflowp->key_len))
 		return -EFAULT;
+
+	key_len = flow_copy_to_user(ukey, &flow->key, key_len);
+	if (key_len < 0)
+		return key_len;
+	if (put_user(key_len, &uflowp->key_len))
+		return -EFAULT;
+
 	return answer_query(dp, flow, 0, uflowp);
 }
 
 static int do_execute(struct datapath *dp, const struct odp_execute *execute)
 {
-	struct odp_flow_key key;
+	struct sw_flow_key key;
 	struct sk_buff *skb;
 	struct sw_flow_actions *actions;
 	struct ethhdr *eth;
@@ -1530,16 +1556,18 @@ static int compat_list_ports(struct datapath *dp, struct compat_odp_portvec __us
 
 static int compat_get_flow(struct odp_flow *flow, const struct compat_odp_flow __user *compat)
 {
-	compat_uptr_t actions;
+	compat_uptr_t key, actions;
 
 	if (!access_ok(VERIFY_READ, compat, sizeof(struct compat_odp_flow)) ||
 	    __copy_from_user(&flow->stats, &compat->stats, sizeof(struct odp_flow_stats)) ||
-	    __copy_from_user(&flow->key, &compat->key, sizeof(struct odp_flow_key)) ||
+	    __get_user(key, &compat->key) ||
+	    __get_user(flow->key_len, &compat->key_len) ||
 	    __get_user(actions, &compat->actions) ||
 	    __get_user(flow->actions_len, &compat->actions_len) ||
 	    __get_user(flow->flags, &compat->flags))
 		return -EFAULT;
 
+	flow->key = (struct nlattr __force *)compat_ptr(key);
 	flow->actions = (struct nlattr __force *)compat_ptr(actions);
 	return 0;
 }
@@ -1587,7 +1615,7 @@ static int compat_del_flow(struct datapath *dp, struct compat_odp_flow __user *u
 	if (compat_get_flow(&uf, ufp))
 		return -EFAULT;
 
-	flow = do_del_flow(dp, &uf.key);
+	flow = do_del_flow(dp, uf.key, uf.key_len);
 	if (IS_ERR(flow))
 		return PTR_ERR(flow);
 
@@ -1607,12 +1635,17 @@ static int compat_query_flows(struct datapath *dp,
 		struct compat_odp_flow __user *ufp = &flows[i];
 		struct odp_flow uf;
 		struct tbl_node *flow_node;
+		struct sw_flow_key key;
 		int error;
 
 		if (compat_get_flow(&uf, ufp))
 			return -EFAULT;
 
-		flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp);
+		error = flow_copy_from_user(&key, uf.key, uf.key_len);
+		if (error)
+			return error;
+
+		flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
 		if (!flow_node)
 			error = put_user(ENOENT, &ufp->stats.error);
 		else
@@ -1629,6 +1662,8 @@ static int compat_dump_flow(struct datapath *dp, struct compat_odp_flow_dump __u
 	struct compat_odp_flow __user *uflowp;
 	compat_uptr_t compat_ufp;
 	struct sw_flow *flow;
+	compat_uptr_t ukey;
+	u32 key_len;
 
 	flow = do_dump_flow(dp, udumpp->state);
 	if (IS_ERR(flow))
@@ -1641,9 +1676,17 @@ static int compat_dump_flow(struct datapath *dp, struct compat_odp_flow_dump __u
 	if (!flow)
 		return put_user(ODPFF_EOF, &uflowp->flags);
 
-	if (copy_to_user(&uflowp->key, &flow->key, sizeof(struct odp_flow_key)) ||
-	    put_user(0, &uflowp->flags))
+	if (put_user(0, &uflowp->flags) ||
+	    get_user(ukey, &uflowp->key) ||
+	    get_user(key_len, &uflowp->key_len))
+		return -EFAULT;
+
+	key_len = flow_copy_to_user(compat_ptr(ukey), &flow->key, key_len);
+	if (key_len < 0)
+		return key_len;
+	if (put_user(key_len, &uflowp->key_len))
 		return -EFAULT;
+
 	return compat_answer_query(dp, flow, 0, uflowp);
 }
 
diff --git a/datapath/flow.c b/datapath/flow.c
index 5cf0d54..323476d 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -8,6 +8,7 @@
 
 #include "flow.h"
 #include "datapath.h"
+#include <asm/uaccess.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/if_ether.h>
@@ -195,7 +196,7 @@ void flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
 	call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
 }
 
-static void parse_vlan(struct sk_buff *skb, struct odp_flow_key *key)
+static void parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 {
 	struct qtag_prefix {
 		__be16 eth_type; /* ETH_P_8021Q */
@@ -207,7 +208,7 @@ static void parse_vlan(struct sk_buff *skb, struct odp_flow_key *key)
 		return;
 
 	qp = (struct qtag_prefix *) skb->data;
-	key->dl_tci = qp->tci | htons(ODP_TCI_PRESENT);
+	key->dl_tci = qp->tci | htons(VLAN_TAG_PRESENT);
 	__skb_pull(skb, sizeof(struct qtag_prefix));
 }
 
@@ -226,17 +227,17 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 	proto = *(__be16 *) skb->data;
 	__skb_pull(skb, sizeof(__be16));
 
-	if (ntohs(proto) >= ODP_DL_TYPE_ETH2_CUTOFF)
+	if (ntohs(proto) >= 1536)
 		return proto;
 
 	if (unlikely(skb->len < sizeof(struct llc_snap_hdr)))
-		return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+		return htons(ETH_P_802_2);
 
 	llc = (struct llc_snap_hdr *) skb->data;
 	if (llc->dsap != LLC_SAP_SNAP ||
 	    llc->ssap != LLC_SAP_SNAP ||
 	    (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
-		return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+		return htons(ETH_P_802_2);
 
 	__skb_pull(skb, sizeof(struct llc_snap_hdr));
 	return llc->ethertype;
@@ -267,7 +268,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
  *      otherwise the same as skb->network_header.  For other key->dl_type
  *      values it is left untouched.
  */
-int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key,
+int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 		 bool *is_frag)
 {
 	struct ethhdr *eth;
@@ -383,17 +384,293 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key,
 	return 0;
 }
 
-u32 flow_hash(const struct odp_flow_key *key)
+u32 flow_hash(const struct sw_flow_key *key)
 {
 	return jhash2((u32*)key, sizeof *key / sizeof(u32), hash_seed);
 }
 
 int flow_cmp(const struct tbl_node *node, void *key2_)
 {
-	const struct odp_flow_key *key1 = &flow_cast(node)->key;
-	const struct odp_flow_key *key2 = key2_;
+	const struct sw_flow_key *key1 = &flow_cast(node)->key;
+	const struct sw_flow_key *key2 = key2_;
 
-	return !memcmp(key1, key2, sizeof(struct odp_flow_key));
+	return !memcmp(key1, key2, sizeof(struct sw_flow_key));
+}
+
+/**
+ * flow_from_nlattrs - parses Netlink attributes into a flow key.
+ * @swkey: receives the extracted flow key.
+ * @key: start of %ODPKT_* Netlink attribute sequence.
+ * @key_len: number of bytes in @key.
+ *
+ * This state machine accepts the following forms, with [] for optional
+ * elements and | for alternatives:
+ *
+ * [tun_id] in_port ethernet [8021q] [ethertype [IP [TCP|UDP|ICMP] | ARP]
+ */
+static int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *key, u32 key_len)
+{
+	const struct nlattr *nla;
+	u16 prev_type;
+	int rem;
+
+	memset(swkey, 0, sizeof(*swkey));
+	swkey->dl_type = htons(ETH_P_802_2);
+
+	prev_type = ODPKT_UNSPEC;
+	nla_for_each_attr(nla, key, key_len, rem) {
+		static const u32 key_lens[ODPKT_MAX + 1] = {
+			[ODPKT_TUN_ID] = 8,
+			[ODPKT_IN_PORT] = 4,
+			[ODPKT_ETHERNET] = sizeof(struct odp_key_ethernet),
+			[ODPKT_8021Q] = sizeof(struct odp_key_8021q),
+			[ODPKT_ETHERTYPE] = 2,
+			[ODPKT_IPV4] = sizeof(struct odp_key_ipv4),
+			[ODPKT_TCP] = sizeof(struct odp_key_tcp),
+			[ODPKT_UDP] = sizeof(struct odp_key_udp),
+			[ODPKT_ICMP] = sizeof(struct odp_key_icmp),
+			[ODPKT_ARP] = sizeof(struct odp_key_arp),
+		};
+
+		const struct odp_key_ethernet *eth_key;
+		const struct odp_key_8021q *q_key;
+		const struct odp_key_ipv4 *ipv4_key;
+		const struct odp_key_tcp *tcp_key;
+		const struct odp_key_udp *udp_key;
+		const struct odp_key_icmp *icmp_key;
+		const struct odp_key_arp *arp_key;
+
+                int type = nla_type(nla);
+
+                if (type > ODPKT_MAX || nla_len(nla) != key_lens[type])
+                        return -EINVAL;
+
+#define TRANSITION(PREV_TYPE, TYPE) (((PREV_TYPE) << 16) | (TYPE))
+		switch (TRANSITION(prev_type, type)) {
+		case TRANSITION(ODPKT_UNSPEC, ODPKT_TUN_ID):
+			swkey->tun_id = nla_get_be64(nla);
+			break;
+
+		case TRANSITION(ODPKT_UNSPEC, ODPKT_IN_PORT):
+		case TRANSITION(ODPKT_TUN_ID, ODPKT_IN_PORT):
+			if (nla_get_u32(nla) >= DP_MAX_PORTS)
+				return -EINVAL;
+			swkey->in_port = nla_get_u32(nla);
+			break;
+
+		case TRANSITION(ODPKT_IN_PORT, ODPKT_ETHERNET):
+			eth_key = nla_data(nla);
+			memcpy(swkey->dl_src, eth_key->eth_src, ETH_ALEN);
+			memcpy(swkey->dl_dst, eth_key->eth_dst, ETH_ALEN);
+			break;
+
+		case TRANSITION(ODPKT_ETHERNET, ODPKT_8021Q):
+			q_key = nla_data(nla);
+			/* Only standard 0x8100 VLANs currently supported. */
+			if (q_key->q_tpid != htons(ETH_P_8021Q))
+				return -EINVAL;
+			if (q_key->q_tci & htons(VLAN_TAG_PRESENT))
+				return -EINVAL;
+			swkey->dl_tci = q_key->q_tci | htons(VLAN_TAG_PRESENT);
+			break;
+
+		case TRANSITION(ODPKT_8021Q, ODPKT_ETHERTYPE):
+		case TRANSITION(ODPKT_ETHERNET, ODPKT_ETHERTYPE):
+			swkey->dl_type = nla_get_be16(nla);
+			if (ntohs(swkey->dl_type) < 1536)
+				return -EINVAL;
+			break;
+
+		case TRANSITION(ODPKT_ETHERTYPE, ODPKT_IPV4):
+			if (swkey->dl_type != htons(ETH_P_IP))
+				return -EINVAL;
+			ipv4_key = nla_data(nla);
+			swkey->nw_src = ipv4_key->ipv4_src;
+			swkey->nw_dst = ipv4_key->ipv4_dst;
+			swkey->nw_proto = ipv4_key->ipv4_proto;
+			swkey->nw_tos = ipv4_key->ipv4_tos;
+			if (swkey->nw_tos & INET_ECN_MASK)
+				return -EINVAL;
+			break;
+
+		case TRANSITION(ODPKT_IPV4, ODPKT_TCP):
+			if (swkey->nw_proto != IPPROTO_TCP)
+				return -EINVAL;
+			tcp_key = nla_data(nla);
+			swkey->tp_src = tcp_key->tcp_src;
+			swkey->tp_dst = tcp_key->tcp_dst;
+			break;
+
+		case TRANSITION(ODPKT_IPV4, ODPKT_UDP):
+			if (swkey->nw_proto != IPPROTO_UDP)
+				return -EINVAL;
+			udp_key = nla_data(nla);
+			swkey->tp_src = udp_key->udp_src;
+			swkey->tp_dst = udp_key->udp_dst;
+			break;
+
+		case TRANSITION(ODPKT_IPV4, ODPKT_ICMP):
+			if (swkey->nw_proto != IPPROTO_ICMP)
+				return -EINVAL;
+			icmp_key = nla_data(nla);
+			swkey->tp_src = htons(icmp_key->icmp_type);
+			swkey->tp_dst = htons(icmp_key->icmp_code);
+			break;
+
+		case TRANSITION(ODPKT_ETHERTYPE, ODPKT_ARP):
+			if (swkey->dl_type != htons(ETH_P_ARP))
+				return -EINVAL;
+			arp_key = nla_data(nla);
+			swkey->nw_src = arp_key->arp_sip;
+			swkey->nw_dst = arp_key->arp_tip;
+			if (arp_key->arp_op & htons(0xff00))
+				return -EINVAL;
+			swkey->nw_proto = ntohs(arp_key->arp_op);
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+		prev_type = type;
+	}
+	if (rem)
+		return -EINVAL;
+
+	switch (prev_type) {
+	case ODPKT_UNSPEC:
+		return -EINVAL;
+
+	case ODPKT_TUN_ID:
+	case ODPKT_IN_PORT:
+		return -EINVAL;
+
+	case ODPKT_ETHERNET:
+	case ODPKT_8021Q:
+		return 0;
+
+	case ODPKT_ETHERTYPE:
+		if (swkey->dl_type == htons(ETH_P_IP) ||
+		    swkey->dl_type == htons(ETH_P_ARP))
+			return -EINVAL;
+		return 0;
+
+	case ODPKT_IPV4:
+		if (swkey->nw_proto == htons(IPPROTO_TCP) ||
+		    swkey->nw_proto == htons(IPPROTO_UDP) ||
+		    swkey->nw_proto == htons(IPPROTO_ICMP))
+			return -EINVAL;
+		return 0;
+
+	case ODPKT_TCP:
+	case ODPKT_UDP:
+	case ODPKT_ICMP:
+	case ODPKT_ARP:
+		return 0;
+	}
+
+	WARN_ON_ONCE(1);
+	return -EINVAL;
+}
+
+static u32 flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
+{
+	struct odp_key_ethernet *eth_key;
+
+	if (skb_tailroom(skb) < FLOW_BUFSIZE)
+		return -EMSGSIZE;
+
+	if (swkey->tun_id != cpu_to_be64(0))
+		nla_put_be64(skb, ODPKT_TUN_ID, swkey->tun_id);
+
+	nla_put_u32(skb, ODPKT_IN_PORT, swkey->in_port);
+
+	eth_key = nla_data(__nla_reserve(skb, ODPKT_ETHERNET, sizeof(*eth_key)));
+	memcpy(eth_key->eth_src, swkey->dl_src, ETH_ALEN);
+	memcpy(eth_key->eth_dst, swkey->dl_dst, ETH_ALEN);
+
+	if (swkey->dl_tci != htons(0)) {
+		struct odp_key_8021q *q_key;
+
+		q_key = nla_data(__nla_reserve(skb, ODPKT_8021Q, sizeof(*q_key)));
+		q_key->q_tpid = htons(ETH_P_8021Q);
+		q_key->q_tci = swkey->dl_tci & ~htons(VLAN_TAG_PRESENT);
+	}
+
+	if (swkey->dl_type == htons(ETH_P_802_2))
+		goto exit;
+
+	nla_put_be16(skb, ODPKT_ETHERTYPE, swkey->dl_type);
+
+	if (swkey->dl_type == htons(ETH_P_IP)) {
+		struct odp_key_ipv4 *ipv4_key;
+
+		ipv4_key = nla_data(__nla_reserve(skb, ODPKT_IPV4, sizeof(*ipv4_key)));
+		ipv4_key->ipv4_src = swkey->nw_src;
+		ipv4_key->ipv4_dst = swkey->nw_dst;
+		ipv4_key->ipv4_proto = swkey->nw_proto;
+		ipv4_key->ipv4_tos = swkey->nw_tos;
+
+		if (swkey->nw_proto == IPPROTO_TCP) {
+			struct odp_key_tcp *tcp_key;
+
+			tcp_key = nla_data(__nla_reserve(skb, ODPKT_TCP, sizeof(*tcp_key)));
+			tcp_key->tcp_src = swkey->tp_src;
+			tcp_key->tcp_dst = swkey->tp_dst;
+		} else if (swkey->nw_proto == IPPROTO_UDP) {
+			struct odp_key_udp *udp_key;
+
+			udp_key = nla_data(__nla_reserve(skb, ODPKT_UDP, sizeof(*udp_key)));
+			udp_key->udp_src = swkey->tp_src;
+			udp_key->udp_dst = swkey->tp_dst;
+		} else if (swkey->nw_proto == IPPROTO_ICMP) {
+			struct odp_key_icmp *icmp_key;
+
+			icmp_key = nla_data(__nla_reserve(skb, ODPKT_ICMP, sizeof(*icmp_key)));
+			icmp_key->icmp_type = ntohs(swkey->tp_src);
+			icmp_key->icmp_code = ntohs(swkey->tp_dst);
+		}
+	} else if (swkey->dl_type == htons(ETH_P_ARP)) {
+		struct odp_key_arp *arp_key;
+
+		arp_key = nla_data(__nla_reserve(skb, ODPKT_ARP, sizeof(*arp_key)));
+		arp_key->arp_sip = swkey->nw_src;
+		arp_key->arp_tip = swkey->nw_dst;
+		arp_key->arp_op = htons(swkey->nw_proto);
+	}
+
+exit:
+	return skb->len;
+}
+
+int flow_copy_from_user(struct sw_flow_key *swkey, const struct nlattr __user *ukey, u32 ukey_len)
+{
+	char key[FLOW_BUFSIZE] __aligned(NLA_ALIGNTO);
+
+	if (ukey_len > FLOW_BUFSIZE || ukey_len % NLA_ALIGNTO)
+		return -EINVAL;
+
+	if (copy_from_user(key, ukey, ukey_len))
+		return -EFAULT;
+
+	return flow_from_nlattrs(swkey, (const struct nlattr *)key, ukey_len);
+}
+
+int flow_copy_to_user(struct nlattr __user *ukey, const struct sw_flow_key *swkey, u32 ukey_len)
+{
+	struct sk_buff *skb;
+	int retval;
+
+	skb = alloc_skb(FLOW_BUFSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	retval = flow_to_nlattrs(swkey, skb);
+	if (copy_to_user(ukey, skb->data, min(skb->len, ukey_len)))
+		retval = -EFAULT;
+	kfree_skb(skb);
+
+	return retval;
 }
 
 /* Initializes the flow module.
diff --git a/datapath/flow.h b/datapath/flow.h
index 0764482..f09f76c 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -29,11 +29,26 @@ struct sw_flow_actions {
 	struct nlattr actions[];
 };
 
+struct sw_flow_key {
+	__be64	tun_id;     /* Encapsulating tunnel ID. */
+	__be32	nw_src;	    /* IP source address. */
+	__be32	nw_dst;	    /* IP destination address. */
+	u16	in_port;    /* Input switch port. */
+	__be16	dl_tci;	    /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+	__be16	dl_type;    /* Ethernet frame type. */
+	__be16	tp_src;	    /* TCP/UDP source port. */
+	__be16	tp_dst;	    /* TCP/UDP destination port. */
+	u8	dl_src[ETH_ALEN]; /* Ethernet source address. */
+	u8	dl_dst[ETH_ALEN]; /* Ethernet destination address. */
+	u8	nw_proto;   /* IP protocol or lower 8 bits of ARP opcode. */
+	u8	nw_tos;	    /* IP ToS (DSCP field, 6 bits). */
+};
+
 struct sw_flow {
 	struct rcu_head rcu;
 	struct tbl_node tbl_node;
 
-	struct odp_flow_key key;
+	struct sw_flow_key key;
 	struct sw_flow_actions __rcu *sf_acts;
 
 	atomic_t refcnt;
@@ -74,12 +89,20 @@ void flow_deferred_free_acts(struct sw_flow_actions *);
 void flow_hold(struct sw_flow *);
 void flow_put(struct sw_flow *);
 
-int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *, bool *is_frag);
+int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, bool *is_frag);
 void flow_used(struct sw_flow *, struct sk_buff *);
 
-u32 flow_hash(const struct odp_flow_key *key);
+u32 flow_hash(const struct sw_flow_key *);
 int flow_cmp(const struct tbl_node *, void *target);
 
+/* By my calculations currently the longest valid nlattr-formatted flow key is
+ * 80 bytes long, so this leaves some safety margin.
+ */
+#define FLOW_BUFSIZE 96
+
+int flow_copy_from_user(struct sw_flow_key *, const struct nlattr __user *ukey, u32 key_len);
+int flow_copy_to_user(struct nlattr __user *ukey, const struct sw_flow_key *, u32 key_len);
+
 static inline struct sw_flow *flow_cast(const struct tbl_node *node)
 {
 	return container_of(node, struct sw_flow, tbl_node);
diff --git a/datapath/linux-2.6/compat-2.6/include/net/netlink.h b/datapath/linux-2.6/compat-2.6/include/net/netlink.h
index c11a170..b9ed532 100644
--- a/datapath/linux-2.6/compat-2.6/include/net/netlink.h
+++ b/datapath/linux-2.6/compat-2.6/include/net/netlink.h
@@ -25,6 +25,16 @@ static inline int VERIFY_NUL_STRING(struct nlattr *attr)
         NLA_PUT_TYPE(skb, __be16, attrtype, value)
 #endif  /* !NLA_PUT_BE16 */
 
+#ifndef NLA_PUT_BE32
+#define NLA_PUT_BE32(skb, attrtype, value) \
+        NLA_PUT_TYPE(skb, __be32, attrtype, value)
+#endif  /* !NLA_PUT_BE32 */
+
+#ifndef NLA_PUT_BE64
+#define NLA_PUT_BE64(skb, attrtype, value) \
+        NLA_PUT_TYPE(skb, __be64, attrtype, value)
+#endif  /* !NLA_PUT_BE64 */
+
 #ifndef HAVE_NLA_GET_BE16
 /**
  * nla_get_be16 - return payload of __be16 attribute
@@ -92,4 +102,41 @@ static inline int nla_type(const struct nlattr *nla)
 }
 #endif
 
+/* The following nla_put_be{16,32,64} functions are not in any version of Linux
+ * (although NLA_PUT_BE{16,32,64} are), so we will probably want to add them
+ * as part of the patch series when we submit Open vSwitch upstream. */
+
+/**
+ * nla_put_be16 - Add a be16 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value)
+{
+	return nla_put(skb, attrtype, sizeof(__be16), &value);
+}
+
+/**
+ * nla_put_be32 - Add a be32 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value)
+{
+	return nla_put(skb, attrtype, sizeof(__be32), &value);
+}
+
+/**
+ * nla_put_64 - Add a be64 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value)
+{
+	return nla_put(skb, attrtype, sizeof(__be64), &value);
+}
+
 #endif /* net/netlink.h */
diff --git a/datapath/odp-compat.h b/datapath/odp-compat.h
index ca2f170..4da630c 100644
--- a/datapath/odp-compat.h
+++ b/datapath/odp-compat.h
@@ -30,7 +30,8 @@ struct compat_odp_portvec {
 
 struct compat_odp_flow {
 	struct odp_flow_stats stats;
-	struct odp_flow_key key;
+	compat_uptr_t key;
+	u32 key_len;
 	compat_uptr_t actions;
 	u32 actions_len;
 	u32 flags;
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 5a9f8f3..5e56443 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -910,7 +910,7 @@ static struct tnl_cache *build_cache(struct vport *vport,
 #endif
 
 	if (is_internal_dev(rt_dst(rt).dev)) {
-		struct odp_flow_key flow_key;
+		struct sw_flow_key flow_key;
 		struct tbl_node *flow_node;
 		struct vport *dst_vport;
 		struct sk_buff *skb;
diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h
index 730aa22..273ca98 100644
--- a/include/openvswitch/datapath-protocol.h
+++ b/include/openvswitch/datapath-protocol.h
@@ -210,29 +210,59 @@ struct odp_flow_stats {
     uint16_t error;             /* Used by ODP_FLOW_GET. */
 };
 
-/*
- * The datapath protocol adopts the Linux convention for TCI fields: if an
- * 802.1Q header is present then its TCI value is used verbatim except that the
- * CFI bit (0x1000) is always set to 1, and all-bits-zero indicates no 802.1Q
- * header.
- */
-#define ODP_TCI_PRESENT 0x1000  /* CFI bit */
-
-struct odp_flow_key {
-    ovs_be64 tun_id;            /* Encapsulating tunnel ID. */
-    ovs_be32 nw_src;            /* IP source address. */
-    ovs_be32 nw_dst;            /* IP destination address. */
-    uint16_t in_port;           /* Input switch port. */
-    ovs_be16 dl_tci;            /* All zeros if 802.1Q header absent,
-                                  * ODP_TCI_PRESENT set if present. */
-    ovs_be16 dl_type;           /* Ethernet frame type. */
-    ovs_be16 tp_src;            /* TCP/UDP source port. */
-    ovs_be16 tp_dst;            /* TCP/UDP destination port. */
-    uint8_t  dl_src[6];         /* Ethernet source address. */
-    uint8_t  dl_dst[6];         /* Ethernet destination address. */
-    uint8_t  nw_proto;          /* IP protocol or lower 8 bits of
-                                   ARP opcode. */
-    uint8_t  nw_tos;            /* IP ToS (DSCP field, 6 bits). */
+enum odp_key_type {
+	ODPKT_UNSPEC,
+	ODPKT_TUN_ID,		/* 64-bit tunnel ID */
+	ODPKT_IN_PORT,		/* 32-bit ODP port number */
+	ODPKT_ETHERNET,		/* struct odp_key_ethernet */
+	ODPKT_8021Q,		/* struct odp_key_8021q */
+	ODPKT_ETHERTYPE,	/* 16-bit Ethernet type */
+	ODPKT_IPV4,		/* struct odp_key_ipv4 */
+	ODPKT_TCP,		/* struct odp_key_tcp */
+	ODPKT_UDP,		/* struct odp_key_udp */
+	ODPKT_ICMP,		/* struct odp_key_icmp */
+	ODPKT_ARP,		/* struct odp_key_arp */
+	__ODPKT_MAX
+};
+
+#define ODPKT_MAX (__ODPKT_MAX - 1)
+
+struct odp_key_ethernet {
+	uint8_t	 eth_src[6];
+	uint8_t	 eth_dst[6];
+};
+
+struct odp_key_8021q {
+	ovs_be16 q_tpid;
+	ovs_be16 q_tci;
+};
+
+struct odp_key_ipv4 {
+	ovs_be32 ipv4_src;
+	ovs_be32 ipv4_dst;
+	uint8_t  ipv4_proto;
+	uint8_t  ipv4_tos;
+};
+
+struct odp_key_tcp {
+	ovs_be16 tcp_src;
+	ovs_be16 tcp_dst;
+};
+
+struct odp_key_udp {
+	ovs_be16 udp_src;
+	ovs_be16 udp_dst;
+};
+
+struct odp_key_icmp {
+	uint8_t icmp_type;
+	uint8_t icmp_code;
+};
+
+struct odp_key_arp {
+	ovs_be32 arp_sip;
+	ovs_be32 arp_tip;
+	ovs_be16 arp_op;
 };
 
 /* Flags for ODP_FLOW. */
@@ -241,7 +271,8 @@ struct odp_flow_key {
 
 struct odp_flow {
     struct odp_flow_stats stats;
-    struct odp_flow_key key;
+    struct nlattr *key;
+    uint32_t key_len;
     struct nlattr *actions;
     uint32_t actions_len;
     uint32_t flags;
@@ -287,8 +318,8 @@ enum odp_action_type {
     ODPAT_STRIP_VLAN,		/* Strip the 802.1q header. */
     ODPAT_SET_DL_SRC,		/* Ethernet source address. */
     ODPAT_SET_DL_DST,		/* Ethernet destination address. */
-    ODPAT_SET_NW_SRC,		/* IP source address. */
-    ODPAT_SET_NW_DST,		/* IP destination address. */
+    ODPAT_SET_NW_SRC,		/* IPv4 source address. */
+    ODPAT_SET_NW_DST,		/* IPv4 destination address. */
     ODPAT_SET_NW_TOS,		/* IP ToS/DSCP field (6 bits). */
     ODPAT_SET_TP_SRC,		/* TCP/UDP source port. */
     ODPAT_SET_TP_DST,		/* TCP/UDP destination port. */
@@ -336,15 +367,4 @@ struct odp_vport_mtu {
     uint16_t mtu;
 };
 
-/* Values below this cutoff are 802.3 packets and the two bytes
- * following MAC addresses are used as a frame length.  Otherwise, the
- * two bytes are used as the Ethernet type.
- */
-#define ODP_DL_TYPE_ETH2_CUTOFF   0x0600
-
-/* Value of dl_type to indicate that the frame does not include an
- * Ethernet type.
- */
-#define ODP_DL_TYPE_NOT_ETH_TYPE  0x05ff
-
 #endif  /* openvswitch/datapath-protocol.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 8b5df36..7e9e36c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -36,6 +36,7 @@
 #include "dpif.h"
 #include "dpif-provider.h"
 #include "dummy.h"
+#include "dynamic-string.h"
 #include "flow.h"
 #include "hmap.h"
 #include "list.h"
@@ -603,6 +604,32 @@ answer_flow_query(struct dp_netdev_flow *flow, uint32_t query_flags,
 }
 
 static int
+dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
+                              struct flow *flow)
+{
+    if (odp_flow_key_to_flow(key, key_len, flow)) {
+        /* This should not happen: it indicates that odp_flow_key_from_flow()
+         * and odp_flow_key_to_flow() disagree on the acceptable form of a
+         * flow.  Log the problem as an error, with enough details to enable
+         * debugging. */
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+        if (!VLOG_DROP_ERR(&rl)) {
+            struct ds s;
+
+            ds_init(&s);
+            odp_flow_key_format(key, key_len, &s);
+            VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
+            ds_destroy(&s);
+        }
+
+        return EINVAL;
+    }
+
+    return 0;
+}
+
+static int
 dpif_netdev_flow_get(const struct dpif *dpif, struct odp_flow flows[], int n)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
@@ -611,8 +638,14 @@ dpif_netdev_flow_get(const struct dpif *dpif, struct odp_flow flows[], int n)
     for (i = 0; i < n; i++) {
         struct odp_flow *odp_flow = &flows[i];
         struct flow key;
+        int error;
+
+        error = dpif_netdev_flow_from_nlattrs(odp_flow->key, odp_flow->key_len,
+                                              &key);
+        if (error) {
+            return error;
+        }
 
-        odp_flow_key_to_flow(&odp_flow->key, &key);
         answer_flow_query(dp_netdev_lookup_flow(dp, &key),
                           odp_flow->flags, odp_flow);
     }
@@ -699,14 +732,14 @@ set_flow_actions(struct dp_netdev_flow *flow, struct odp_flow *odp_flow)
 }
 
 static int
-add_flow(struct dpif *dpif, struct odp_flow *odp_flow)
+add_flow(struct dpif *dpif, const struct flow *key, struct odp_flow *odp_flow)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_flow *flow;
     int error;
 
     flow = xzalloc(sizeof *flow);
-    odp_flow_key_to_flow(&odp_flow->key, &flow->key);
+    flow->key = *key;
 
     error = set_flow_actions(flow, odp_flow);
     if (error) {
@@ -734,13 +767,19 @@ dpif_netdev_flow_put(struct dpif *dpif, struct odp_flow_put *put)
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_flow *flow;
     struct flow key;
+    int error;
+
+    error = dpif_netdev_flow_from_nlattrs(put->flow.key, put->flow.key_len,
+                                          &key);
+    if (error) {
+        return error;
+    }
 
-    odp_flow_key_to_flow(&put->flow.key, &key);
     flow = dp_netdev_lookup_flow(dp, &key);
     if (!flow) {
         if (put->flags & ODPPF_CREATE) {
             if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
-                return add_flow(dpif, &put->flow);
+                return add_flow(dpif, &key, &put->flow);
             } else {
                 return EFBIG;
             }
@@ -767,8 +806,14 @@ dpif_netdev_flow_del(struct dpif *dpif, struct odp_flow *odp_flow)
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_flow *flow;
     struct flow key;
+    int error;
+
+    error = dpif_netdev_flow_from_nlattrs(odp_flow->key, odp_flow->key_len,
+                                          &key);
+    if (error) {
+        return error;
+    }
 
-    odp_flow_key_to_flow(&odp_flow->key, &key);
     flow = dp_netdev_lookup_flow(dp, &key);
     if (flow) {
         answer_flow_query(flow, 0, odp_flow);
@@ -799,6 +844,7 @@ dpif_netdev_flow_dump_next(const struct dpif *dpif, void *state_,
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_flow *flow;
     struct hmap_node *node;
+    struct ofpbuf key;
 
     node = hmap_at_position(&dp->flow_table, &state->bucket, &state->offset);
     if (!node) {
@@ -806,7 +852,12 @@ dpif_netdev_flow_dump_next(const struct dpif *dpif, void *state_,
     }
 
     flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
-    odp_flow_key_from_flow(&odp_flow->key, &flow->key);
+
+    ofpbuf_use_stack(&key, odp_flow->key, odp_flow->key_len);
+    odp_flow_key_from_flow(&key, &flow->key);
+    odp_flow->key_len = key.size;
+    ofpbuf_uninit(&key);
+
     answer_flow_query(flow, 0, odp_flow);
 
     return 0;
diff --git a/lib/dpif.c b/lib/dpif.c
index 1bf6e41..73c92cc 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -83,7 +83,8 @@ static void log_flow_operation(const struct dpif *, const char *operation,
 static void log_flow_put(struct dpif *, int error,
                          const struct odp_flow_put *);
 static bool should_log_flow_message(int error);
-static void check_rw_odp_flow(struct odp_flow *);
+static void check_rw_flow_actions(struct odp_flow *);
+static void check_rw_flow_key(struct odp_flow *);
 
 static void
 dp_initialize(void)
@@ -679,7 +680,7 @@ dpif_flow_get(const struct dpif *dpif, struct odp_flow *flow)
 
     COVERAGE_INC(dpif_flow_get);
 
-    check_rw_odp_flow(flow);
+    check_rw_flow_actions(flow);
     error = dpif->dpif_class->flow_get(dpif, flow, 1);
     if (!error) {
         error = flow->stats.error;
@@ -732,7 +733,7 @@ dpif_flow_get_multiple(const struct dpif *dpif,
     COVERAGE_ADD(dpif_flow_get, n);
 
     for (i = 0; i < n; i++) {
-        check_rw_odp_flow(&flows[i]);
+        check_rw_flow_actions(&flows[i]);
     }
 
     error = dpif->dpif_class->flow_get(dpif, flows, n);
@@ -782,7 +783,7 @@ dpif_flow_del(struct dpif *dpif, struct odp_flow *flow)
 
     COVERAGE_INC(dpif_flow_del);
 
-    check_rw_odp_flow(flow);
+    check_rw_flow_actions(flow);
     memset(&flow->stats, 0, sizeof flow->stats);
 
     error = dpif->dpif_class->flow_del(dpif, flow);
@@ -824,7 +825,8 @@ dpif_flow_dump_next(struct dpif_flow_dump *dump, struct odp_flow *flow)
 {
     const struct dpif *dpif = dump->dpif;
 
-    check_rw_odp_flow(flow);
+    check_rw_flow_actions(flow);
+    check_rw_flow_key(flow);
 
     if (dump->error) {
         return false;
@@ -834,6 +836,9 @@ dpif_flow_dump_next(struct dpif_flow_dump *dump, struct odp_flow *flow)
     if (dump->error == EOF) {
         VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif));
     } else {
+        if (dump->error) {
+            flow->key_len = 0;
+        }
         if (should_log_flow_message(dump->error)) {
             log_flow_operation(dpif, "flow_dump_next", dump->error, flow);
         }
@@ -1117,7 +1122,7 @@ should_log_flow_message(int error)
 
 static void
 log_flow_message(const struct dpif *dpif, int error, const char *operation,
-                 const struct odp_flow_key *flow,
+                 const struct nlattr *key, size_t key_len,
                  const struct odp_flow_stats *stats,
                  const struct nlattr *actions, size_t actions_len)
 {
@@ -1130,7 +1135,7 @@ log_flow_message(const struct dpif *dpif, int error, const char *operation,
     if (error) {
         ds_put_format(&ds, "(%s) ", strerror(error));
     }
-    format_odp_flow_key(&ds, flow);
+    odp_flow_key_format(key, key_len, &ds);
     if (stats) {
         ds_put_cstr(&ds, ", ");
         format_odp_flow_stats(&ds, stats);
@@ -1148,10 +1153,11 @@ log_flow_operation(const struct dpif *dpif, const char *operation, int error,
                    struct odp_flow *flow)
 {
     if (error) {
+        flow->key_len = 0;
         flow->actions_len = 0;
     }
-    log_flow_message(dpif, error, operation, &flow->key,
-                     !error ? &flow->stats : NULL,
+    log_flow_message(dpif, error, operation,
+                     flow->key, flow->key_len, !error ? &flow->stats : NULL,
                      flow->actions, flow->actions_len);
 }
 
@@ -1175,7 +1181,8 @@ log_flow_put(struct dpif *dpif, int error, const struct odp_flow_put *put)
     if (put->flags & ~ODPPF_ALL) {
         ds_put_format(&s, "[%x]", put->flags & ~ODPPF_ALL);
     }
-    log_flow_message(dpif, error, ds_cstr(&s), &put->flow.key,
+    log_flow_message(dpif, error, ds_cstr(&s),
+                     put->flow.key, put->flow.key_len,
                      !error ? &put->flow.stats : NULL,
                      put->flow.actions, put->flow.actions_len);
     ds_destroy(&s);
@@ -1198,9 +1205,18 @@ log_flow_put(struct dpif *dpif, int error, const struct odp_flow_put *put)
  *        "actions" or "actions_len" was not initialized.
  */
 static void
-check_rw_odp_flow(struct odp_flow *flow)
+check_rw_flow_actions(struct odp_flow *flow)
 {
     if (flow->actions_len) {
         memset(&flow->actions[0], 0xcc, sizeof flow->actions[0]);
     }
 }
+
+/* Same as check_rw_flow_actions() but for flow->key. */
+static void
+check_rw_flow_key(struct odp_flow *flow)
+{
+    if (flow->key_len) {
+        memset(&flow->key[0], 0xcc, sizeof flow->key[0]);
+    }
+}
diff --git a/lib/flow.c b/lib/flow.c
index e7ed2a9..8e75d52 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -101,12 +101,12 @@ parse_ethertype(struct ofpbuf *b)
     ovs_be16 proto;
 
     proto = *(ovs_be16 *) ofpbuf_pull(b, sizeof proto);
-    if (ntohs(proto) >= ODP_DL_TYPE_ETH2_CUTOFF) {
+    if (ntohs(proto) >= ETH_TYPE_MIN) {
         return proto;
     }
 
     if (b->size < sizeof *llc) {
-        return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+        return htons(FLOW_DL_TYPE_NONE);
     }
 
     llc = b->data;
@@ -115,7 +115,7 @@ parse_ethertype(struct ofpbuf *b)
         || llc->llc.llc_cntl != LLC_CNTL_SNAP
         || memcmp(llc->snap.snap_org, SNAP_ORG_ETHERNET,
                   sizeof llc->snap.snap_org)) {
-        return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+        return htons(FLOW_DL_TYPE_NONE);
     }
 
     ofpbuf_pull(b, sizeof *llc);
diff --git a/lib/flow.h b/lib/flow.h
index ee460e3..0d4a5f7 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -35,6 +35,10 @@ struct ofpbuf;
 #define FLOW_N_REGS 4
 BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS);
 
+/* Used for struct flow's dl_type member for frames that have no Ethernet
+ * type, that is, pure 802.2 frames. */
+#define FLOW_DL_TYPE_NONE 0x5ff
+
 struct flow {
     ovs_be64 tun_id;            /* Encapsulating tunnel ID. */
     uint32_t regs[FLOW_N_REGS]; /* Registers. */
diff --git a/lib/nx-match.c b/lib/nx-match.c
index 47cce83..30cc87c 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -209,7 +209,7 @@ parse_nxm_entry(struct cls_rule *rule, const struct nxm_field *f,
         memcpy(flow->dl_src, value, ETH_ADDR_LEN);
         return 0;
     case NFI_NXM_OF_ETH_TYPE:
-        flow->dl_type = get_unaligned_be16(value);
+        flow->dl_type = ofputil_dl_type_from_openflow(get_unaligned_be16(value));
         return 0;
 
         /* 802.1Q header. */
@@ -596,7 +596,8 @@ nx_put_match(struct ofpbuf *b, const struct cls_rule *cr)
         nxm_put_eth(b, NXM_OF_ETH_SRC, flow->dl_src);
     }
     if (!(wc & FWW_DL_TYPE)) {
-        nxm_put_16(b, NXM_OF_ETH_TYPE, flow->dl_type);
+        nxm_put_16(b, NXM_OF_ETH_TYPE,
+                   ofputil_dl_type_to_openflow(flow->dl_type));
     }
 
     /* 802.1Q. */
@@ -1068,7 +1069,7 @@ nxm_read_field(const struct nxm_field *src, const struct flow *flow)
         return eth_addr_to_uint64(flow->dl_src);
 
     case NFI_NXM_OF_ETH_TYPE:
-        return ntohs(flow->dl_type);
+        return ntohs(ofputil_dl_type_to_openflow(flow->dl_type));
 
     case NFI_NXM_OF_VLAN_TCI:
         return ntohs(flow->vlan_tci);
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 09f8b83..2da79c4 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -16,6 +16,7 @@
 
 #include <config.h>
 #include "odp-util.h"
+#include <errno.h>
 #include <inttypes.h>
 #include <stdlib.h>
 #include <string.h>
@@ -28,26 +29,6 @@
 #include "timeval.h"
 #include "util.h"
 
-void
-format_odp_flow_key(struct ds *ds, const struct odp_flow_key *key)
-{
-    ds_put_format(ds, "tun_id%#"PRIx64" in_port%d tci(",
-                  ntohll(key->tun_id), key->in_port);
-    if (key->dl_tci) {
-        ds_put_format(ds, "vlan%"PRIu16",pcp%d",
-                      vlan_tci_to_vid(key->dl_tci),
-                      vlan_tci_to_pcp(key->dl_tci));
-    } else {
-        ds_put_char(ds, '0');
-    }
-    ds_put_format(ds, ") mac"ETH_ADDR_FMT"->"ETH_ADDR_FMT" type%04x "
-                  "proto%"PRId8" tos%"PRIu8" ip"IP_FMT"->"IP_FMT" port%d->%d",
-                  ETH_ADDR_ARGS(key->dl_src), ETH_ADDR_ARGS(key->dl_dst),
-                  ntohs(key->dl_type), key->nw_proto, key->nw_tos,
-                  IP_ARGS(&key->nw_src), IP_ARGS(&key->nw_dst),
-                  ntohs(key->tp_src), ntohs(key->tp_dst));
-}
-
 int
 odp_action_len(uint16_t type)
 {
@@ -186,7 +167,7 @@ format_odp_actions(struct ds *ds, const struct nlattr *actions,
             format_odp_action(ds, a);
         }
         if (left) {
-            ds_put_format(ds, " ***%u leftover bytes***", left);
+            ds_put_format(ds, ",***%u leftover bytes***", left);
         }
     } else {
         ds_put_cstr(ds, "drop");
@@ -210,44 +191,419 @@ format_odp_flow_stats(struct ds *ds, const struct odp_flow_stats *s)
 void
 format_odp_flow(struct ds *ds, const struct odp_flow *f)
 {
-    format_odp_flow_key(ds, &f->key);
+    odp_flow_key_format(f->key, f->key_len, ds);
     ds_put_cstr(ds, ", ");
     format_odp_flow_stats(ds, &f->stats);
     ds_put_cstr(ds, ", actions:");
     format_odp_actions(ds, f->actions, f->actions_len);
 }
 
+/* Returns the correct length of the payload for a flow key attribute of the
+ * specified 'type', or -1 if 'type' is unknown. */
+static int
+odp_flow_key_attr_len(uint16_t type)
+{
+    if (type > ODPKT_MAX) {
+        return -1;
+    }
+
+    switch ((enum odp_key_type) type) {
+    case ODPKT_TUN_ID: return 8;
+    case ODPKT_IN_PORT: return 4;
+    case ODPKT_ETHERNET: return sizeof(struct odp_key_ethernet);
+    case ODPKT_8021Q: return sizeof(struct odp_key_8021q);
+    case ODPKT_ETHERTYPE: return 2;
+    case ODPKT_IPV4: return sizeof(struct odp_key_ipv4);
+    case ODPKT_TCP: return sizeof(struct odp_key_tcp);
+    case ODPKT_UDP: return sizeof(struct odp_key_udp);
+    case ODPKT_ICMP: return sizeof(struct odp_key_icmp);
+    case ODPKT_ARP: return sizeof(struct odp_key_arp);
+
+    case ODPKT_UNSPEC:
+    case __ODPKT_MAX:
+        return -1;
+    }
+
+    return -1;
+}
+
+
+static void
+format_generic_odp_key(const struct nlattr *a, struct ds *ds)
+{
+    size_t len = nl_attr_get_size(a);
+
+    ds_put_format(ds, "key%"PRId16, nl_attr_type(a));
+    if (len) {
+        const uint8_t *unspec;
+        unsigned int i;
+
+        unspec = nl_attr_get(a);
+        for (i = 0; i < len; i++) {
+            ds_put_char(ds, i ? ' ': '(');
+            ds_put_format(ds, "%02x", unspec[i]);
+        }
+        ds_put_char(ds, ')');
+    }
+}
+
+static void
+format_odp_key_attr(const struct nlattr *a, struct ds *ds)
+{
+    const struct odp_key_ethernet *eth_key;
+    const struct odp_key_8021q *q_key;
+    const struct odp_key_ipv4 *ipv4_key;
+    const struct odp_key_tcp *tcp_key;
+    const struct odp_key_udp *udp_key;
+    const struct odp_key_icmp *icmp_key;
+    const struct odp_key_arp *arp_key;
+
+    if (nl_attr_get_size(a) != odp_flow_key_attr_len(nl_attr_type(a))) {
+        ds_put_format(ds, "bad length %zu, expected %d for: ",
+                      nl_attr_get_size(a),
+                      odp_flow_key_attr_len(nl_attr_type(a)));
+        format_generic_odp_key(a, ds);
+        return;
+    }
+
+    switch (nl_attr_type(a)) {
+    case ODPKT_TUN_ID:
+        ds_put_format(ds, "tun_id(%#"PRIx64")", nl_attr_get_be64(a));
+        break;
+
+    case ODPKT_IN_PORT:
+        ds_put_format(ds, "in_port(%"PRIu32")", nl_attr_get_u32(a));
+        break;
+
+    case ODPKT_ETHERNET:
+        eth_key = nl_attr_get(a);
+        ds_put_format(ds, "eth(src="ETH_ADDR_FMT",dst="ETH_ADDR_FMT")",
+                      ETH_ADDR_ARGS(eth_key->eth_src),
+                      ETH_ADDR_ARGS(eth_key->eth_dst));
+        break;
+
+    case ODPKT_8021Q:
+        q_key = nl_attr_get(a);
+        ds_put_cstr(ds, "vlan(");
+        if (q_key->q_tpid != htons(ETH_TYPE_VLAN)) {
+            ds_put_format(ds, "tpid=%#"PRIx16",", ntohs(q_key->q_tpid));
+        }
+        ds_put_format(ds, "vid%"PRIu16",pcp%d)",
+                      vlan_tci_to_vid(q_key->q_tci),
+                      vlan_tci_to_pcp(q_key->q_tci));
+        break;
+
+    case ODPKT_ETHERTYPE:
+        ds_put_format(ds, "eth_type(%#04"PRIx16")",
+                      ntohs(nl_attr_get_be16(a)));
+        break;
+
+    case ODPKT_IPV4:
+        ipv4_key = nl_attr_get(a);
+        ds_put_format(ds, "ipv4(src="IP_FMT",dst="IP_FMT","
+                      "proto=%"PRId8",tos=%"PRIu8")",
+                      IP_ARGS(&ipv4_key->ipv4_src),
+                      IP_ARGS(&ipv4_key->ipv4_dst),
+                      ipv4_key->ipv4_proto, ipv4_key->ipv4_tos);
+        break;
+
+    case ODPKT_TCP:
+        tcp_key = nl_attr_get(a);
+        ds_put_format(ds, "tcp(src=%"PRIu16",dst=%"PRIu16")",
+                      ntohs(tcp_key->tcp_src), ntohs(tcp_key->tcp_dst));
+        break;
+
+    case ODPKT_UDP:
+        udp_key = nl_attr_get(a);
+        ds_put_format(ds, "udp(src=%"PRIu16",dst=%"PRIu16")",
+                      ntohs(udp_key->udp_src), ntohs(udp_key->udp_dst));
+        break;
+
+    case ODPKT_ICMP:
+        icmp_key = nl_attr_get(a);
+        ds_put_format(ds, "icmp(type=%"PRIu8",code=%"PRIu8")",
+                      icmp_key->icmp_type, icmp_key->icmp_code);
+        break;
+
+    case ODPKT_ARP:
+        arp_key = nl_attr_get(a);
+        ds_put_format(ds, "arp(sip="IP_FMT",tip="IP_FMT",op=%"PRIu16")",
+                      IP_ARGS(&arp_key->arp_sip), IP_ARGS(&arp_key->arp_tip),
+                      ntohs(arp_key->arp_op));
+        break;
+
+    default:
+        format_generic_odp_key(a, ds);
+        break;
+    }
+}
+
+/* Appends to 'ds' a string representation of the 'key_len' bytes of ODPKT_*
+ * attributes in 'key'. */
 void
-odp_flow_key_from_flow(struct odp_flow_key *key, const struct flow *flow)
+odp_flow_key_format(const struct nlattr *key, size_t key_len, struct ds *ds)
 {
-    key->tun_id = flow->tun_id;
-    key->nw_src = flow->nw_src;
-    key->nw_dst = flow->nw_dst;
-    key->in_port = flow->in_port;
-    key->dl_tci = flow->vlan_tci;
-    key->dl_type = flow->dl_type;
-    key->tp_src = flow->tp_src;
-    key->tp_dst = flow->tp_dst;
-    memcpy(key->dl_src, flow->dl_src, ETH_ADDR_LEN);
-    memcpy(key->dl_dst, flow->dl_dst, ETH_ADDR_LEN);
-    key->nw_proto = flow->nw_proto;
-    key->nw_tos = flow->nw_tos;
+    if (key_len) {
+        const struct nlattr *a;
+        unsigned int left;
+
+        NL_ATTR_FOR_EACH (a, left, key, key_len) {
+            if (a != key) {
+                ds_put_char(ds, ',');
+            }
+            format_odp_key_attr(a, ds);
+        }
+        if (left) {
+            ds_put_format(ds, ",***%u leftover bytes***", left);
+        }
+    } else {
+        ds_put_cstr(ds, "<empty>");
+    }
 }
 
+/* Appends a representation of 'flow' as ODPKT_* attributes to 'buf'. */
 void
-odp_flow_key_to_flow(const struct odp_flow_key *key, struct flow *flow)
+odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
+{
+    struct odp_key_ethernet *eth_key;
+
+    if (flow->tun_id != htonll(0)) {
+        nl_msg_put_be64(buf, ODPKT_TUN_ID, flow->tun_id);
+    }
+
+    nl_msg_put_u32(buf, ODPKT_IN_PORT, flow->in_port);
+
+    eth_key = nl_msg_put_unspec_uninit(buf, ODPKT_ETHERNET, sizeof *eth_key);
+    memcpy(eth_key->eth_src, flow->dl_src, ETH_ADDR_LEN);
+    memcpy(eth_key->eth_dst, flow->dl_dst, ETH_ADDR_LEN);
+
+    if (flow->vlan_tci != htons(0)) {
+        struct odp_key_8021q *q_key;
+
+        q_key = nl_msg_put_unspec_uninit(buf, ODPKT_8021Q, sizeof *q_key);
+        q_key->q_tpid = htons(ETH_TYPE_VLAN);
+        q_key->q_tci = flow->vlan_tci & ~htons(VLAN_CFI);
+    }
+
+    if (ntohs(flow->dl_type) < ETH_TYPE_MIN) {
+        return;
+    }
+
+    nl_msg_put_be16(buf, ODPKT_ETHERTYPE, flow->dl_type);
+
+    if (flow->dl_type == htons(ETH_TYPE_IP)) {
+        struct odp_key_ipv4 *ipv4_key;
+
+        ipv4_key = nl_msg_put_unspec_uninit(buf, ODPKT_IPV4, sizeof *ipv4_key);
+        ipv4_key->ipv4_src = flow->nw_src;
+        ipv4_key->ipv4_dst = flow->nw_dst;
+        ipv4_key->ipv4_proto = flow->nw_proto;
+        ipv4_key->ipv4_tos = flow->nw_tos;
+
+        if (flow->nw_proto == IP_TYPE_TCP) {
+            struct odp_key_tcp *tcp_key;
+
+            tcp_key = nl_msg_put_unspec_uninit(buf, ODPKT_TCP,
+                                               sizeof *tcp_key);
+            tcp_key->tcp_src = flow->tp_src;
+            tcp_key->tcp_dst = flow->tp_dst;
+        } else if (flow->nw_proto == IP_TYPE_UDP) {
+            struct odp_key_udp *udp_key;
+
+            udp_key = nl_msg_put_unspec_uninit(buf, ODPKT_UDP,
+                                               sizeof *udp_key);
+            udp_key->udp_src = flow->tp_src;
+            udp_key->udp_dst = flow->tp_dst;
+        } else if (flow->nw_proto == IP_TYPE_ICMP) {
+            struct odp_key_icmp *icmp_key;
+
+            icmp_key = nl_msg_put_unspec_uninit(buf, ODPKT_ICMP,
+                                                sizeof *icmp_key);
+            icmp_key->icmp_type = ntohs(flow->tp_src);
+            icmp_key->icmp_code = ntohs(flow->tp_dst);
+        }
+    } else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
+        struct odp_key_arp *arp_key;
+
+        arp_key = nl_msg_put_unspec_uninit(buf, ODPKT_ARP, sizeof *arp_key);
+        arp_key->arp_sip = flow->nw_src;
+        arp_key->arp_tip = flow->nw_dst;
+        arp_key->arp_op = htons(flow->nw_proto);
+    }
+}
+
+/* Converts the 'key_len' bytes of ODPKT_* attributes in 'key' to a flow
+ * structure in 'flow'.  Returns 0 if successful, otherwise EINVAL. */
+int
+odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
+                     struct flow *flow)
 {
-    memset(flow->regs, 0, sizeof flow->regs);
-    flow->tun_id = key->tun_id;
-    flow->nw_src = key->nw_src;
-    flow->nw_dst = key->nw_dst;
-    flow->in_port = key->in_port;
-    flow->vlan_tci = key->dl_tci;
-    flow->dl_type = key->dl_type;
-    flow->tp_src = key->tp_src;
-    flow->tp_dst = key->tp_dst;
-    memcpy(flow->dl_src, key->dl_src, ETH_ADDR_LEN);
-    memcpy(flow->dl_dst, key->dl_dst, ETH_ADDR_LEN);
-    flow->nw_proto = key->nw_proto;
-    flow->nw_tos = key->nw_tos;
+    const struct nlattr *nla;
+    enum odp_key_type prev_type;
+    size_t left;
+
+    memset(flow, 0, sizeof *flow);
+    flow->dl_type = htons(FLOW_DL_TYPE_NONE);
+
+    prev_type = ODPKT_UNSPEC;
+    NL_ATTR_FOR_EACH (nla, left, key, key_len) {
+        const struct odp_key_ethernet *eth_key;
+        const struct odp_key_8021q *q_key;
+        const struct odp_key_ipv4 *ipv4_key;
+        const struct odp_key_tcp *tcp_key;
+        const struct odp_key_udp *udp_key;
+        const struct odp_key_icmp *icmp_key;
+        const struct odp_key_arp *arp_key;
+
+        uint16_t type = nl_attr_type(nla);
+        int len = odp_flow_key_attr_len(type);
+
+        if (nl_attr_get_size(nla) != len && len != -1) {
+            return EINVAL;
+        }
+
+#define TRANSITION(PREV_TYPE, TYPE) (((PREV_TYPE) << 16) | (TYPE))
+        switch (TRANSITION(prev_type, type)) {
+        case TRANSITION(ODPKT_UNSPEC, ODPKT_TUN_ID):
+            flow->tun_id = nl_attr_get_be64(nla);
+            break;
+
+        case TRANSITION(ODPKT_UNSPEC, ODPKT_IN_PORT):
+        case TRANSITION(ODPKT_TUN_ID, ODPKT_IN_PORT):
+            if (nl_attr_get_u32(nla) >= UINT16_MAX) {
+                return EINVAL;
+            }
+            flow->in_port = nl_attr_get_u32(nla);
+            break;
+
+        case TRANSITION(ODPKT_IN_PORT, ODPKT_ETHERNET):
+            eth_key = nl_attr_get(nla);
+            memcpy(flow->dl_src, eth_key->eth_src, ETH_ADDR_LEN);
+            memcpy(flow->dl_dst, eth_key->eth_dst, ETH_ADDR_LEN);
+            break;
+
+        case TRANSITION(ODPKT_ETHERNET, ODPKT_8021Q):
+            q_key = nl_attr_get(nla);
+            if (q_key->q_tpid != htons(ETH_TYPE_VLAN)) {
+                /* Only standard 0x8100 VLANs currently supported. */
+                return EINVAL;
+            }
+            if (q_key->q_tci & htons(VLAN_CFI)) {
+                return EINVAL;
+            }
+            flow->vlan_tci = q_key->q_tci | htons(VLAN_CFI);
+            break;
+
+        case TRANSITION(ODPKT_8021Q, ODPKT_ETHERTYPE):
+        case TRANSITION(ODPKT_ETHERNET, ODPKT_ETHERTYPE):
+            flow->dl_type = nl_attr_get_be16(nla);
+            if (ntohs(flow->dl_type) < 1536) {
+                return EINVAL;
+            }
+            break;
+
+        case TRANSITION(ODPKT_ETHERTYPE, ODPKT_IPV4):
+            if (flow->dl_type != htons(ETH_TYPE_IP)) {
+                return EINVAL;
+            }
+            ipv4_key = nl_attr_get(nla);
+            flow->nw_src = ipv4_key->ipv4_src;
+            flow->nw_dst = ipv4_key->ipv4_dst;
+            flow->nw_proto = ipv4_key->ipv4_proto;
+            flow->nw_tos = ipv4_key->ipv4_tos;
+            if (flow->nw_tos & IP_ECN_MASK) {
+                return EINVAL;
+            }
+            break;
+
+        case TRANSITION(ODPKT_IPV4, ODPKT_TCP):
+            if (flow->nw_proto != IP_TYPE_TCP) {
+                return EINVAL;
+            }
+            tcp_key = nl_attr_get(nla);
+            flow->tp_src = tcp_key->tcp_src;
+            flow->tp_dst = tcp_key->tcp_dst;
+            break;
+
+        case TRANSITION(ODPKT_IPV4, ODPKT_UDP):
+            if (flow->nw_proto != IP_TYPE_UDP) {
+                return EINVAL;
+            }
+            udp_key = nl_attr_get(nla);
+            flow->tp_src = udp_key->udp_src;
+            flow->tp_dst = udp_key->udp_dst;
+            break;
+
+        case TRANSITION(ODPKT_IPV4, ODPKT_ICMP):
+            if (flow->nw_proto != IP_TYPE_ICMP) {
+                return EINVAL;
+            }
+            icmp_key = nl_attr_get(nla);
+            flow->tp_src = htons(icmp_key->icmp_type);
+            flow->tp_dst = htons(icmp_key->icmp_code);
+            break;
+
+        case TRANSITION(ODPKT_ETHERTYPE, ODPKT_ARP):
+            if (flow->dl_type != htons(ETH_TYPE_ARP)) {
+                return EINVAL;
+            }
+            arp_key = nl_attr_get(nla);
+            flow->nw_src = arp_key->arp_sip;
+            flow->nw_dst = arp_key->arp_tip;
+            if (arp_key->arp_op & htons(0xff00)) {
+                return EINVAL;
+            }
+            flow->nw_proto = ntohs(arp_key->arp_op);
+            break;
+
+        default:
+            if (type == ODPKT_UNSPEC || prev_type == ODPKT_UNSPEC) {
+                return EINVAL;
+            }
+            return EINVAL;
+        }
+
+        prev_type = type;
+    }
+    if (left) {
+        return EINVAL;
+    }
+
+    switch (prev_type) {
+    case ODPKT_UNSPEC:
+        return EINVAL;
+
+    case ODPKT_TUN_ID:
+    case ODPKT_IN_PORT:
+        return EINVAL;
+
+    case ODPKT_ETHERNET:
+    case ODPKT_8021Q:
+        return 0;
+
+    case ODPKT_ETHERTYPE:
+        if (flow->dl_type == htons(ETH_TYPE_IP)
+            || flow->dl_type == htons(ETH_TYPE_ARP)) {
+            return EINVAL;
+        }
+        return 0;
+
+    case ODPKT_IPV4:
+        if (flow->nw_proto == htons(IP_TYPE_TCP)
+            || flow->nw_proto == htons(IP_TYPE_UDP)
+            || flow->nw_proto == htons(IP_TYPE_ICMP)) {
+            return EINVAL;
+        }
+        return 0;
+
+    case ODPKT_TCP:
+    case ODPKT_UDP:
+    case ODPKT_ICMP:
+    case ODPKT_ARP:
+        return 0;
+
+    case __ODPKT_MAX:
+    default:
+        NOT_REACHED();
+    }
 }
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 6051c52..9486661 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -28,6 +28,7 @@
 
 struct ds;
 struct flow;
+struct ofpbuf;
 
 static inline uint16_t
 ofp_port_to_odp_port(uint16_t ofp_port)
@@ -55,7 +56,6 @@ odp_port_to_ofp_port(uint16_t odp_port)
     }
 }
 
-void format_odp_flow_key(struct ds *, const struct odp_flow_key *);
 int odp_action_len(uint16_t type);
 void format_odp_action(struct ds *, const struct nlattr *);
 void format_odp_actions(struct ds *, const struct nlattr *odp_actions,
@@ -63,21 +63,19 @@ void format_odp_actions(struct ds *, const struct nlattr *odp_actions,
 void format_odp_flow_stats(struct ds *, const struct odp_flow_stats *);
 void format_odp_flow(struct ds *, const struct odp_flow *);
 
-void odp_flow_key_from_flow(struct odp_flow_key *, const struct flow *);
-void odp_flow_key_to_flow(const struct odp_flow_key *, struct flow *);
+/* By my calculations currently the longest valid nlattr-formatted flow key is
+ * 80 bytes long, so this leaves some safety margin.
+ *
+ * We allocate temporary on-stack buffers for flow keys as arrays of uint32_t
+ * to ensure proper 32-bit alignment for Netlink attributes.  (An array of
+ * "struct nlattr" might not, in theory, be sufficiently aligned because it
+ * only contains 16-bit types.) */
+#define ODPUTIL_FLOW_KEY_BYTES 96
+#define ODPUTIL_FLOW_KEY_U32S DIV_ROUND_UP(ODPUTIL_FLOW_KEY_BYTES, 4)
 
-static inline bool
-odp_flow_key_equal(const struct odp_flow_key *a, const struct odp_flow_key *b)
-{
-    return !memcmp(a, b, sizeof *a);
-}
+void odp_flow_key_format(const struct nlattr *, size_t, struct ds *);
 
-static inline size_t
-odp_flow_key_hash(const struct odp_flow_key *flow, uint32_t basis)
-{
-    BUILD_ASSERT_DECL(!(sizeof *flow % sizeof(uint32_t)));
-    return hash_words((const uint32_t *) flow,
-                      sizeof *flow / sizeof(uint32_t), basis);
-}
+void odp_flow_key_from_flow(struct ofpbuf *, const struct flow *);
+int odp_flow_key_to_flow(const struct nlattr *, size_t, struct flow *);
 
 #endif /* odp-util.h */
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 9210413..20dc746 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -146,7 +146,7 @@ ofputil_cls_rule_from_match(const struct ofp_match *match,
     rule->flow.nw_dst = match->nw_dst;
     rule->flow.in_port = (match->in_port == htons(OFPP_LOCAL) ? ODPP_LOCAL
                      : ntohs(match->in_port));
-    rule->flow.dl_type = match->dl_type;
+    rule->flow.dl_type = ofputil_dl_type_from_openflow(match->dl_type);
     rule->flow.tp_src = match->tp_src;
     rule->flow.tp_dst = match->tp_dst;
     memcpy(rule->flow.dl_src, match->dl_src, ETH_ADDR_LEN);
@@ -270,7 +270,7 @@ ofputil_cls_rule_to_match(const struct cls_rule *rule,
                            : rule->flow.in_port);
     memcpy(match->dl_src, rule->flow.dl_src, ETH_ADDR_LEN);
     memcpy(match->dl_dst, rule->flow.dl_dst, ETH_ADDR_LEN);
-    match->dl_type = rule->flow.dl_type;
+    match->dl_type = ofputil_dl_type_to_openflow(rule->flow.dl_type);
     match->nw_src = rule->flow.nw_src;
     match->nw_dst = rule->flow.nw_dst;
     match->nw_tos = rule->flow.nw_tos;
@@ -281,6 +281,27 @@ ofputil_cls_rule_to_match(const struct cls_rule *rule,
     memset(match->pad2, '\0', sizeof match->pad2);
 }
 
+/* Given a 'dl_type' value in the format used in struct flow, returns the
+ * corresponding 'dl_type' value for use in an OpenFlow ofp_match structure. */
+ovs_be16
+ofputil_dl_type_to_openflow(ovs_be16 flow_dl_type)
+{
+    return (flow_dl_type == htons(FLOW_DL_TYPE_NONE)
+            ? htons(OFP_DL_TYPE_NOT_ETH_TYPE)
+            : flow_dl_type);
+}
+
+/* Given a 'dl_type' value in the format used in an OpenFlow ofp_match
+ * structure, returns the corresponding 'dl_type' value for use in struct
+ * flow. */
+ovs_be16
+ofputil_dl_type_from_openflow(ovs_be16 ofp_dl_type)
+{
+    return (ofp_dl_type == htons(OFP_DL_TYPE_NOT_ETH_TYPE)
+            ? htons(FLOW_DL_TYPE_NONE)
+            : ofp_dl_type);
+}
+
 /* Returns a transaction ID to use for an outgoing OpenFlow message. */
 static ovs_be32
 alloc_xid(void)
diff --git a/lib/ofp-util.h b/lib/ofp-util.h
index 4231865..a3647ec 100644
--- a/lib/ofp-util.h
+++ b/lib/ofp-util.h
@@ -110,6 +110,10 @@ void ofputil_cls_rule_to_match(const struct cls_rule *, enum nx_flow_format,
 void normalize_match(struct ofp_match *);
 char *ofp_match_to_literal_string(const struct ofp_match *match);
 
+/* dl_type translation between OpenFlow and 'struct flow' format. */
+ovs_be16 ofputil_dl_type_to_openflow(ovs_be16 flow_dl_type);
+ovs_be16 ofputil_dl_type_from_openflow(ovs_be16 ofp_dl_type);
+
 /* Flow formats. */
 bool ofputil_flow_format_is_valid(enum nx_flow_format);
 const char *ofputil_flow_format_to_string(enum nx_flow_format);
diff --git a/lib/packets.h b/lib/packets.h
index 39e88f1..96e23e1 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -155,6 +155,10 @@ void compose_benign_packet(struct ofpbuf *, const char *tag,
 #define ETH_TYPE_VLAN          0x8100
 #define ETH_TYPE_CFM           0x8902
 
+/* Minimum value for an Ethernet type.  Values below this are IEEE 802.2 frame
+ * lengths. */
+#define ETH_TYPE_MIN           0x600
+
 #define ETH_HEADER_LEN 14
 #define ETH_PAYLOAD_MIN 46
 #define ETH_PAYLOAD_MAX 1500
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 5829702..f9b86f7 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -2269,8 +2269,16 @@ static int
 facet_put__(struct ofproto *ofproto, struct facet *facet, int flags,
             struct odp_flow_put *put)
 {
+    uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
+    struct ofpbuf key;
+
+    ofpbuf_use_stack(&key, keybuf, sizeof keybuf);
+    odp_flow_key_from_flow(&key, &facet->flow);
+    assert(key.base == keybuf);
+
     memset(&put->flow.stats, 0, sizeof put->flow.stats);
-    odp_flow_key_from_flow(&put->flow.key, &facet->flow);
+    put->flow.key = key.data;
+    put->flow.key_len = key.size;
     put->flow.actions = facet->actions;
     put->flow.actions_len = facet->actions_len;
     put->flow.flags = 0;
@@ -2321,9 +2329,16 @@ static void
 facet_uninstall(struct ofproto *p, struct facet *facet)
 {
     if (facet->installed) {
+        uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
         struct odp_flow odp_flow;
+        struct ofpbuf key;
 
-        odp_flow_key_from_flow(&odp_flow.key, &facet->flow);
+        ofpbuf_use_stack(&key, keybuf, sizeof keybuf);
+        odp_flow_key_from_flow(&key, &facet->flow);
+        assert(key.base == keybuf);
+
+        odp_flow.key = key.data;
+        odp_flow.key_len = key.size;
         odp_flow.actions = NULL;
         odp_flow.actions_len = 0;
         odp_flow.flags = 0;
@@ -2459,10 +2474,16 @@ facet_revalidate(struct ofproto *ofproto, struct facet *facet)
      * to talk to the datapath. */
     if (actions_changed || facet->may_install != facet->installed) {
         if (facet->may_install) {
+            uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
             struct odp_flow_put put;
+            struct ofpbuf key;
+
+            ofpbuf_use_stack(&key, keybuf, sizeof keybuf);
+            odp_flow_key_from_flow(&key, &facet->flow);
 
             memset(&put.flow.stats, 0, sizeof put.flow.stats);
-            odp_flow_key_from_flow(&put.flow.key, &facet->flow);
+            put.flow.key = key.data;
+            put.flow.key_len = key.size;
             put.flow.actions = odp_actions->data;
             put.flow.actions_len = odp_actions->size;
             put.flow.flags = 0;
@@ -3397,42 +3418,44 @@ static void
 query_stats(struct ofproto *p, struct rule *rule,
             uint64_t *packet_countp, uint64_t *byte_countp)
 {
+    uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
     uint64_t packet_count, byte_count;
     struct facet *facet;
-    struct odp_flow *odp_flows;
-    size_t n_odp_flows;
+    struct ofpbuf key;
 
     /* Start from historical data for 'rule' itself that are no longer tracked
      * by the datapath.  This counts, for example, facets that have expired. */
     packet_count = rule->packet_count;
     byte_count = rule->byte_count;
 
-    /* Prepare to ask the datapath for statistics on all of the rule's facets.
+    /* Ask the datapath for statistics on all of the rule's facets.  (We could
+     * batch up statistics requests using dpif_flow_get_multiple(), but that is
+     * not yet implemented.)
      *
      * Also, add any statistics that are not tracked by the datapath for each
      * facet.  This includes, for example, statistics for packets that were
      * executed "by hand" by ofproto via dpif_execute() but must be accounted
      * to a rule. */
-    odp_flows = xzalloc(list_size(&rule->facets) * sizeof *odp_flows);
-    n_odp_flows = 0;
+    ofpbuf_use_stack(&key, keybuf, sizeof keybuf);
     LIST_FOR_EACH (facet, list_node, &rule->facets) {
-        struct odp_flow *odp_flow = &odp_flows[n_odp_flows++];
-        odp_flow_key_from_flow(&odp_flow->key, &facet->flow);
-        packet_count += facet->packet_count;
-        byte_count += facet->byte_count;
-    }
+        struct odp_flow odp_flow;
 
-    /* Fetch up-to-date statistics from the datapath and add them in. */
-    if (!dpif_flow_get_multiple(p->dpif, odp_flows, n_odp_flows)) {
-        size_t i;
+        ofpbuf_clear(&key);
+        odp_flow_key_from_flow(&key, &facet->flow);
 
-        for (i = 0; i < n_odp_flows; i++) {
-            struct odp_flow *odp_flow = &odp_flows[i];
-            packet_count += odp_flow->stats.n_packets;
-            byte_count += odp_flow->stats.n_bytes;
+        odp_flow.key = key.data;
+        odp_flow.key_len = key.size;
+        odp_flow.actions = NULL;
+        odp_flow.actions_len = 0;
+        odp_flow.flags = 0;
+        if (!dpif_flow_get(p->dpif, &odp_flow)) {
+            packet_count += odp_flow.stats.n_packets;
+            byte_count += odp_flow.stats.n_bytes;
         }
+
+        packet_count += facet->packet_count;
+        byte_count += facet->byte_count;
     }
-    free(odp_flows);
 
     /* Return the stats to the caller. */
     *packet_countp = packet_count;
@@ -4471,16 +4494,36 @@ static void
 ofproto_update_used(struct ofproto *p)
 {
     struct dpif_flow_dump dump;
-    struct odp_flow f;
-
-    memset(&f, 0, sizeof f);
 
     dpif_flow_dump_start(&dump, p->dpif);
-    while (dpif_flow_dump_next(&dump, &f)) {
+    for (;;) {
+        uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
         struct facet *facet;
+        struct odp_flow f;
         struct flow flow;
 
-        odp_flow_key_to_flow(&f.key, &flow);
+        memset(&f, 0, sizeof f);
+        f.key = (struct nlattr *) keybuf;
+        f.key_len = sizeof keybuf;
+        if (!dpif_flow_dump_next(&dump, &f)) {
+            break;
+        }
+
+        if (f.key_len > sizeof keybuf) {
+            VLOG_WARN_RL(&rl, "ODP flow key overflowed buffer");
+            continue;
+        }
+        if (odp_flow_key_to_flow(f.key, f.key_len, &flow)) {
+            struct ds s;
+
+            ds_init(&s);
+            odp_flow_key_format(f.key, f.key_len, &s);
+            VLOG_WARN_RL(&rl, "failed to convert ODP flow key to flow: %s",
+                         ds_cstr(&s));
+            ds_destroy(&s);
+
+            continue;
+        }
         facet = facet_find(p, &flow);
 
         if (facet && facet->installed) {
@@ -4601,7 +4644,14 @@ facet_active_timeout(struct ofproto *ofproto, struct facet *facet)
          * ofproto_update_used() zeroed TCP flags. */
         memset(&odp_flow, 0, sizeof odp_flow);
         if (facet->installed) {
-            odp_flow_key_from_flow(&odp_flow.key, &facet->flow);
+            uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
+            struct ofpbuf key;
+
+            ofpbuf_use_stack(&key, keybuf, sizeof keybuf);
+            odp_flow_key_from_flow(&key, &facet->flow);
+
+            odp_flow.key = key.data;
+            odp_flow.key_len = key.size;
             odp_flow.flags = ODPFF_ZERO_TCP_FLAGS;
             dpif_flow_get(ofproto->dpif, &odp_flow);
 
diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c
index 898d7e9..dcf9e3b 100644
--- a/utilities/ovs-dpctl.c
+++ b/utilities/ovs-dpctl.c
@@ -470,12 +470,15 @@ do_dump_flows(int argc OVS_UNUSED, char *argv[])
     dpif_flow_dump_start(&dump, dpif);
     for (;;) {
         enum { MAX_ACTIONS = 4096 }; /* An arbitrary but large number. */
-        struct nlattr actions[MAX_ACTIONS];
+        uint32_t actions[MAX_ACTIONS * sizeof(struct nlattr) / 4];
+        uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
         struct odp_flow f;
 
         memset(&f, 0, sizeof f);
-        f.actions = actions;
+        f.actions = (struct nlattr *) actions;
         f.actions_len = sizeof actions;
+        f.key = (struct nlattr *) keybuf;
+        f.key_len = sizeof keybuf;
 
         if (!dpif_flow_dump_next(&dump, &f)) {
             break;
-- 
1.7.1





More information about the dev mailing list