[ovs-dev] [PATCH 6/7] datapath: Convert odp_flow_key to use Netlink attributes instead.

Ben Pfaff blp at nicira.com
Fri Dec 24 01:15:40 UTC 2010


One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 datapath/actions.c                                 |   20 +-
 datapath/actions.h                                 |    4 +-
 datapath/datapath.c                                |   77 +++-
 datapath/flow.c                                    |  268 +++++++++++-
 datapath/flow.h                                    |   29 ++-
 .../linux-2.6/compat-2.6/include/net/netlink.h     |   47 ++
 datapath/odp-compat.h                              |    3 +-
 datapath/tunnel.c                                  |    2 +-
 include/openvswitch/datapath-protocol.h            |   90 +++--
 lib/dpif-netdev.c                                  |   65 +++-
 lib/dpif.c                                         |   37 ++-
 lib/flow.c                                         |    6 +-
 lib/flow.h                                         |    4 +
 lib/odp-util.c                                     |  453 +++++++++++++++++---
 lib/odp-util.h                                     |   28 +-
 lib/packets.h                                      |    4 +
 ofproto/ofproto.c                                  |  104 ++++--
 utilities/ovs-dpctl.c                              |    7 +-
 18 files changed, 1054 insertions(+), 194 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 299c16a..d8dfeb0 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -27,7 +27,7 @@
 #include "vport.h"
 
 static int do_execute_actions(struct datapath *, struct sk_buff *,
-			      const struct odp_flow_key *,
+			      const struct sw_flow_key *,
 			      const struct nlattr *actions, u32 actions_len);
 
 static struct sk_buff *make_writable(struct sk_buff *skb, unsigned min_headroom)
@@ -76,7 +76,7 @@ static struct sk_buff *vlan_pull_tag(struct sk_buff *skb)
 }
 
 static struct sk_buff *modify_vlan_tci(struct datapath *dp, struct sk_buff *skb,
-				       const struct odp_flow_key *key,
+				       const struct sw_flow_key *key,
 				       const struct nlattr *a, u32 actions_len)
 {
 	__be16 tci = nla_get_be16(a);
@@ -208,13 +208,13 @@ static struct sk_buff *strip_vlan(struct sk_buff *skb)
 	return skb;
 }
 
-static bool is_ip(struct sk_buff *skb, const struct odp_flow_key *key)
+static bool is_ip(struct sk_buff *skb, const struct sw_flow_key *key)
 {
 	return (key->dl_type == htons(ETH_P_IP) &&
 		skb->transport_header > skb->network_header);
 }
 
-static __sum16 *get_l4_checksum(struct sk_buff *skb, const struct odp_flow_key *key)
+static __sum16 *get_l4_checksum(struct sk_buff *skb, const struct sw_flow_key *key)
 {
 	int transport_len = skb->len - skb_transport_offset(skb);
 	if (key->nw_proto == IPPROTO_TCP) {
@@ -228,7 +228,7 @@ static __sum16 *get_l4_checksum(struct sk_buff *skb, const struct odp_flow_key *
 }
 
 static struct sk_buff *set_nw_addr(struct sk_buff *skb,
-				   const struct odp_flow_key *key,
+				   const struct sw_flow_key *key,
 				   const struct nlattr *a)
 {
 	__be32 new_nwaddr = nla_get_be32(a);
@@ -257,7 +257,7 @@ static struct sk_buff *set_nw_addr(struct sk_buff *skb,
 }
 
 static struct sk_buff *set_nw_tos(struct sk_buff *skb,
-				  const struct odp_flow_key *key,
+				  const struct sw_flow_key *key,
 				  u8 nw_tos)
 {
 	if (unlikely(!is_ip(skb, key)))
@@ -280,7 +280,7 @@ static struct sk_buff *set_nw_tos(struct sk_buff *skb,
 }
 
 static struct sk_buff *set_tp_port(struct sk_buff *skb,
-				   const struct odp_flow_key *key,
+				   const struct sw_flow_key *key,
 				   const struct nlattr *a)
 {
 	struct udphdr *th;
@@ -325,7 +325,7 @@ static struct sk_buff *set_tp_port(struct sk_buff *skb,
  * or truncated header fields or one whose inner and outer Ethernet address
  * differ.
  */
-static bool is_spoofed_arp(struct sk_buff *skb, const struct odp_flow_key *key)
+static bool is_spoofed_arp(struct sk_buff *skb, const struct sw_flow_key *key)
 {
 	struct arp_eth_header *arp;
 
@@ -371,7 +371,7 @@ static int output_control(struct datapath *dp, struct sk_buff *skb, u32 arg)
 
 /* Execute a list of actions against 'skb'. */
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
-			      const struct odp_flow_key *key,
+			      const struct sw_flow_key *key,
 			      const struct nlattr *actions, u32 actions_len)
 {
 	/* Every output action needs a separate clone of 'skb', but the common
@@ -491,7 +491,7 @@ static void sflow_sample(struct datapath *dp, struct sk_buff *skb,
 
 /* Execute a list of actions against 'skb'. */
 int execute_actions(struct datapath *dp, struct sk_buff *skb,
-		    const struct odp_flow_key *key,
+		    const struct sw_flow_key *key,
 		    const struct nlattr *actions, u32 actions_len)
 {
 	if (dp->sflow_probability) {
diff --git a/datapath/actions.h b/datapath/actions.h
index 3067bf7..5ad322f 100644
--- a/datapath/actions.h
+++ b/datapath/actions.h
@@ -14,10 +14,10 @@
 
 struct datapath;
 struct sk_buff;
-struct odp_flow_key;
+struct sw_flow_key;
 
 int execute_actions(struct datapath *dp, struct sk_buff *skb,
-		    const struct odp_flow_key *key,
+		    const struct sw_flow_key *,
 		    const struct nlattr *, u32 actions_len);
 
 #endif /* actions.h */
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 7b55698..9096c44 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -41,6 +41,7 @@
 #include <linux/rculist.h>
 #include <linux/dmi.h>
 #include <net/inet_ecn.h>
+#include <net/genetlink.h>
 #include <linux/compat.h>
 
 #include "openvswitch/datapath-protocol.h"
@@ -481,7 +482,7 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	OVS_CB(skb)->vport = p;
 
 	if (!OVS_CB(skb)->flow) {
-		struct odp_flow_key key;
+		struct sw_flow_key key;
 		struct tbl_node *flow_node;
 		bool is_frag;
 
@@ -814,14 +815,19 @@ static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf,
 		       struct odp_flow_stats *stats)
 {
 	struct tbl_node *flow_node;
+	struct sw_flow_key key;
 	struct sw_flow *flow;
 	struct tbl *table;
 	int error;
 	u32 hash;
 
-	hash = flow_hash(&uf->flow.key);
+	error = flow_copy_from_user(&key, uf->flow.key, uf->flow.key_len);
+	if (error)
+		return error;
+
+	hash = flow_hash(&key);
 	table = get_table_protected(dp);
-	flow_node = tbl_lookup(table, &uf->flow.key, hash, flow_cmp);
+	flow_node = tbl_lookup(table, &key, hash, flow_cmp);
 	if (!flow_node) {
 		/* No such flow. */
 		struct sw_flow_actions *acts;
@@ -844,7 +850,7 @@ static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf,
 			error = PTR_ERR(flow);
 			goto error;
 		}
-		flow->key = uf->flow.key;
+		flow->key = key;
 		clear_stats(flow);
 
 		/* Obtain actions. */
@@ -973,13 +979,18 @@ static int answer_query(struct datapath *dp, struct sw_flow *flow,
 			       &ufp->stats, actions, &ufp->actions_len);
 }
 
-static struct sw_flow *do_del_flow(struct datapath *dp, struct odp_flow_key *key)
+static struct sw_flow *do_del_flow(struct datapath *dp, const struct nlattr __user *key, u32 key_len)
 {
 	struct tbl *table = get_table_protected(dp);
 	struct tbl_node *flow_node;
+	struct sw_flow_key swkey;
 	int error;
 
-	flow_node = tbl_lookup(table, key, flow_hash(key), flow_cmp);
+	error = flow_copy_from_user(&swkey, key, key_len);
+	if (error)
+		return ERR_PTR(error);
+
+	flow_node = tbl_lookup(table, &swkey, flow_hash(&swkey), flow_cmp);
 	if (!flow_node)
 		return ERR_PTR(-ENOENT);
 
@@ -1003,7 +1014,7 @@ static int del_flow(struct datapath *dp, struct odp_flow __user *ufp)
 	if (copy_from_user(&uf, ufp, sizeof uf))
 		return -EFAULT;
 
-	flow = do_del_flow(dp, &uf.key);
+	flow = do_del_flow(dp, uf.key, uf.key_len);
 	if (IS_ERR(flow))
 		return PTR_ERR(flow);
 
@@ -1019,6 +1030,7 @@ static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec
 
 	for (i = 0; i < flowvec->n_flows; i++) {
 		struct odp_flow __user *ufp = (struct odp_flow __user *)&flowvec->flows[i];
+		struct sw_flow_key key;
 		struct odp_flow uf;
 		struct tbl_node *flow_node;
 		int error;
@@ -1026,7 +1038,11 @@ static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec
 		if (copy_from_user(&uf, ufp, sizeof uf))
 			return -EFAULT;
 
-		flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp);
+		error = flow_copy_from_user(&key, uf.key, uf.key_len);
+		if (error)
+			return error;
+
+		flow_node = tbl_lookup(table, &uf.key, flow_hash(&key), flow_cmp);
 		if (!flow_node)
 			error = put_user(ENOENT, &ufp->stats.error);
 		else
@@ -1078,7 +1094,9 @@ static struct sw_flow *do_dump_flow(struct datapath *dp, u32 __user *state)
 static int dump_flow(struct datapath *dp, struct odp_flow_dump __user *ufdp)
 {
 	struct odp_flow __user *ufp;
+	struct nlattr __user *ukey;
 	struct sw_flow *flow;
+	u32 key_len;
 
 	flow = do_dump_flow(dp, ufdp->state);
 	if (IS_ERR(flow))
@@ -1090,15 +1108,23 @@ static int dump_flow(struct datapath *dp, struct odp_flow_dump __user *ufdp)
 	if (!flow)
 		return put_user(ODPFF_EOF, &ufp->flags);
 
-	if (copy_to_user(&ufp->key, &flow->key, sizeof(struct odp_flow_key)) ||
-	    put_user(0, &ufp->flags))
+	if (put_user(0, &ufp->flags) ||
+	    get_user(ukey, &ufp->key) ||
+	    get_user(key_len, &ufp->key_len))
 		return -EFAULT;
+
+	key_len = flow_copy_to_user(&flow->key, ukey, key_len);
+	if (key_len < 0)
+		return key_len;
+	if (put_user(key_len, &ufp->key_len))
+		return -EFAULT;
+
 	return answer_query(dp, flow, 0, ufp);
 }
 
 static int do_execute(struct datapath *dp, const struct odp_execute *execute)
 {
-	struct odp_flow_key key;
+	struct sw_flow_key key;
 	struct sk_buff *skb;
 	struct sw_flow_actions *actions;
 	struct ethhdr *eth;
@@ -1530,16 +1556,18 @@ static int compat_list_ports(struct datapath *dp, struct compat_odp_portvec __us
 
 static int compat_get_flow(struct odp_flow *flow, const struct compat_odp_flow __user *compat)
 {
-	compat_uptr_t actions;
+	compat_uptr_t key, actions;
 
 	if (!access_ok(VERIFY_READ, compat, sizeof(struct compat_odp_flow)) ||
 	    __copy_from_user(&flow->stats, &compat->stats, sizeof(struct odp_flow_stats)) ||
-	    __copy_from_user(&flow->key, &compat->key, sizeof(struct odp_flow_key)) ||
+	    __get_user(key, &compat->key) ||
+	    __get_user(flow->key_len, &compat->key_len) ||
 	    __get_user(actions, &compat->actions) ||
 	    __get_user(flow->actions_len, &compat->actions_len) ||
 	    __get_user(flow->flags, &compat->flags))
 		return -EFAULT;
 
+	flow->key = (struct nlattr __force *)compat_ptr(key);
 	flow->actions = (struct nlattr __force *)compat_ptr(actions);
 	return 0;
 }
@@ -1587,7 +1615,7 @@ static int compat_del_flow(struct datapath *dp, struct compat_odp_flow __user *u
 	if (compat_get_flow(&uf, ufp))
 		return -EFAULT;
 
-	flow = do_del_flow(dp, &uf.key);
+	flow = do_del_flow(dp, uf.key, uf.key_len);
 	if (IS_ERR(flow))
 		return PTR_ERR(flow);
 
@@ -1607,12 +1635,17 @@ static int compat_query_flows(struct datapath *dp,
 		struct compat_odp_flow __user *ufp = &flows[i];
 		struct odp_flow uf;
 		struct tbl_node *flow_node;
+		struct sw_flow_key key;
 		int error;
 
 		if (compat_get_flow(&uf, ufp))
 			return -EFAULT;
 
-		flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp);
+		error = flow_copy_from_user(&key, uf.key, uf.key_len);
+		if (error)
+			return error;
+
+		flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
 		if (!flow_node)
 			error = put_user(ENOENT, &ufp->stats.error);
 		else
@@ -1629,6 +1662,8 @@ static int compat_dump_flow(struct datapath *dp, struct compat_odp_flow_dump __u
 	struct compat_odp_flow __user *ufp;
 	compat_uptr_t compat_ufp;
 	struct sw_flow *flow;
+	compat_uptr_t ukey;
+	u32 key_len;
 
 	flow = do_dump_flow(dp, ufdp->state);
 	if (IS_ERR(flow))
@@ -1641,9 +1676,17 @@ static int compat_dump_flow(struct datapath *dp, struct compat_odp_flow_dump __u
 	if (!flow)
 		return put_user(ODPFF_EOF, &ufp->flags);
 
-	if (copy_to_user(&ufp->key, &flow->key, sizeof(struct odp_flow_key)) ||
-	    put_user(0, &ufp->flags))
+	if (put_user(0, &ufp->flags) ||
+	    get_user(ukey, &ufp->key) ||
+	    get_user(key_len, &ufp->key_len))
+		return -EFAULT;
+
+	key_len = flow_copy_to_user(&flow->key, compat_ptr(ukey), key_len);
+	if (key_len < 0)
+		return key_len;
+	if (put_user(key_len, &ufp->key_len))
 		return -EFAULT;
+
 	return compat_answer_query(dp, flow, 0, ufp);
 }
 
diff --git a/datapath/flow.c b/datapath/flow.c
index 5cf0d54..a5e5a0f 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -8,6 +8,7 @@
 
 #include "flow.h"
 #include "datapath.h"
+#include <asm/uaccess.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/if_ether.h>
@@ -195,7 +196,7 @@ void flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
 	call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
 }
 
-static void parse_vlan(struct sk_buff *skb, struct odp_flow_key *key)
+static void parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 {
 	struct qtag_prefix {
 		__be16 eth_type; /* ETH_P_8021Q */
@@ -207,7 +208,7 @@ static void parse_vlan(struct sk_buff *skb, struct odp_flow_key *key)
 		return;
 
 	qp = (struct qtag_prefix *) skb->data;
-	key->dl_tci = qp->tci | htons(ODP_TCI_PRESENT);
+	key->dl_tci = qp->tci | htons(VLAN_TAG_PRESENT);
 	__skb_pull(skb, sizeof(struct qtag_prefix));
 }
 
@@ -226,17 +227,17 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 	proto = *(__be16 *) skb->data;
 	__skb_pull(skb, sizeof(__be16));
 
-	if (ntohs(proto) >= ODP_DL_TYPE_ETH2_CUTOFF)
+	if (ntohs(proto) >= 1536)
 		return proto;
 
 	if (unlikely(skb->len < sizeof(struct llc_snap_hdr)))
-		return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+		return htons(ETH_P_802_2);
 
 	llc = (struct llc_snap_hdr *) skb->data;
 	if (llc->dsap != LLC_SAP_SNAP ||
 	    llc->ssap != LLC_SAP_SNAP ||
 	    (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
-		return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+		return htons(ETH_P_802_2);
 
 	__skb_pull(skb, sizeof(struct llc_snap_hdr));
 	return llc->ethertype;
@@ -267,7 +268,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
  *      otherwise the same as skb->network_header.  For other key->dl_type
  *      values it is left untouched.
  */
-int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key,
+int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 		 bool *is_frag)
 {
 	struct ethhdr *eth;
@@ -383,17 +384,264 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key,
 	return 0;
 }
 
-u32 flow_hash(const struct odp_flow_key *key)
+u32 flow_hash(const struct sw_flow_key *key)
 {
 	return jhash2((u32*)key, sizeof *key / sizeof(u32), hash_seed);
 }
 
 int flow_cmp(const struct tbl_node *node, void *key2_)
 {
-	const struct odp_flow_key *key1 = &flow_cast(node)->key;
-	const struct odp_flow_key *key2 = key2_;
+	const struct sw_flow_key *key1 = &flow_cast(node)->key;
+	const struct sw_flow_key *key2 = key2_;
 
-	return !memcmp(key1, key2, sizeof(struct odp_flow_key));
+	return !memcmp(key1, key2, sizeof(struct sw_flow_key));
+}
+
+/**
+ * flow_from_nlattrs - parses Netlink attributes into a flow key.
+ * @swkey: receives the extracted flow key.
+ * @key: start of %ODPKT_* Netlink attribute sequence.
+ * @key_len: number of bytes in @key.
+ *
+ * This state machine accepts the following forms, with [] for optional
+ * elements and | for alternatives:
+ *
+ * [tun_id] in_port ethernet [8021q] [ethertype [IP [TCP|UDP|ICMP] | ARP]
+ */
+static int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *key, u32 key_len)
+{
+	const struct nlattr *nla;
+	u16 prev_type;
+	int rem;
+
+	memset(swkey, 0, sizeof(*swkey));
+	swkey->dl_type = htons(ETH_P_802_2);
+
+	prev_type = ODPKT_UNSPEC;
+	nla_for_each_attr(nla, key, key_len, rem) {
+		static const u32 key_lens[ODPKT_MAX + 1] = {
+			[ODPKT_TUN_ID] = 8,
+			[ODPKT_IN_PORT] = 4,
+			[ODPKT_ETHERNET] = sizeof(struct odp_key_ethernet),
+			[ODPKT_8021Q] = sizeof(struct odp_key_8021q),
+			[ODPKT_ETHERTYPE] = 2,
+			[ODPKT_IP] = sizeof(struct odp_key_ip),
+			[ODPKT_TCP] = sizeof(struct odp_key_tcp),
+			[ODPKT_UDP] = sizeof(struct odp_key_udp),
+			[ODPKT_ICMP] = sizeof(struct odp_key_icmp),
+			[ODPKT_ARP] = sizeof(struct odp_key_arp),
+		};
+
+		const struct odp_key_ethernet *eth_key;
+		const struct odp_key_8021q *q_key;
+		const struct odp_key_ip *ip_key;
+		const struct odp_key_tcp *tcp_key;
+		const struct odp_key_udp *udp_key;
+		const struct odp_key_icmp *icmp_key;
+		const struct odp_key_arp *arp_key;
+
+                int type = nla_type(nla);
+
+                if (type > ODPKT_MAX || nla_len(nla) != key_lens[type])
+                        return -EINVAL;
+
+#define TRANSITION(PREV_TYPE, TYPE) (((PREV_TYPE) << 16) | (TYPE))
+		switch (TRANSITION(prev_type, type)) {
+		case TRANSITION(ODPKT_UNSPEC, ODPKT_TUN_ID):
+			swkey->tun_id = nla_get_be64(nla);
+			break;
+
+		case TRANSITION(ODPKT_UNSPEC, ODPKT_IN_PORT):
+		case TRANSITION(ODPKT_TUN_ID, ODPKT_IN_PORT):
+			if (nla_get_u32(nla) >= DP_MAX_PORTS)
+				return -EINVAL;
+			swkey->in_port = nla_get_u32(nla);
+			break;
+
+		case TRANSITION(ODPKT_IN_PORT, ODPKT_ETHERNET):
+			eth_key = nla_data(nla);
+			memcpy(swkey->dl_src, eth_key->eth_src, ETH_ALEN);
+			memcpy(swkey->dl_dst, eth_key->eth_dst, ETH_ALEN);
+			break;
+
+		case TRANSITION(ODPKT_ETHERNET, ODPKT_8021Q):
+			q_key = nla_data(nla);
+			/* Only standard 0x8100 VLANs currently supported. */
+			if (q_key->q_tpid != htons(ETH_P_8021Q))
+				return -EINVAL;
+			if (q_key->q_tci & htons(VLAN_TAG_PRESENT))
+				return -EINVAL;
+			swkey->dl_tci = q_key->q_tci | htons(VLAN_TAG_PRESENT);
+			break;
+
+		case TRANSITION(ODPKT_8021Q, ODPKT_ETHERTYPE):
+		case TRANSITION(ODPKT_ETHERNET, ODPKT_ETHERTYPE):
+			swkey->dl_type = nla_get_be16(nla);
+			if (ntohs(swkey->dl_type) < 1536)
+				return -EINVAL;
+			break;
+
+		case TRANSITION(ODPKT_ETHERTYPE, ODPKT_IP):
+			if (swkey->dl_type != htons(ETH_P_IP))
+				return -EINVAL;
+			ip_key = nla_data(nla);
+			swkey->nw_src = ip_key->ip_src;
+			swkey->nw_dst = ip_key->ip_dst;
+			swkey->nw_proto = ip_key->ip_proto;
+			swkey->nw_tos = ip_key->ip_tos;
+			if (swkey->nw_tos & INET_ECN_MASK)
+				return -EINVAL;
+			break;
+
+		case TRANSITION(ODPKT_IP, ODPKT_TCP):
+			if (swkey->nw_proto != IPPROTO_TCP)
+				return -EINVAL;
+			tcp_key = nla_data(nla);
+			swkey->tp_src = tcp_key->tcp_src;
+			swkey->tp_dst = tcp_key->tcp_dst;
+			break;
+
+		case TRANSITION(ODPKT_IP, ODPKT_UDP):
+			if (swkey->nw_proto != IPPROTO_UDP)
+				return -EINVAL;
+			udp_key = nla_data(nla);
+			swkey->tp_src = udp_key->udp_src;
+			swkey->tp_dst = udp_key->udp_dst;
+			break;
+
+		case TRANSITION(ODPKT_IP, ODPKT_ICMP):
+			if (swkey->nw_proto != IPPROTO_ICMP)
+				return -EINVAL;
+			icmp_key = nla_data(nla);
+			swkey->tp_src = htons(icmp_key->icmp_type);
+			swkey->tp_dst = htons(icmp_key->icmp_code);
+			break;
+
+		case TRANSITION(ODPKT_ETHERTYPE, ODPKT_ARP):
+			if (swkey->dl_type != htons(ETH_P_ARP))
+				return -EINVAL;
+			arp_key = nla_data(nla);
+			swkey->nw_src = arp_key->arp_sip;
+			swkey->nw_dst = arp_key->arp_tip;
+			if (arp_key->arp_op & htons(0xff00))
+				return -EINVAL;
+			swkey->nw_proto = ntohs(arp_key->arp_op);
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+		prev_type = type;
+	}
+	if (rem)
+		return -EINVAL;
+
+	/* We need at least in_port and Ethernet source and destination. */
+	if (prev_type < ODPKT_ETHERNET)
+		return -EINVAL;
+
+	return 0;
+}
+
+u32 flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
+{
+	struct odp_key_ethernet *eth_key;
+
+	if (skb_tailroom(skb) < FLOW_BUFSIZE)
+		return -EMSGSIZE;
+
+	if (swkey->tun_id != cpu_to_be64(0))
+		nla_put_be64(skb, ODPKT_TUN_ID, swkey->tun_id);
+
+	nla_put_u32(skb, ODPKT_IN_PORT, swkey->in_port);
+
+	eth_key = nla_data(__nla_reserve(skb, ODPKT_ETHERNET, sizeof(*eth_key)));
+	memcpy(eth_key->eth_src, swkey->dl_src, ETH_ALEN);
+	memcpy(eth_key->eth_dst, swkey->dl_dst, ETH_ALEN);
+
+	if (swkey->dl_tci != htons(0)) {
+		struct odp_key_8021q *q_key;
+
+		q_key = nla_data(__nla_reserve(skb, ODPKT_8021Q, sizeof(*q_key)));
+		q_key->q_tpid = htons(ETH_P_8021Q);
+		q_key->q_tci = swkey->dl_tci & ~htons(VLAN_TAG_PRESENT);
+	}
+
+	if (swkey->dl_type == htons(ETH_P_802_2))
+		goto exit;
+
+	nla_put_be16(skb, ODPKT_ETHERTYPE, swkey->dl_type);
+
+	if (swkey->dl_type == htons(ETH_P_IP)) {
+		struct odp_key_ip *ip_key;
+
+		ip_key = nla_data(__nla_reserve(skb, ODPKT_IP, sizeof(*ip_key)));
+		ip_key->ip_src = swkey->nw_src;
+		ip_key->ip_dst = swkey->nw_dst;
+		ip_key->ip_proto = swkey->nw_proto;
+		ip_key->ip_tos = swkey->nw_tos;
+
+		if (swkey->nw_proto == IPPROTO_TCP) {
+			struct odp_key_tcp *tcp_key;
+
+			tcp_key = nla_data(__nla_reserve(skb, ODPKT_TCP, sizeof(*tcp_key)));
+			tcp_key->tcp_src = swkey->tp_src;
+			tcp_key->tcp_dst = swkey->tp_dst;
+		} else if (swkey->nw_proto == IPPROTO_UDP) {
+			struct odp_key_udp *udp_key;
+
+			udp_key = nla_data(__nla_reserve(skb, ODPKT_UDP, sizeof(*udp_key)));
+			udp_key->udp_src = swkey->tp_src;
+			udp_key->udp_dst = swkey->tp_dst;
+		} else if (swkey->nw_proto == IPPROTO_ICMP) {
+			struct odp_key_icmp *icmp_key;
+
+			icmp_key = nla_data(__nla_reserve(skb, ODPKT_ICMP, sizeof(*icmp_key)));
+			icmp_key->icmp_type = ntohs(swkey->tp_src);
+			icmp_key->icmp_code = ntohs(swkey->tp_dst);
+		}
+	} else if (swkey->dl_type == htons(ETH_P_ARP)) {
+		struct odp_key_arp *arp_key;
+
+		arp_key = nla_data(__nla_reserve(skb, ODPKT_ARP, sizeof(*arp_key)));
+		arp_key->arp_sip = swkey->nw_src;
+		arp_key->arp_tip = swkey->nw_dst;
+		arp_key->arp_op = htons(swkey->nw_proto);
+	}
+
+exit:
+	return skb->len;
+}
+
+int flow_copy_from_user(struct sw_flow_key *swkey, const struct nlattr __user *ukey, u32 ukey_len)
+{
+	char key[FLOW_BUFSIZE] __aligned(NLA_ALIGNTO);
+
+	if (ukey_len > FLOW_BUFSIZE || ukey_len % NLA_ALIGNTO)
+		return -EINVAL;
+
+	if (copy_from_user(key, ukey, ukey_len))
+		return -EFAULT;
+
+	return flow_from_nlattrs(swkey, (const struct nlattr *)key, ukey_len);
+}
+
+int flow_copy_to_user(const struct sw_flow_key *swkey, struct nlattr __user *ukey, u32 ukey_len)
+{
+	struct sk_buff *skb;
+	int retval;
+
+	skb = alloc_skb(FLOW_BUFSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	retval = flow_to_nlattrs(swkey, skb);
+	if (copy_to_user(ukey, skb->data, min(skb->len, ukey_len)))
+		retval = -EFAULT;
+	kfree_skb(skb);
+
+	return retval;
 }
 
 /* Initializes the flow module.
diff --git a/datapath/flow.h b/datapath/flow.h
index 0764482..f5a6bfe 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -29,11 +29,26 @@ struct sw_flow_actions {
 	struct nlattr actions[];
 };
 
+struct sw_flow_key {
+	__be64	tun_id;     /* Encapsulating tunnel ID. */
+	__be32	nw_src;	    /* IP source address. */
+	__be32	nw_dst;	    /* IP destination address. */
+	u16	in_port;    /* Input switch port. */
+	__be16	dl_tci;	    /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+	__be16	dl_type;    /* Ethernet frame type. */
+	__be16	tp_src;	    /* TCP/UDP source port. */
+	__be16	tp_dst;	    /* TCP/UDP destination port. */
+	u8	dl_src[ETH_ALEN]; /* Ethernet source address. */
+	u8	dl_dst[ETH_ALEN]; /* Ethernet destination address. */
+	u8	nw_proto;   /* IP protocol or lower 8 bits of ARP opcode. */
+	u8	nw_tos;	    /* IP ToS (DSCP field, 6 bits). */
+};
+
 struct sw_flow {
 	struct rcu_head rcu;
 	struct tbl_node tbl_node;
 
-	struct odp_flow_key key;
+	struct sw_flow_key key;
 	struct sw_flow_actions __rcu *sf_acts;
 
 	atomic_t refcnt;
@@ -74,12 +89,20 @@ void flow_deferred_free_acts(struct sw_flow_actions *);
 void flow_hold(struct sw_flow *);
 void flow_put(struct sw_flow *);
 
-int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *, bool *is_frag);
+int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, bool *is_frag);
 void flow_used(struct sw_flow *, struct sk_buff *);
 
-u32 flow_hash(const struct odp_flow_key *key);
+u32 flow_hash(const struct sw_flow_key *);
 int flow_cmp(const struct tbl_node *, void *target);
 
+/* By my calculations currently the longest valid nlattr-formatted flow key is
+ * 80 bytes long, so this leaves some safety margin. */
+#define FLOW_BUFSIZE 96
+
+u32 flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
+int flow_copy_from_user(struct sw_flow_key *, const struct nlattr __user *ukey, u32 key_len);
+int flow_copy_to_user(const struct sw_flow_key *, struct nlattr __user *ukey, u32 key_len);
+
 static inline struct sw_flow *flow_cast(const struct tbl_node *node)
 {
 	return container_of(node, struct sw_flow, tbl_node);
diff --git a/datapath/linux-2.6/compat-2.6/include/net/netlink.h b/datapath/linux-2.6/compat-2.6/include/net/netlink.h
index c11a170..b9ed532 100644
--- a/datapath/linux-2.6/compat-2.6/include/net/netlink.h
+++ b/datapath/linux-2.6/compat-2.6/include/net/netlink.h
@@ -25,6 +25,16 @@ static inline int VERIFY_NUL_STRING(struct nlattr *attr)
         NLA_PUT_TYPE(skb, __be16, attrtype, value)
 #endif  /* !NLA_PUT_BE16 */
 
+#ifndef NLA_PUT_BE32
+#define NLA_PUT_BE32(skb, attrtype, value) \
+        NLA_PUT_TYPE(skb, __be32, attrtype, value)
+#endif  /* !NLA_PUT_BE32 */
+
+#ifndef NLA_PUT_BE64
+#define NLA_PUT_BE64(skb, attrtype, value) \
+        NLA_PUT_TYPE(skb, __be64, attrtype, value)
+#endif  /* !NLA_PUT_BE64 */
+
 #ifndef HAVE_NLA_GET_BE16
 /**
  * nla_get_be16 - return payload of __be16 attribute
@@ -92,4 +102,41 @@ static inline int nla_type(const struct nlattr *nla)
 }
 #endif
 
+/* The following nla_put_be{16,32,64} functions are not in any version of Linux
+ * (although NLA_PUT_BE{16,32,64} are), so we will probably want to add them
+ * as part of the patch series when we submit Open vSwitch upstream. */
+
+/**
+ * nla_put_be16 - Add a be16 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value)
+{
+	return nla_put(skb, attrtype, sizeof(__be16), &value);
+}
+
+/**
+ * nla_put_be32 - Add a be32 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value)
+{
+	return nla_put(skb, attrtype, sizeof(__be32), &value);
+}
+
+/**
+ * nla_put_64 - Add a be64 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value)
+{
+	return nla_put(skb, attrtype, sizeof(__be64), &value);
+}
+
 #endif /* net/netlink.h */
diff --git a/datapath/odp-compat.h b/datapath/odp-compat.h
index f359a71..d495be2 100644
--- a/datapath/odp-compat.h
+++ b/datapath/odp-compat.h
@@ -30,7 +30,8 @@ struct compat_odp_portvec {
 
 struct compat_odp_flow {
 	struct odp_flow_stats stats;
-	struct odp_flow_key key;
+	compat_uptr_t key;
+	u32 key_len;
 	compat_uptr_t actions;
 	u32 actions_len;
 	u32 flags;
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index eac3fa3..0e430c9 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -910,7 +910,7 @@ static struct tnl_cache *build_cache(struct vport *vport,
 #endif
 
 	if (is_internal_dev(rt_dst(rt).dev)) {
-		struct odp_flow_key flow_key;
+		struct sw_flow_key flow_key;
 		struct tbl_node *flow_node;
 		struct vport *vport;
 		struct sk_buff *skb;
diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h
index 2dc3f83..e0ec9b2 100644
--- a/include/openvswitch/datapath-protocol.h
+++ b/include/openvswitch/datapath-protocol.h
@@ -211,29 +211,59 @@ struct odp_flow_stats {
     uint16_t error;             /* Used by ODP_FLOW_GET. */
 };
 
-/*
- * The datapath protocol adopts the Linux convention for TCI fields: if an
- * 802.1Q header is present then its TCI value is used verbatim except that the
- * CFI bit (0x1000) is always set to 1, and all-bits-zero indicates no 802.1Q
- * header.
- */
-#define ODP_TCI_PRESENT 0x1000  /* CFI bit */
-
-struct odp_flow_key {
-    ovs_be64 tun_id;            /* Encapsulating tunnel ID. */
-    ovs_be32 nw_src;            /* IP source address. */
-    ovs_be32 nw_dst;            /* IP destination address. */
-    uint16_t in_port;           /* Input switch port. */
-    ovs_be16 dl_tci;            /* All zeros if 802.1Q header absent,
-                                  * ODP_TCI_PRESENT set if present. */
-    ovs_be16 dl_type;           /* Ethernet frame type. */
-    ovs_be16 tp_src;            /* TCP/UDP source port. */
-    ovs_be16 tp_dst;            /* TCP/UDP destination port. */
-    uint8_t  dl_src[6];         /* Ethernet source address. */
-    uint8_t  dl_dst[6];         /* Ethernet destination address. */
-    uint8_t  nw_proto;          /* IP protocol or lower 8 bits of
-                                   ARP opcode. */
-    uint8_t  nw_tos;            /* IP ToS (DSCP field, 6 bits). */
+enum odp_key_type {
+	ODPKT_UNSPEC,
+	ODPKT_TUN_ID,		/* 64-bit tunnel ID */
+	ODPKT_IN_PORT,		/* 32-bit ODP port number */
+	ODPKT_ETHERNET,		/* struct odp_key_ethernet */
+	ODPKT_8021Q,		/* struct odp_key_8021q */
+	ODPKT_ETHERTYPE,	/* 16-bit Ethernet type */
+	ODPKT_IP,		/* struct odp_key_ip */
+	ODPKT_TCP,		/* struct odp_key_tcp */
+	ODPKT_UDP,		/* struct odp_key_udp */
+	ODPKT_ICMP,		/* struct odp_key_icmp */
+	ODPKT_ARP,		/* struct odp_key_arp */
+	__ODPKT_MAX
+};
+
+#define ODPKT_MAX (__ODPKT_MAX - 1)
+
+struct odp_key_ethernet {
+	uint8_t	 eth_src[6];
+	uint8_t	 eth_dst[6];
+};
+
+struct odp_key_8021q {
+	ovs_be16 q_tpid;
+	ovs_be16 q_tci;
+};
+
+struct odp_key_ip {
+	ovs_be32 ip_src;
+	ovs_be32 ip_dst;
+	uint8_t  ip_proto;
+	uint8_t  ip_tos;
+};
+
+struct odp_key_tcp {
+	ovs_be16 tcp_src;
+	ovs_be16 tcp_dst;
+};
+
+struct odp_key_udp {
+	ovs_be16 udp_src;
+	ovs_be16 udp_dst;
+};
+
+struct odp_key_icmp {
+	uint8_t icmp_type;
+	uint8_t icmp_code;
+};
+
+struct odp_key_arp {
+	ovs_be32 arp_sip;
+	ovs_be32 arp_tip;
+	ovs_be16 arp_op;
 };
 
 /* Flags for ODP_FLOW. */
@@ -242,7 +272,8 @@ struct odp_flow_key {
 
 struct odp_flow {
     struct odp_flow_stats stats;
-    struct odp_flow_key key;
+    struct nlattr *key;
+    uint32_t key_len;
     struct nlattr *actions;
     uint32_t actions_len;
     uint32_t flags;
@@ -337,15 +368,4 @@ struct odp_vport_mtu {
     uint16_t mtu;
 };
 
-/* Values below this cutoff are 802.3 packets and the two bytes
- * following MAC addresses are used as a frame length.  Otherwise, the
- * two bytes are used as the Ethernet type.
- */
-#define ODP_DL_TYPE_ETH2_CUTOFF   0x0600
-
-/* Value of dl_type to indicate that the frame does not include an
- * Ethernet type.
- */
-#define ODP_DL_TYPE_NOT_ETH_TYPE  0x05ff
-
 #endif  /* openvswitch/datapath-protocol.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 18ed808..c7403f2 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -36,6 +36,7 @@
 #include "dpif.h"
 #include "dpif-provider.h"
 #include "dummy.h"
+#include "dynamic-string.h"
 #include "flow.h"
 #include "hmap.h"
 #include "list.h"
@@ -603,6 +604,32 @@ answer_flow_query(struct dp_netdev_flow *flow, uint32_t query_flags,
 }
 
 static int
+dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
+                              struct flow *flow)
+{
+    if (odp_flow_key_to_flow(key, key_len, flow)) {
+        /* This should not happen: it indicates that odp_flow_key_from_flow()
+         * and odp_flow_key_to_flow() disagree on the acceptable form of a
+         * flow.  Log the problem as an error, with enough details to enable
+         * debugging. */
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+        if (!VLOG_DROP_ERR(&rl)) {
+            struct ds s;
+
+            ds_init(&s);
+            odp_flow_key_format(key, key_len, &s);
+            VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
+            ds_destroy(&s);
+        }
+
+        return EINVAL;
+    }
+
+    return 0;
+}
+
+static int
 dpif_netdev_flow_get(const struct dpif *dpif, struct odp_flow flows[], int n)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
@@ -611,8 +638,14 @@ dpif_netdev_flow_get(const struct dpif *dpif, struct odp_flow flows[], int n)
     for (i = 0; i < n; i++) {
         struct odp_flow *odp_flow = &flows[i];
         struct flow key;
+        int error;
+
+        error = dpif_netdev_flow_from_nlattrs(odp_flow->key, odp_flow->key_len,
+                                              &key);
+        if (error) {
+            return error;
+        }
 
-        odp_flow_key_to_flow(&odp_flow->key, &key);
         answer_flow_query(dp_netdev_lookup_flow(dp, &key),
                           odp_flow->flags, odp_flow);
     }
@@ -699,14 +732,14 @@ set_flow_actions(struct dp_netdev_flow *flow, struct odp_flow *odp_flow)
 }
 
 static int
-add_flow(struct dpif *dpif, struct odp_flow *odp_flow)
+add_flow(struct dpif *dpif, const struct flow *key, struct odp_flow *odp_flow)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_flow *flow;
     int error;
 
     flow = xzalloc(sizeof *flow);
-    odp_flow_key_to_flow(&odp_flow->key, &flow->key);
+    flow->key = *key;
 
     error = set_flow_actions(flow, odp_flow);
     if (error) {
@@ -734,13 +767,19 @@ dpif_netdev_flow_put(struct dpif *dpif, struct odp_flow_put *put)
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_flow *flow;
     struct flow key;
+    int error;
+
+    error = dpif_netdev_flow_from_nlattrs(put->flow.key, put->flow.key_len,
+                                          &key);
+    if (error) {
+        return error;
+    }
 
-    odp_flow_key_to_flow(&put->flow.key, &key);
     flow = dp_netdev_lookup_flow(dp, &key);
     if (!flow) {
         if (put->flags & ODPPF_CREATE) {
             if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
-                return add_flow(dpif, &put->flow);
+                return add_flow(dpif, &key, &put->flow);
             } else {
                 return EFBIG;
             }
@@ -767,8 +806,14 @@ dpif_netdev_flow_del(struct dpif *dpif, struct odp_flow *odp_flow)
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_flow *flow;
     struct flow key;
+    int error;
+
+    error = dpif_netdev_flow_from_nlattrs(odp_flow->key, odp_flow->key_len,
+                                          &key);
+    if (error) {
+        return error;
+    }
 
-    odp_flow_key_to_flow(&odp_flow->key, &key);
     flow = dp_netdev_lookup_flow(dp, &key);
     if (flow) {
         answer_flow_query(flow, 0, odp_flow);
@@ -786,6 +831,7 @@ dpif_netdev_flow_dump(const struct dpif *dpif, struct dpif_dump *dump,
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_flow *flow;
     struct hmap_node *node;
+    struct ofpbuf key;
 
     node = hmap_at_position(&dp->flow_table, &dump->state[0], &dump->state[1]);
     if (!node) {
@@ -793,7 +839,12 @@ dpif_netdev_flow_dump(const struct dpif *dpif, struct dpif_dump *dump,
     }
 
     flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
-    odp_flow_key_from_flow(&odp_flow->key, &flow->key);
+
+    ofpbuf_use_stack(&key, odp_flow->key, odp_flow->key_len);
+    odp_flow_key_from_flow(&key, &flow->key);
+    odp_flow->key_len = key.size;
+    ofpbuf_uninit(&key);
+
     answer_flow_query(flow, 0, odp_flow);
 
     return 0;
diff --git a/lib/dpif.c b/lib/dpif.c
index 6143ae4..48ef397 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -83,7 +83,8 @@ static void log_flow_operation(const struct dpif *, const char *operation,
 static void log_flow_put(struct dpif *, int error,
                          const struct odp_flow_put *);
 static bool should_log_flow_message(int error);
-static void check_rw_odp_flow(struct odp_flow *);
+static void check_rw_flow_actions(struct odp_flow *);
+static void check_rw_flow_key(struct odp_flow *);
 
 static void
 dp_initialize(void)
@@ -679,7 +680,7 @@ dpif_flow_get(const struct dpif *dpif, struct odp_flow *flow)
 
     COVERAGE_INC(dpif_flow_get);
 
-    check_rw_odp_flow(flow);
+    check_rw_flow_actions(flow);
     error = dpif->dpif_class->flow_get(dpif, flow, 1);
     if (!error) {
         error = flow->stats.error;
@@ -732,7 +733,7 @@ dpif_flow_get_multiple(const struct dpif *dpif,
     COVERAGE_ADD(dpif_flow_get, n);
 
     for (i = 0; i < n; i++) {
-        check_rw_odp_flow(&flows[i]);
+        check_rw_flow_actions(&flows[i]);
     }
 
     error = dpif->dpif_class->flow_get(dpif, flows, n);
@@ -782,7 +783,7 @@ dpif_flow_del(struct dpif *dpif, struct odp_flow *flow)
 
     COVERAGE_INC(dpif_flow_del);
 
-    check_rw_odp_flow(flow);
+    check_rw_flow_actions(flow);
     memset(&flow->stats, 0, sizeof flow->stats);
 
     error = dpif->dpif_class->flow_del(dpif, flow);
@@ -823,9 +824,10 @@ dpif_dump_next(const struct dpif *dpif, struct dpif_dump *dump,
 {
     int error;
 
-    check_rw_odp_flow(flow);
-    error = dpif->dpif_class->flow_dump(dpif, dump, flow);
+    check_rw_flow_actions(flow);
+    check_rw_flow_key(flow);
 
+    error = dpif->dpif_class->flow_dump(dpif, dump, flow);
     if (error == EOF) {
         VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif));
     } else {
@@ -1097,7 +1099,7 @@ should_log_flow_message(int error)
 
 static void
 log_flow_message(const struct dpif *dpif, int error, const char *operation,
-                 const struct odp_flow_key *flow,
+                 const struct nlattr *key, size_t key_len,
                  const struct odp_flow_stats *stats,
                  const struct nlattr *actions, size_t actions_len)
 {
@@ -1110,7 +1112,7 @@ log_flow_message(const struct dpif *dpif, int error, const char *operation,
     if (error) {
         ds_put_format(&ds, "(%s) ", strerror(error));
     }
-    format_odp_flow_key(&ds, flow);
+    odp_flow_key_format(key, key_len, &ds);
     if (stats) {
         ds_put_cstr(&ds, ", ");
         format_odp_flow_stats(&ds, stats);
@@ -1128,10 +1130,11 @@ log_flow_operation(const struct dpif *dpif, const char *operation, int error,
                    struct odp_flow *flow)
 {
     if (error) {
+        flow->key_len = 0;
         flow->actions_len = 0;
     }
-    log_flow_message(dpif, error, operation, &flow->key,
-                     !error ? &flow->stats : NULL,
+    log_flow_message(dpif, error, operation,
+                     flow->key, flow->key_len, !error ? &flow->stats : NULL,
                      flow->actions, flow->actions_len);
 }
 
@@ -1155,7 +1158,8 @@ log_flow_put(struct dpif *dpif, int error, const struct odp_flow_put *put)
     if (put->flags & ~ODPPF_ALL) {
         ds_put_format(&s, "[%x]", put->flags & ~ODPPF_ALL);
     }
-    log_flow_message(dpif, error, ds_cstr(&s), &put->flow.key,
+    log_flow_message(dpif, error, ds_cstr(&s),
+                     put->flow.key, put->flow.key_len,
                      !error ? &put->flow.stats : NULL,
                      put->flow.actions, put->flow.actions_len);
     ds_destroy(&s);
@@ -1178,9 +1182,18 @@ log_flow_put(struct dpif *dpif, int error, const struct odp_flow_put *put)
  *        "actions" or "actions_len" was not initialized.
  */
 static void
-check_rw_odp_flow(struct odp_flow *flow)
+check_rw_flow_actions(struct odp_flow *flow)
 {
     if (flow->actions_len) {
         memset(&flow->actions[0], 0xcc, sizeof flow->actions[0]);
     }
 }
+
+/* Same as check_rw_flow_actions() but for flow->key. */
+static void
+check_rw_flow_key(struct odp_flow *flow)
+{
+    if (flow->key_len) {
+        memset(&flow->key[0], 0xcc, sizeof flow->key[0]);
+    }
+}
diff --git a/lib/flow.c b/lib/flow.c
index e7ed2a9..8e75d52 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -101,12 +101,12 @@ parse_ethertype(struct ofpbuf *b)
     ovs_be16 proto;
 
     proto = *(ovs_be16 *) ofpbuf_pull(b, sizeof proto);
-    if (ntohs(proto) >= ODP_DL_TYPE_ETH2_CUTOFF) {
+    if (ntohs(proto) >= ETH_TYPE_MIN) {
         return proto;
     }
 
     if (b->size < sizeof *llc) {
-        return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+        return htons(FLOW_DL_TYPE_NONE);
     }
 
     llc = b->data;
@@ -115,7 +115,7 @@ parse_ethertype(struct ofpbuf *b)
         || llc->llc.llc_cntl != LLC_CNTL_SNAP
         || memcmp(llc->snap.snap_org, SNAP_ORG_ETHERNET,
                   sizeof llc->snap.snap_org)) {
-        return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+        return htons(FLOW_DL_TYPE_NONE);
     }
 
     ofpbuf_pull(b, sizeof *llc);
diff --git a/lib/flow.h b/lib/flow.h
index ee460e3..bbf35c7 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -35,6 +35,10 @@ struct ofpbuf;
 #define FLOW_N_REGS 4
 BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS);
 
+/* Used for struct flow's dl_type member for frames that have no Ethernet
+ * type, that is, pure 802.2 frames. */
+#define FLOW_DL_TYPE_NONE 0
+
 struct flow {
     ovs_be64 tun_id;            /* Encapsulating tunnel ID. */
     uint32_t regs[FLOW_N_REGS]; /* Registers. */
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 09f8b83..7a56292 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -16,6 +16,7 @@
 
 #include <config.h>
 #include "odp-util.h"
+#include <errno.h>
 #include <inttypes.h>
 #include <stdlib.h>
 #include <string.h>
@@ -28,26 +29,6 @@
 #include "timeval.h"
 #include "util.h"
 
-void
-format_odp_flow_key(struct ds *ds, const struct odp_flow_key *key)
-{
-    ds_put_format(ds, "tun_id%#"PRIx64" in_port%d tci(",
-                  ntohll(key->tun_id), key->in_port);
-    if (key->dl_tci) {
-        ds_put_format(ds, "vlan%"PRIu16",pcp%d",
-                      vlan_tci_to_vid(key->dl_tci),
-                      vlan_tci_to_pcp(key->dl_tci));
-    } else {
-        ds_put_char(ds, '0');
-    }
-    ds_put_format(ds, ") mac"ETH_ADDR_FMT"->"ETH_ADDR_FMT" type%04x "
-                  "proto%"PRId8" tos%"PRIu8" ip"IP_FMT"->"IP_FMT" port%d->%d",
-                  ETH_ADDR_ARGS(key->dl_src), ETH_ADDR_ARGS(key->dl_dst),
-                  ntohs(key->dl_type), key->nw_proto, key->nw_tos,
-                  IP_ARGS(&key->nw_src), IP_ARGS(&key->nw_dst),
-                  ntohs(key->tp_src), ntohs(key->tp_dst));
-}
-
 int
 odp_action_len(uint16_t type)
 {
@@ -186,7 +167,7 @@ format_odp_actions(struct ds *ds, const struct nlattr *actions,
             format_odp_action(ds, a);
         }
         if (left) {
-            ds_put_format(ds, " ***%u leftover bytes***", left);
+            ds_put_format(ds, ",***%u leftover bytes***", left);
         }
     } else {
         ds_put_cstr(ds, "drop");
@@ -210,44 +191,418 @@ format_odp_flow_stats(struct ds *ds, const struct odp_flow_stats *s)
 void
 format_odp_flow(struct ds *ds, const struct odp_flow *f)
 {
-    format_odp_flow_key(ds, &f->key);
+    odp_flow_key_format(f->key, f->key_len, ds);
     ds_put_cstr(ds, ", ");
     format_odp_flow_stats(ds, &f->stats);
     ds_put_cstr(ds, ", actions:");
     format_odp_actions(ds, f->actions, f->actions_len);
 }
 
+/* Returns the correct length of the payload for a flow key attribute of the
+ * specified 'type', or -1 if 'type' is unknown. */
+static int
+odp_flow_key_attr_len(uint16_t type)
+{
+    if (type > ODPKT_MAX) {
+        return -1;
+    }
+
+    switch ((enum odp_key_type) type) {
+    case ODPKT_TUN_ID: return 8;
+    case ODPKT_IN_PORT: return 4;
+    case ODPKT_ETHERNET: return sizeof(struct odp_key_ethernet);
+    case ODPKT_8021Q: return sizeof(struct odp_key_8021q);
+    case ODPKT_ETHERTYPE: return 2;
+    case ODPKT_IP: return sizeof(struct odp_key_ip);
+    case ODPKT_TCP: return sizeof(struct odp_key_tcp);
+    case ODPKT_UDP: return sizeof(struct odp_key_udp);
+    case ODPKT_ICMP: return sizeof(struct odp_key_icmp);
+    case ODPKT_ARP: return sizeof(struct odp_key_arp);
+
+    case ODPKT_UNSPEC:
+    case __ODPKT_MAX:
+        return -1;
+    }
+
+    return -1;
+}
+
+
+static void
+format_generic_odp_key(const struct nlattr *a, struct ds *ds)
+{
+    size_t len = nl_attr_get_size(a);
+
+    ds_put_format(ds, "key%"PRId16, nl_attr_type(a));
+    if (len) {
+        const uint8_t *unspec;
+        unsigned int i;
+
+        unspec = nl_attr_get(a);
+        for (i = 0; i < len; i++) {
+            ds_put_char(ds, i ? ' ': '(');
+            ds_put_format(ds, "%02x", unspec[i]);
+        }
+        ds_put_char(ds, ')');
+    }
+}
+
+static void
+format_odp_key_attr(const struct nlattr *a, struct ds *ds)
+{
+    const struct odp_key_ethernet *eth_key;
+    const struct odp_key_8021q *q_key;
+    const struct odp_key_ip *ip_key;
+    const struct odp_key_tcp *tcp_key;
+    const struct odp_key_udp *udp_key;
+    const struct odp_key_icmp *icmp_key;
+    const struct odp_key_arp *arp_key;
+
+    if (nl_attr_get_size(a) != odp_flow_key_attr_len(nl_attr_type(a))) {
+        ds_put_format(ds, "bad length %zu, expected %d for: ",
+                      nl_attr_get_size(a),
+                      odp_flow_key_attr_len(nl_attr_type(a)));
+        format_generic_odp_key(a, ds);
+        return;
+    }
+
+    switch (nl_attr_type(a)) {
+    case ODPKT_TUN_ID:
+        ds_put_format(ds, "tun_id(%#"PRIx64")", nl_attr_get_be64(a));
+        break;
+
+    case ODPKT_IN_PORT:
+        ds_put_format(ds, "in_port(%"PRIu32")", nl_attr_get_u32(a));
+        break;
+
+    case ODPKT_ETHERNET:
+        eth_key = nl_attr_get(a);
+        ds_put_format(ds, "eth(src="ETH_ADDR_FMT",dst="ETH_ADDR_FMT")",
+                      ETH_ADDR_ARGS(eth_key->eth_src),
+                      ETH_ADDR_ARGS(eth_key->eth_dst));
+        break;
+
+    case ODPKT_8021Q:
+        q_key = nl_attr_get(a);
+        ds_put_cstr(ds, "vlan(");
+        if (q_key->q_tpid != htons(ETH_TYPE_VLAN)) {
+            ds_put_format(ds, "tpid=%#"PRIx16",", ntohs(q_key->q_tpid));
+        }
+        ds_put_format(ds, "vid%"PRIu16",pcp%d)",
+                      vlan_tci_to_vid(q_key->q_tci),
+                      vlan_tci_to_pcp(q_key->q_tci));
+        break;
+
+    case ODPKT_ETHERTYPE:
+        ds_put_format(ds, "eth_type(%#04"PRIx16")",
+                      ntohs(nl_attr_get_be16(a)));
+        break;
+
+    case ODPKT_IP:
+        ip_key = nl_attr_get(a);
+        ds_put_format(ds, "ip(src="IP_FMT",dst="IP_FMT","
+                      "proto=%"PRId8",tos=%"PRIu8")",
+                      IP_ARGS(&ip_key->ip_src), IP_ARGS(&ip_key->ip_dst),
+                      ip_key->ip_proto, ip_key->ip_tos);
+        break;
+
+    case ODPKT_TCP:
+        tcp_key = nl_attr_get(a);
+        ds_put_format(ds, "tcp(src=%"PRIu16",dst=%"PRIu16")",
+                      ntohs(tcp_key->tcp_src), ntohs(tcp_key->tcp_dst));
+        break;
+
+    case ODPKT_UDP:
+        udp_key = nl_attr_get(a);
+        ds_put_format(ds, "udp(src=%"PRIu16",dst=%"PRIu16")",
+                      ntohs(udp_key->udp_src), ntohs(udp_key->udp_dst));
+        break;
+
+    case ODPKT_ICMP:
+        icmp_key = nl_attr_get(a);
+        ds_put_format(ds, "icmp(type=%"PRIu8",code=%"PRIu8")",
+                      icmp_key->icmp_type, icmp_key->icmp_code);
+        break;
+
+    case ODPKT_ARP:
+        arp_key = nl_attr_get(a);
+        ds_put_format(ds, "arp(sip="IP_FMT",tip="IP_FMT",op=%"PRIu16")",
+                      IP_ARGS(&arp_key->arp_sip), IP_ARGS(&arp_key->arp_tip),
+                      ntohs(arp_key->arp_op));
+        break;
+
+    default:
+        format_generic_odp_key(a, ds);
+        break;
+    }
+}
+
+/* Appends to 'ds' a string representation of the 'key_len' bytes of ODPKT_*
+ * attributes in 'key'. */
 void
-odp_flow_key_from_flow(struct odp_flow_key *key, const struct flow *flow)
+odp_flow_key_format(const struct nlattr *key, size_t key_len, struct ds *ds)
 {
-    key->tun_id = flow->tun_id;
-    key->nw_src = flow->nw_src;
-    key->nw_dst = flow->nw_dst;
-    key->in_port = flow->in_port;
-    key->dl_tci = flow->vlan_tci;
-    key->dl_type = flow->dl_type;
-    key->tp_src = flow->tp_src;
-    key->tp_dst = flow->tp_dst;
-    memcpy(key->dl_src, flow->dl_src, ETH_ADDR_LEN);
-    memcpy(key->dl_dst, flow->dl_dst, ETH_ADDR_LEN);
-    key->nw_proto = flow->nw_proto;
-    key->nw_tos = flow->nw_tos;
+    if (key_len) {
+        const struct nlattr *a;
+        unsigned int left;
+
+        NL_ATTR_FOR_EACH (a, left, key, key_len) {
+            if (a != key) {
+                ds_put_char(ds, ',');
+            }
+            format_odp_key_attr(a, ds);
+        }
+        if (left) {
+            ds_put_format(ds, ",***%u leftover bytes***", left);
+        }
+    } else {
+        ds_put_cstr(ds, "<empty>");
+    }
 }
 
+/* Appends a representation of 'flow' as ODPKT_* attributes to 'buf'. */
 void
-odp_flow_key_to_flow(const struct odp_flow_key *key, struct flow *flow)
+odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
+{
+    struct odp_key_ethernet *eth_key;
+
+    if (flow->tun_id != htonll(0)) {
+        nl_msg_put_be64(buf, ODPKT_TUN_ID, flow->tun_id);
+    }
+
+    nl_msg_put_u32(buf, ODPKT_IN_PORT, flow->in_port);
+
+    eth_key = nl_msg_put_unspec_uninit(buf, ODPKT_ETHERNET, sizeof *eth_key);
+    memcpy(eth_key->eth_src, flow->dl_src, ETH_ADDR_LEN);
+    memcpy(eth_key->eth_dst, flow->dl_dst, ETH_ADDR_LEN);
+
+    if (flow->vlan_tci != htons(0)) {
+        struct odp_key_8021q *q_key;
+
+        q_key = nl_msg_put_unspec_uninit(buf, ODPKT_8021Q, sizeof *q_key);
+        q_key->q_tpid = htons(ETH_TYPE_VLAN);
+        q_key->q_tci = flow->vlan_tci & ~htons(VLAN_CFI);
+    }
+
+    if (ntohs(flow->dl_type) < ETH_TYPE_MIN) {
+        return;
+    }
+
+    nl_msg_put_be16(buf, ODPKT_ETHERTYPE, flow->dl_type);
+
+    if (flow->dl_type == htons(ETH_TYPE_IP)) {
+        struct odp_key_ip *ip_key;
+
+        ip_key = nl_msg_put_unspec_uninit(buf, ODPKT_IP, sizeof *ip_key);
+        ip_key->ip_src = flow->nw_src;
+        ip_key->ip_dst = flow->nw_dst;
+        ip_key->ip_proto = flow->nw_proto;
+        ip_key->ip_tos = flow->nw_tos;
+
+        if (flow->nw_proto == IP_TYPE_TCP) {
+            struct odp_key_tcp *tcp_key;
+
+            tcp_key = nl_msg_put_unspec_uninit(buf, ODPKT_TCP,
+                                               sizeof *tcp_key);
+            tcp_key->tcp_src = flow->tp_src;
+            tcp_key->tcp_dst = flow->tp_dst;
+        } else if (flow->nw_proto == IP_TYPE_UDP) {
+            struct odp_key_udp *udp_key;
+
+            udp_key = nl_msg_put_unspec_uninit(buf, ODPKT_UDP,
+                                               sizeof *udp_key);
+            udp_key->udp_src = flow->tp_src;
+            udp_key->udp_dst = flow->tp_dst;
+        } else if (flow->nw_proto == IP_TYPE_ICMP) {
+            struct odp_key_icmp *icmp_key;
+
+            icmp_key = nl_msg_put_unspec_uninit(buf, ODPKT_ICMP,
+                                                sizeof *icmp_key);
+            icmp_key->icmp_type = ntohs(flow->tp_src);
+            icmp_key->icmp_code = ntohs(flow->tp_dst);
+        }
+    } else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
+        struct odp_key_arp *arp_key;
+
+        arp_key = nl_msg_put_unspec_uninit(buf, ODPKT_ARP, sizeof *arp_key);
+        arp_key->arp_sip = flow->nw_src;
+        arp_key->arp_tip = flow->nw_dst;
+        arp_key->arp_op = htons(flow->nw_proto);
+    }
+}
+
+/* Converts the 'key_len' bytes of ODPKT_* attributes in 'key' to a flow
+ * structure in 'flow'.  Returns 0 if successful, otherwise EINVAL. */
+int
+odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
+                     struct flow *flow)
 {
-    memset(flow->regs, 0, sizeof flow->regs);
-    flow->tun_id = key->tun_id;
-    flow->nw_src = key->nw_src;
-    flow->nw_dst = key->nw_dst;
-    flow->in_port = key->in_port;
-    flow->vlan_tci = key->dl_tci;
-    flow->dl_type = key->dl_type;
-    flow->tp_src = key->tp_src;
-    flow->tp_dst = key->tp_dst;
-    memcpy(flow->dl_src, key->dl_src, ETH_ADDR_LEN);
-    memcpy(flow->dl_dst, key->dl_dst, ETH_ADDR_LEN);
-    flow->nw_proto = key->nw_proto;
-    flow->nw_tos = key->nw_tos;
+    const struct nlattr *nla;
+    enum odp_key_type prev_type;
+    size_t left;
+
+    memset(flow, 0, sizeof *flow);
+    flow->dl_type = htons(FLOW_DL_TYPE_NONE);
+
+    prev_type = ODPKT_UNSPEC;
+    NL_ATTR_FOR_EACH (nla, left, key, key_len) {
+        const struct odp_key_ethernet *eth_key;
+        const struct odp_key_8021q *q_key;
+        const struct odp_key_ip *ip_key;
+        const struct odp_key_tcp *tcp_key;
+        const struct odp_key_udp *udp_key;
+        const struct odp_key_icmp *icmp_key;
+        const struct odp_key_arp *arp_key;
+
+        uint16_t type = nl_attr_type(nla);
+        int len = odp_flow_key_attr_len(type);
+
+        if (nl_attr_get_size(nla) != len && len != -1) {
+            return EINVAL;
+        }
+
+#define TRANSITION(PREV_TYPE, TYPE) (((PREV_TYPE) << 16) | (TYPE))
+        switch (TRANSITION(prev_type, type)) {
+        case TRANSITION(ODPKT_UNSPEC, ODPKT_TUN_ID):
+            flow->tun_id = nl_attr_get_be64(nla);
+            break;
+
+        case TRANSITION(ODPKT_UNSPEC, ODPKT_IN_PORT):
+        case TRANSITION(ODPKT_TUN_ID, ODPKT_IN_PORT):
+            if (nl_attr_get_u32(nla) >= UINT16_MAX) {
+                return EINVAL;
+            }
+            flow->in_port = nl_attr_get_u32(nla);
+            break;
+
+        case TRANSITION(ODPKT_IN_PORT, ODPKT_ETHERNET):
+            eth_key = nl_attr_get(nla);
+            memcpy(flow->dl_src, eth_key->eth_src, ETH_ADDR_LEN);
+            memcpy(flow->dl_dst, eth_key->eth_dst, ETH_ADDR_LEN);
+            break;
+
+        case TRANSITION(ODPKT_ETHERNET, ODPKT_8021Q):
+            q_key = nl_attr_get(nla);
+            if (q_key->q_tpid != htons(ETH_TYPE_VLAN)) {
+                /* Only standard 0x8100 VLANs currently supported. */
+                return EINVAL;
+            }
+            if (q_key->q_tci & htons(VLAN_CFI)) {
+                return EINVAL;
+            }
+            flow->vlan_tci = q_key->q_tci | htons(VLAN_CFI);
+            break;
+
+        case TRANSITION(ODPKT_8021Q, ODPKT_ETHERTYPE):
+        case TRANSITION(ODPKT_ETHERNET, ODPKT_ETHERTYPE):
+            flow->dl_type = nl_attr_get_be16(nla);
+            if (ntohs(flow->dl_type) < 1536) {
+                return EINVAL;
+            }
+            break;
+
+        case TRANSITION(ODPKT_ETHERTYPE, ODPKT_IP):
+            if (flow->dl_type != htons(ETH_TYPE_IP)) {
+                return EINVAL;
+            }
+            ip_key = nl_attr_get(nla);
+            flow->nw_src = ip_key->ip_src;
+            flow->nw_dst = ip_key->ip_dst;
+            flow->nw_proto = ip_key->ip_proto;
+            flow->nw_tos = ip_key->ip_tos;
+            if (flow->nw_tos & IP_ECN_MASK) {
+                return EINVAL;
+            }
+            break;
+
+        case TRANSITION(ODPKT_IP, ODPKT_TCP):
+            if (flow->nw_proto != IP_TYPE_TCP) {
+                return EINVAL;
+            }
+            tcp_key = nl_attr_get(nla);
+            flow->tp_src = tcp_key->tcp_src;
+            flow->tp_dst = tcp_key->tcp_dst;
+            break;
+
+        case TRANSITION(ODPKT_IP, ODPKT_UDP):
+            if (flow->nw_proto != IP_TYPE_UDP) {
+                return EINVAL;
+            }
+            udp_key = nl_attr_get(nla);
+            flow->tp_src = udp_key->udp_src;
+            flow->tp_dst = udp_key->udp_dst;
+            break;
+
+        case TRANSITION(ODPKT_IP, ODPKT_ICMP):
+            if (flow->nw_proto != IP_TYPE_ICMP) {
+                return EINVAL;
+            }
+            icmp_key = nl_attr_get(nla);
+            flow->tp_src = htons(icmp_key->icmp_type);
+            flow->tp_dst = htons(icmp_key->icmp_code);
+            break;
+
+        case TRANSITION(ODPKT_ETHERTYPE, ODPKT_ARP):
+            if (flow->dl_type != htons(ETH_TYPE_ARP)) {
+                return EINVAL;
+            }
+            arp_key = nl_attr_get(nla);
+            flow->nw_src = arp_key->arp_sip;
+            flow->nw_dst = arp_key->arp_tip;
+            if (arp_key->arp_op & htons(0xff00)) {
+                return EINVAL;
+            }
+            flow->nw_proto = ntohs(arp_key->arp_op);
+            break;
+
+        default:
+            if (type == ODPKT_UNSPEC || prev_type == ODPKT_UNSPEC) {
+                return EINVAL;
+            }
+            return EINVAL;
+        }
+
+        prev_type = type;
+    }
+    if (left) {
+        return EINVAL;
+    }
+
+    switch (prev_type) {
+    case ODPKT_UNSPEC:
+        return EINVAL;
+
+    case ODPKT_TUN_ID:
+    case ODPKT_IN_PORT:
+        return EINVAL;
+
+    case ODPKT_ETHERNET:
+    case ODPKT_8021Q:
+        return 0;
+
+    case ODPKT_ETHERTYPE:
+        if (flow->dl_type == htons(ETH_TYPE_IP)
+            || flow->dl_type == htons(ETH_TYPE_ARP)) {
+            return EINVAL;
+        }
+        return 0;
+
+    case ODPKT_IP:
+        if (flow->nw_proto == htons(IP_TYPE_TCP)
+            || flow->nw_proto == htons(IP_TYPE_UDP)
+            || flow->nw_proto == htons(IP_TYPE_ICMP)) {
+            return EINVAL;
+        }
+        return 0;
+
+    case ODPKT_TCP:
+    case ODPKT_UDP:
+    case ODPKT_ICMP:
+    case ODPKT_ARP:
+        return 0;
+
+    case __ODPKT_MAX:
+    default:
+        NOT_REACHED();
+    }
 }
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 6051c52..9dfe6db 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -28,6 +28,7 @@
 
 struct ds;
 struct flow;
+struct ofpbuf;
 
 static inline uint16_t
 ofp_port_to_odp_port(uint16_t ofp_port)
@@ -55,7 +56,6 @@ odp_port_to_ofp_port(uint16_t odp_port)
     }
 }
 
-void format_odp_flow_key(struct ds *, const struct odp_flow_key *);
 int odp_action_len(uint16_t type);
 void format_odp_action(struct ds *, const struct nlattr *);
 void format_odp_actions(struct ds *, const struct nlattr *odp_actions,
@@ -63,21 +63,19 @@ void format_odp_actions(struct ds *, const struct nlattr *odp_actions,
 void format_odp_flow_stats(struct ds *, const struct odp_flow_stats *);
 void format_odp_flow(struct ds *, const struct odp_flow *);
 
-void odp_flow_key_from_flow(struct odp_flow_key *, const struct flow *);
-void odp_flow_key_to_flow(const struct odp_flow_key *, struct flow *);
+/* By my calculations currently the longest valid nlattr-formatted flow key is
+ * 80 bytes long, so this leaves some safety margin.
+ *
+ * We allocate temporary on-stack buffers for flow keys as arrays of uint32_t
+ * to ensure proper 32-bit alignment for Netlink attributes.  (An array of
+ * "struct nlattr" might not, in theory, be sufficiently aligned because it
+ * only contains 16-bit types.) */
+#define ODPUTIL_FLOW_KEY_BYTES 128
+#define ODPUTIL_FLOW_KEY_U32S DIV_ROUND_UP(ODPUTIL_FLOW_KEY_BYTES, 4)
 
-static inline bool
-odp_flow_key_equal(const struct odp_flow_key *a, const struct odp_flow_key *b)
-{
-    return !memcmp(a, b, sizeof *a);
-}
+void odp_flow_key_format(const struct nlattr *, size_t, struct ds *);
 
-static inline size_t
-odp_flow_key_hash(const struct odp_flow_key *flow, uint32_t basis)
-{
-    BUILD_ASSERT_DECL(!(sizeof *flow % sizeof(uint32_t)));
-    return hash_words((const uint32_t *) flow,
-                      sizeof *flow / sizeof(uint32_t), basis);
-}
+void odp_flow_key_from_flow(struct ofpbuf *, const struct flow *);
+int odp_flow_key_to_flow(const struct nlattr *, size_t, struct flow *);
 
 #endif /* odp-util.h */
diff --git a/lib/packets.h b/lib/packets.h
index 39e88f1..96e23e1 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -155,6 +155,10 @@ void compose_benign_packet(struct ofpbuf *, const char *tag,
 #define ETH_TYPE_VLAN          0x8100
 #define ETH_TYPE_CFM           0x8902
 
+/* Minimum value for an Ethernet type.  Values below this are IEEE 802.2 frame
+ * lengths. */
+#define ETH_TYPE_MIN           0x600
+
 #define ETH_HEADER_LEN 14
 #define ETH_PAYLOAD_MIN 46
 #define ETH_PAYLOAD_MAX 1500
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index f435690..95800c3 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -2269,8 +2269,16 @@ static int
 facet_put__(struct ofproto *ofproto, struct facet *facet, int flags,
             struct odp_flow_put *put)
 {
+    uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
+    struct ofpbuf key;
+
+    ofpbuf_use_stack(&key, keybuf, sizeof keybuf);
+    odp_flow_key_from_flow(&key, &facet->flow);
+    assert(key.base == keybuf);
+
     memset(&put->flow.stats, 0, sizeof put->flow.stats);
-    odp_flow_key_from_flow(&put->flow.key, &facet->flow);
+    put->flow.key = key.data;
+    put->flow.key_len = key.size;
     put->flow.actions = facet->actions;
     put->flow.actions_len = facet->actions_len;
     put->flow.flags = 0;
@@ -2321,9 +2329,16 @@ static void
 facet_uninstall(struct ofproto *p, struct facet *facet)
 {
     if (facet->installed) {
+        uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
         struct odp_flow odp_flow;
+        struct ofpbuf key;
 
-        odp_flow_key_from_flow(&odp_flow.key, &facet->flow);
+        ofpbuf_use_stack(&key, keybuf, sizeof keybuf);
+        odp_flow_key_from_flow(&key, &facet->flow);
+        assert(key.base == keybuf);
+
+        odp_flow.key = key.data;
+        odp_flow.key_len = key.size;
         odp_flow.actions = NULL;
         odp_flow.actions_len = 0;
         odp_flow.flags = 0;
@@ -2459,10 +2474,16 @@ facet_revalidate(struct ofproto *ofproto, struct facet *facet)
      * to talk to the datapath. */
     if (actions_changed || facet->may_install != facet->installed) {
         if (facet->may_install) {
+            uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
             struct odp_flow_put put;
+            struct ofpbuf key;
+
+            ofpbuf_use_stack(&key, keybuf, sizeof keybuf);
+            odp_flow_key_from_flow(&key, &facet->flow);
 
             memset(&put.flow.stats, 0, sizeof put.flow.stats);
-            odp_flow_key_from_flow(&put.flow.key, &facet->flow);
+            put.flow.key = key.data;
+            put.flow.key_len = key.size;
             put.flow.actions = odp_actions->data;
             put.flow.actions_len = odp_actions->size;
             put.flow.flags = 0;
@@ -3397,42 +3418,44 @@ static void
 query_stats(struct ofproto *p, struct rule *rule,
             uint64_t *packet_countp, uint64_t *byte_countp)
 {
+    uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
     uint64_t packet_count, byte_count;
     struct facet *facet;
-    struct odp_flow *odp_flows;
-    size_t n_odp_flows;
+    struct ofpbuf key;
 
     /* Start from historical data for 'rule' itself that are no longer tracked
      * by the datapath.  This counts, for example, facets that have expired. */
     packet_count = rule->packet_count;
     byte_count = rule->byte_count;
 
-    /* Prepare to ask the datapath for statistics on all of the rule's facets.
+    /* Ask the datapath for statistics on all of the rule's facets.  (We could
+     * batch up statistics requests using dpif_flow_get_multiple(), but that is
+     * not yet implemented.)
      *
      * Also, add any statistics that are not tracked by the datapath for each
      * facet.  This includes, for example, statistics for packets that were
      * executed "by hand" by ofproto via dpif_execute() but must be accounted
      * to a rule. */
-    odp_flows = xzalloc(list_size(&rule->facets) * sizeof *odp_flows);
-    n_odp_flows = 0;
+    ofpbuf_use_stack(&key, keybuf, sizeof keybuf);
     LIST_FOR_EACH (facet, list_node, &rule->facets) {
-        struct odp_flow *odp_flow = &odp_flows[n_odp_flows++];
-        odp_flow_key_from_flow(&odp_flow->key, &facet->flow);
-        packet_count += facet->packet_count;
-        byte_count += facet->byte_count;
-    }
+        struct odp_flow odp_flow;
 
-    /* Fetch up-to-date statistics from the datapath and add them in. */
-    if (!dpif_flow_get_multiple(p->dpif, odp_flows, n_odp_flows)) {
-        size_t i;
+        ofpbuf_clear(&key);
+        odp_flow_key_from_flow(&key, &facet->flow);
 
-        for (i = 0; i < n_odp_flows; i++) {
-            struct odp_flow *odp_flow = &odp_flows[i];
-            packet_count += odp_flow->stats.n_packets;
-            byte_count += odp_flow->stats.n_bytes;
+        odp_flow.key = key.data;
+        odp_flow.key_len = key.size;
+        odp_flow.actions = NULL;
+        odp_flow.actions_len = 0;
+        odp_flow.flags = 0;
+        if (!dpif_flow_get(p->dpif, &odp_flow)) {
+            packet_count += odp_flow.stats.n_packets;
+            byte_count += odp_flow.stats.n_bytes;
         }
+
+        packet_count += facet->packet_count;
+        byte_count += facet->byte_count;
     }
-    free(odp_flows);
 
     /* Return the stats to the caller. */
     *packet_countp = packet_count;
@@ -4471,16 +4494,36 @@ static void
 ofproto_update_used(struct ofproto *p)
 {
     struct dpif_dump dump;
-    struct odp_flow f;
-
-    memset(&f, 0, sizeof f);
 
     dpif_dump_init(&dump);
-    while (!dpif_dump_next(p->dpif, &dump, &f)) {
+    for (;;) {
+        uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
         struct facet *facet;
+        struct odp_flow f;
         struct flow flow;
 
-        odp_flow_key_to_flow(&f.key, &flow);
+        memset(&f, 0, sizeof f);
+        f.key = (struct nlattr *) keybuf;
+        f.key_len = sizeof keybuf;
+        if (dpif_dump_next(p->dpif, &dump, &f)) {
+            break;
+        }
+
+        if (f.key_len > sizeof keybuf) {
+            VLOG_WARN_RL(&rl, "ODP flow key overflowed buffer");
+            continue;
+        }
+        if (odp_flow_key_to_flow(f.key, f.key_len, &flow)) {
+            struct ds s;
+
+            ds_init(&s);
+            odp_flow_key_format(f.key, f.key_len, &s);
+            VLOG_WARN_RL(&rl, "failed to convert ODP flow key to flow: %s",
+                         ds_cstr(&s));
+            ds_destroy(&s);
+
+            continue;
+        }
         facet = facet_find(p, &flow);
 
         if (facet && facet->installed) {
@@ -4600,7 +4643,14 @@ facet_active_timeout(struct ofproto *ofproto, struct facet *facet)
          * ofproto_update_used() zeroed TCP flags. */
         memset(&odp_flow, 0, sizeof odp_flow);
         if (facet->installed) {
-            odp_flow_key_from_flow(&odp_flow.key, &facet->flow);
+            uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
+            struct ofpbuf key;
+
+            ofpbuf_use_stack(&key, keybuf, sizeof keybuf);
+            odp_flow_key_from_flow(&key, &facet->flow);
+
+            odp_flow.key = key.data;
+            odp_flow.key_len = key.size;
             odp_flow.flags = ODPFF_ZERO_TCP_FLAGS;
             dpif_flow_get(ofproto->dpif, &odp_flow);
 
diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c
index ad8d98a..a568213 100644
--- a/utilities/ovs-dpctl.c
+++ b/utilities/ovs-dpctl.c
@@ -476,12 +476,15 @@ do_dump_flows(int argc OVS_UNUSED, char *argv[])
     dpif_dump_init(&dump);
     for (;;) {
         enum { MAX_ACTIONS = 4096 }; /* An arbitrary but large number. */
-        struct nlattr actions[MAX_ACTIONS];
+        uint32_t actions[MAX_ACTIONS * sizeof(struct nlattr) / 4];
+        uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
         struct odp_flow f;
 
         memset(&f, 0, sizeof f);
-        f.actions = actions;
+        f.actions = (struct nlattr *) actions;
         f.actions_len = sizeof actions;
+        f.key = (struct nlattr *) keybuf;
+        f.key_len = sizeof keybuf;
 
         if (dpif_dump_next(dpif, &dump, &f)) {
             break;
-- 
1.7.1





More information about the dev mailing list