[ovs-dev] [netlink v5 43/61] datapath: Convert datapath operations to use Netlink framing.

Ben Pfaff blp at nicira.com
Thu Jan 27 00:23:26 UTC 2011


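Instead of the fixed-layout structures used by the old flow and datapath ioctls (struct odp_flow with embedded pointers, struct odp_flow_put, struct odp_flow_dump, and the assorted single-value ODP_DP_* ioctls), every request is now a small fixed header (struct odp_datapath or struct odp_flow) followed by Netlink attributes in one buffer, and the kernel replies in the same format, limited by the caller-supplied total_len.

Concretely, ODP_DP_CREATE, ODP_DP_DESTROY, ODP_DP_STATS, ODP_GET/SET_DROP_FRAGS, and ODP_GET/SET_SFLOW_PROBABILITY become ODP_DP_NEW, ODP_DP_DEL, ODP_DP_GET, ODP_DP_SET, and ODP_DP_DUMP, and the flow ioctls become ODP_FLOW_NEW, ODP_FLOW_SET, ODP_FLOW_GET, ODP_FLOW_DEL, and ODP_FLOW_DUMP.  Flow statistics, TCP flags, and the last-used time move out of struct odp_flow_stats into ODP_FLOW_ATTR_* attributes, datapaths can be addressed by name through ODP_DP_ATTR_NAME, ODP_MAX disappears from the public header, and most of the 32-bit compat translation code goes away because the new framing carries no pointers.
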
Signed-off-by: Ben Pfaff <blp at nicira.com>
---
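For reviewers, here is roughly what a userspace caller looks like under the new framing.  This note is not part of the commit: only the structures and constants come from the headers this patch adds; the helper name, the buffer handling, and the assumption that fd is an open Open vSwitch device are illustrative.

#include <linux/netlink.h>      /* struct nlattr, NLA_HDRLEN, NLA_ALIGN */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

#include "openvswitch/datapath-protocol.h"

/* Builds an ODP_DP_GET request for the datapath named 'name' in 'buf'
 * (buf_len bytes, large enough for the request and the expected reply)
 * and issues it on 'fd'.  The kernel overwrites 'buf' with its reply:
 * an odp_datapath header followed by ODP_DP_ATTR_* attributes, writing
 * at most 'buf_len' bytes. */
static int
get_datapath_by_name(int fd, const char *name, void *buf, uint32_t buf_len)
{
    struct odp_datapath *hdr = buf;
    struct nlattr *nla = (struct nlattr *) (hdr + 1);

    memset(buf, 0, buf_len);
    nla->nla_type = ODP_DP_ATTR_NAME;
    nla->nla_len = NLA_HDRLEN + strlen(name) + 1;   /* NUL-terminated string. */
    memcpy(nla + 1, name, strlen(name) + 1);

    hdr->dp_idx = -1;                        /* Ignored when a name is given. */
    hdr->len = sizeof *hdr + NLA_ALIGN(nla->nla_len);
    hdr->total_len = buf_len;                /* Space available for the reply. */

    return ioctl(fd, ODP_DP_GET, buf);
}
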
 datapath/datapath.c                     | 1506 ++++++++++++++++---------------
 datapath/flow.c                         |   94 +--
 datapath/flow.h                         |    7 +-
 datapath/odp-compat.h                   |   25 -
 include/openvswitch/datapath-protocol.h |  118 ++-
 lib/dpif-linux.c                        |  696 +++++++++++----
 6 files changed, 1455 insertions(+), 991 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index f6a2824..ea998ed 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -66,7 +66,7 @@ EXPORT_SYMBOL(dp_ioctl_hook);
  * It is safe to access the datapath and vport structures with just
  * dp_mutex.
  */
-static struct datapath __rcu *dps[ODP_MAX];
+static struct datapath __rcu *dps[256];
 static DEFINE_MUTEX(dp_mutex);
 
 static struct vport *new_vport(const struct vport_parms *);
@@ -74,7 +74,7 @@ static struct vport *new_vport(const struct vport_parms *);
 /* Must be called with rcu_read_lock or dp_mutex. */
 struct datapath *get_dp(int dp_idx)
 {
-	if (dp_idx < 0 || dp_idx >= ODP_MAX)
+	if (dp_idx < 0 || dp_idx >= ARRAY_SIZE(dps))
 		return NULL;
 	return rcu_dereference_check(dps[dp_idx], rcu_read_lock_held() ||
 					 lockdep_is_held(&dp_mutex));
@@ -206,111 +206,6 @@ static struct kobj_type dp_ktype = {
 	.release = release_dp
 };
 
-static int create_dp(int dp_idx, const char __user *devnamep)
-{
-	struct vport_parms parms;
-	char devname[IFNAMSIZ];
-	struct vport *vport;
-	struct datapath *dp;
-	int err;
-	int i;
-
-	if (devnamep) {
-		int retval = strncpy_from_user(devname, devnamep, IFNAMSIZ);
-		if (retval < 0) {
-			err = -EFAULT;
-			goto err;
-		} else if (retval >= IFNAMSIZ) {
-			err = -ENAMETOOLONG;
-			goto err;
-		}
-	} else {
-		snprintf(devname, sizeof(devname), "of%d", dp_idx);
-	}
-
-	rtnl_lock();
-	mutex_lock(&dp_mutex);
-	err = -ENODEV;
-	if (!try_module_get(THIS_MODULE))
-		goto err_unlock;
-
-	/* Exit early if a datapath with that number already exists.
-	 * (We don't use -EEXIST because that's ambiguous with 'devname'
-	 * conflicting with an existing network device name.) */
-	err = -EBUSY;
-	if (get_dp(dp_idx))
-		goto err_put_module;
-
-	err = -ENOMEM;
-	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
-	if (dp == NULL)
-		goto err_put_module;
-	INIT_LIST_HEAD(&dp->port_list);
-	mutex_init(&dp->mutex);
-	mutex_lock(&dp->mutex);
-	dp->dp_idx = dp_idx;
-	for (i = 0; i < DP_N_QUEUES; i++)
-		skb_queue_head_init(&dp->queues[i]);
-	init_waitqueue_head(&dp->waitqueue);
-
-	/* Initialize kobject for bridge.  This will be added as
-	 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
-	dp->ifobj.kset = NULL;
-	kobject_init(&dp->ifobj, &dp_ktype);
-
-	/* Allocate table. */
-	err = -ENOMEM;
-	rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
-	if (!dp->table)
-		goto err_free_dp;
-
-	/* Set up our datapath device. */
-	parms.name = devname;
-	parms.type = ODP_VPORT_TYPE_INTERNAL;
-	parms.options = NULL;
-	parms.dp = dp;
-	parms.port_no = ODPP_LOCAL;
-	vport = new_vport(&parms);
-	if (IS_ERR(vport)) {
-		err = PTR_ERR(vport);
-		if (err == -EBUSY)
-			err = -EEXIST;
-
-		goto err_destroy_table;
-	}
-
-	dp->drop_frags = 0;
-	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
-	if (!dp->stats_percpu) {
-		err = -ENOMEM;
-		goto err_destroy_local_port;
-	}
-
-	rcu_assign_pointer(dps[dp_idx], dp);
-	dp_sysfs_add_dp(dp);
-
-	mutex_unlock(&dp->mutex);
-	mutex_unlock(&dp_mutex);
-	rtnl_unlock();
-
-	return 0;
-
-err_destroy_local_port:
-	dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
-err_destroy_table:
-	tbl_destroy(get_table_protected(dp), NULL);
-err_free_dp:
-	mutex_unlock(&dp->mutex);
-	kfree(dp);
-err_put_module:
-	module_put(THIS_MODULE);
-err_unlock:
-	mutex_unlock(&dp_mutex);
-	rtnl_unlock();
-err:
-	return err;
-}
-
 static void destroy_dp_rcu(struct rcu_head *rcu)
 {
 	struct datapath *dp = container_of(rcu, struct datapath, rcu);
@@ -324,22 +219,11 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
 	kobject_put(&dp->ifobj);
 }
 
-static int destroy_dp(int dp_idx)
+/* Caller must hold RTNL, dp_mutex, and dp->mutex. */
+static void destroy_dp(struct datapath *dp)
 {
-	struct datapath *dp;
-	int err = 0;
 	struct vport *p, *n;
 
-	rtnl_lock();
-	mutex_lock(&dp_mutex);
-	dp = get_dp(dp_idx);
-	if (!dp) {
-		err = -ENODEV;
-		goto out;
-	}
-
-	mutex_lock(&dp->mutex);
-
 	list_for_each_entry_safe (p, n, &dp->port_list, node)
 		if (p->port_no != ODPP_LOCAL)
 			dp_detach_port(p);
@@ -351,11 +235,6 @@ static int destroy_dp(int dp_idx)
 	mutex_unlock(&dp->mutex);
 	call_rcu(&dp->rcu, destroy_dp_rcu);
 	module_put(THIS_MODULE);
-
-out:
-	mutex_unlock(&dp_mutex);
-	rtnl_unlock();
-	return err;
 }
 
 /* Called with RTNL lock and dp->mutex. */
@@ -745,54 +624,25 @@ static int validate_actions(const struct nlattr *actions, u32 actions_len)
 	return 0;
 }
 
-static struct sw_flow_actions *get_actions(const struct odp_flow *flow)
+struct dp_flowcmd {
+	u32 nlmsg_flags;
+	u32 dp_idx;
+	u32 total_len;
+	struct sw_flow_key key;
+	const struct nlattr *actions;
+	u32 actions_len;
+	bool clear;
+	u64 state;
+};
+
+static struct sw_flow_actions *get_actions(const struct dp_flowcmd *flowcmd)
 {
 	struct sw_flow_actions *actions;
-	int error;
-
-	actions = flow_actions_alloc(flow->actions_len);
-	error = PTR_ERR(actions);
-	if (IS_ERR(actions))
-		goto error;
-
-	error = -EFAULT;
-	if (copy_from_user(actions->actions,
-			   (struct nlattr __user __force *)flow->actions,
-			   flow->actions_len))
-		goto error_free_actions;
-	error = validate_actions(actions->actions, actions->actions_len);
-	if (error)
-		goto error_free_actions;
 
+	actions = flow_actions_alloc(flowcmd->actions_len);
+	if (!IS_ERR(actions) && flowcmd->actions_len)
+		memcpy(actions->actions, flowcmd->actions, flowcmd->actions_len);
 	return actions;
-
-error_free_actions:
-	kfree(actions);
-error:
-	return ERR_PTR(error);
-}
-
-static void get_stats(struct sw_flow *flow, struct odp_flow_stats *stats)
-{
-	if (flow->used) {
-		struct timespec offset_ts, used, now_mono;
-
-		ktime_get_ts(&now_mono);
-		jiffies_to_timespec(jiffies - flow->used, &offset_ts);
-		set_normalized_timespec(&used, now_mono.tv_sec - offset_ts.tv_sec,
-					now_mono.tv_nsec - offset_ts.tv_nsec);
-
-		stats->used_sec = used.tv_sec;
-		stats->used_nsec = used.tv_nsec;
-	} else {
-		stats->used_sec = 0;
-		stats->used_nsec = 0;
-	}
-
-	stats->n_packets = flow->packet_count;
-	stats->n_bytes = flow->byte_count;
-	stats->reserved = 0;
-	stats->tcp_flags = flow->tcp_flags;
 }
 
 static void clear_stats(struct sw_flow *flow)
@@ -815,307 +665,7 @@ static int expand_table(struct datapath *dp)
 	rcu_assign_pointer(dp->table, new_table);
 	tbl_deferred_destroy(old_table, NULL);
 
-	return 0;
-}
-
-static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf,
-		       struct odp_flow_stats *stats)
-{
-	struct tbl_node *flow_node;
-	struct sw_flow_key key;
-	struct sw_flow *flow;
-	struct tbl *table;
-	struct sw_flow_actions *acts = NULL;
-	int error;
-	u32 hash;
-
-	error = flow_copy_from_user(&key, (const struct nlattr __force __user *)uf->flow.key,
-				    uf->flow.key_len);
-	if (error)
-		return error;
-
-	hash = flow_hash(&key);
-	table = get_table_protected(dp);
-	flow_node = tbl_lookup(table, &key, hash, flow_cmp);
-	if (!flow_node) {
-		/* No such flow. */
-		error = -ENOENT;
-		if (!(uf->flags & ODPPF_CREATE))
-			goto error;
-
-		/* Expand table, if necessary, to make room. */
-		if (tbl_count(table) >= tbl_n_buckets(table)) {
-			error = expand_table(dp);
-			if (error)
-				goto error;
-			table = get_table_protected(dp);
-		}
-
-		/* Allocate flow. */
-		flow = flow_alloc();
-		if (IS_ERR(flow)) {
-			error = PTR_ERR(flow);
-			goto error;
-		}
-		flow->key = key;
-		clear_stats(flow);
-
-		/* Obtain actions. */
-		acts = get_actions(&uf->flow);
-		error = PTR_ERR(acts);
-		if (IS_ERR(acts))
-			goto error_free_flow;
-		rcu_assign_pointer(flow->sf_acts, acts);
-
-		/* Put flow in bucket. */
-		error = tbl_insert(table, &flow->tbl_node, hash);
-		if (error)
-			goto error_free_flow_acts;
-
-		memset(stats, 0, sizeof(struct odp_flow_stats));
-	} else {
-		/* We found a matching flow. */
-		struct sw_flow_actions *old_acts, *new_acts;
-
-		flow = flow_cast(flow_node);
-
-		/* Bail out if we're not allowed to modify an existing flow. */
-		error = -EEXIST;
-		if (!(uf->flags & ODPPF_MODIFY))
-			goto error;
-
-		/* Swap actions. */
-		new_acts = get_actions(&uf->flow);
-		error = PTR_ERR(new_acts);
-		if (IS_ERR(new_acts))
-			goto error;
-
-		old_acts = rcu_dereference_protected(flow->sf_acts,
-						     lockdep_is_held(&dp->mutex));
-		if (old_acts->actions_len != new_acts->actions_len ||
-		    memcmp(old_acts->actions, new_acts->actions,
-			   old_acts->actions_len)) {
-			rcu_assign_pointer(flow->sf_acts, new_acts);
-			flow_deferred_free_acts(old_acts);
-		} else {
-			kfree(new_acts);
-		}
-
-		/* Fetch stats, then clear them if necessary. */
-		spin_lock_bh(&flow->lock);
-		get_stats(flow, stats);
-		if (uf->flags & ODPPF_ZERO_STATS)
-			clear_stats(flow);
-		spin_unlock_bh(&flow->lock);
-	}
-
-	return 0;
-
-error_free_flow_acts:
-	kfree(acts);
-error_free_flow:
-	flow->sf_acts = NULL;
-	flow_put(flow);
-error:
-	return error;
-}
-
-static int put_flow(struct odp_flow_put __user *ufp)
-{
-	struct odp_flow_stats stats;
-	struct odp_flow_put uf;
-	struct datapath *dp;
-	int error;
-
-	if (copy_from_user(&uf, ufp, sizeof(struct odp_flow_put)))
-		return -EFAULT;
-
-	dp = get_dp_locked(uf.flow.dp_idx);
-	if (!dp)
-		return -ENODEV;
-
-	error = do_put_flow(dp, &uf, &stats);
-	if (!error) {
-		if (copy_to_user(&ufp->flow.stats, &stats,
-				 sizeof(struct odp_flow_stats)))
-			error = -EFAULT;
-	}
-	mutex_unlock(&dp->mutex);
-
-	return error;
-}
-
-static int do_answer_query(struct datapath *dp, struct sw_flow *flow,
-			   struct odp_flow_stats __user *ustats,
-			   struct nlattr __user *actions,
-			   u32 __user *actions_lenp)
-{
-	struct sw_flow_actions *sf_acts;
-	struct odp_flow_stats stats;
-	u32 actions_len;
-
-	spin_lock_bh(&flow->lock);
-	get_stats(flow, &stats);
-	spin_unlock_bh(&flow->lock);
-
-	if (copy_to_user(ustats, &stats, sizeof(struct odp_flow_stats)) ||
-	    get_user(actions_len, actions_lenp))
-		return -EFAULT;
-
-	if (!actions_len)
-		return 0;
-
-	sf_acts = rcu_dereference_protected(flow->sf_acts,
-					    lockdep_is_held(&dp->mutex));
-	if (put_user(sf_acts->actions_len, actions_lenp) ||
-	    (actions && copy_to_user(actions, sf_acts->actions,
-				     min(sf_acts->actions_len, actions_len))))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int answer_query(struct datapath *dp, struct sw_flow *flow,
-			struct odp_flow __user *ufp)
-{
-	struct nlattr __user *actions;
-
-	if (get_user(actions, (struct nlattr __user * __user *)&ufp->actions))
-		return -EFAULT;
-
-	return do_answer_query(dp, flow, &ufp->stats, actions, &ufp->actions_len);
-}
-
-static struct sw_flow *do_del_flow(struct datapath *dp, const struct nlattr __user *key, u32 key_len)
-{
-	struct tbl *table = get_table_protected(dp);
-	struct tbl_node *flow_node;
-	struct sw_flow_key swkey;
-	int error;
-
-	error = flow_copy_from_user(&swkey, key, key_len);
-	if (error)
-		return ERR_PTR(error);
-
-	flow_node = tbl_lookup(table, &swkey, flow_hash(&swkey), flow_cmp);
-	if (!flow_node)
-		return ERR_PTR(-ENOENT);
-
-	error = tbl_remove(table, flow_node);
-	if (error)
-		return ERR_PTR(error);
-
-	/* XXX Returned flow_node's statistics might lose a few packets, since
-	 * other CPUs can be using this flow.  We used to synchronize_rcu() to
-	 * make sure that we get completely accurate stats, but that blows our
-	 * performance, badly. */
-	return flow_cast(flow_node);
-}
-
-static int del_flow(struct odp_flow __user *ufp)
-{
-	struct sw_flow *flow;
-	struct datapath *dp;
-	struct odp_flow uf;
-	int error;
-
-	if (copy_from_user(&uf, ufp, sizeof(uf)))
-		return -EFAULT;
-
-	dp = get_dp_locked(uf.dp_idx);
-	if (!dp)
-		return -ENODEV;
-
-	flow = do_del_flow(dp, (const struct nlattr __force __user *)uf.key, uf.key_len);
-	error = PTR_ERR(flow);
-	if (!IS_ERR(flow)) {
-		error = answer_query(dp, flow, ufp);
-		flow_deferred_free(flow);
-	}
-	mutex_unlock(&dp->mutex);
-
-	return error;
-}
-
-static int query_flow(struct odp_flow __user *uflow)
-{
-	struct tbl_node *flow_node;
-	struct sw_flow_key key;
-	struct odp_flow flow;
-	struct datapath *dp;
-	int error;
-
-	if (copy_from_user(&flow, uflow, sizeof(flow)))
-		return -EFAULT;
-
-	dp = get_dp_locked(flow.dp_idx);
-	if (!dp)
-		return -ENODEV;
-
-	error = flow_copy_from_user(&key, (const struct nlattr __force __user *)flow.key, flow.key_len);
-	if (!error) {
-		struct tbl *table = get_table_protected(dp);
-		flow_node = tbl_lookup(table, &flow.key, flow_hash(&key), flow_cmp);
-		if (flow_node)
-			error = answer_query(dp, flow_cast(flow_node), uflow);
-		else
-			error = -ENOENT;
-	}
-	mutex_unlock(&dp->mutex);
-
-	return error;
-}
-
-static int dump_flow(struct odp_flow_dump __user *udump)
-{
-	struct odp_flow __user *uflow;
-	struct nlattr __user *ukey;
-	struct tbl_node *tbl_node;
-	struct odp_flow_dump dump;
-	struct sw_flow *flow;
-	struct datapath *dp;
-	struct tbl *table;
-	u32 key_len;
-	int err;
-
-	err = -EFAULT;
-	if (copy_from_user(&dump, udump, sizeof(struct odp_flow_dump)))
-		goto exit;
-	uflow = (struct odp_flow __user __force *)dump.flow;
-
-	dp = get_dp_locked(dump.dp_idx);
-	err = -ENODEV;
-	if (!dp)
-		goto exit;
-
-	table = get_table_protected(dp);
-	tbl_node = tbl_next(table, &dump.state[0], &dump.state[1]);
-	if (!tbl_node) {
-		err = put_user(0, &uflow->key_len);
-		goto exit_unlock;
-	}
-	flow = flow_cast(tbl_node);
-
-	err = -EFAULT;
-	if (copy_to_user(udump->state, dump.state, 2 * sizeof(uint32_t)) ||
-	    get_user(ukey, (struct nlattr __user * __user*)&uflow->key) ||
-	    get_user(key_len, &uflow->key_len))
-		goto exit_unlock;
-
-	key_len = flow_copy_to_user(ukey, &flow->key, key_len);
-	err = key_len;
-	if (key_len < 0)
-		goto exit_unlock;
-	err = -EFAULT;
-	if (put_user(key_len, &uflow->key_len))
-		goto exit_unlock;
-
-	err = answer_query(dp, flow, uflow);
-
-exit_unlock:
-	mutex_unlock(&dp->mutex);
-exit:
-	return err;
+	return 0;
 }
 
 static int do_execute(struct datapath *dp, const struct odp_execute *execute)
@@ -1205,12 +755,11 @@ static int execute_packet(const struct odp_execute __user *executep)
 	return error;
 }
 
-static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
+static void get_dp_stats(struct datapath *dp, struct odp_stats *stats)
 {
-	struct odp_stats stats;
 	int i;
 
-	stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0;
+	stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
 	for_each_possible_cpu(i) {
 		const struct dp_stats_percpu *percpu_stats;
 		struct dp_stats_percpu local_stats;
@@ -1223,12 +772,11 @@ static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
 			local_stats = *percpu_stats;
 		} while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
 
-		stats.n_frags += local_stats.n_frags;
-		stats.n_hit += local_stats.n_hit;
-		stats.n_missed += local_stats.n_missed;
-		stats.n_lost += local_stats.n_lost;
+		stats->n_frags += local_stats.n_frags;
+		stats->n_hit += local_stats.n_hit;
+		stats->n_missed += local_stats.n_missed;
+		stats->n_lost += local_stats.n_lost;
 	}
-	return copy_to_user(statsp, &stats, sizeof(stats)) ? -EFAULT : 0;
 }
 
 /* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports */
@@ -1282,6 +830,749 @@ static void set_listen_mask(struct file *f, int listen_mask)
 	f->private_data = (void*)(long)listen_mask;
 }
 
+static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = {
+	[ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+	[ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
+	[ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+	[ODP_FLOW_ATTR_STATE] = { .type = NLA_U64 },
+};
+
+static int copy_flow_to_user(struct odp_flow __user *dst, struct datapath *dp,
+			     struct sw_flow *flow, u32 total_len, u64 state)
+{
+	const struct sw_flow_actions *sf_acts;
+	struct odp_flow_stats stats;
+	struct odp_flow *odp_flow;
+	struct sk_buff *skb;
+	struct nlattr *nla;
+	unsigned long used;
+	u8 tcp_flags;
+	int err;
+
+	sf_acts = rcu_dereference_protected(flow->sf_acts,
+					    lockdep_is_held(&dp->mutex));
+
+	skb = alloc_skb(128 + FLOW_BUFSIZE + sf_acts->actions_len, GFP_KERNEL);
+	err = -ENOMEM;
+	if (!skb)
+		goto exit;
+
+	rcu_read_lock();
+	odp_flow = (struct odp_flow*)__skb_put(skb, sizeof(struct odp_flow));
+	odp_flow->dp_idx = dp->dp_idx;
+	odp_flow->total_len = total_len;
+
+	nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
+	if (!nla)
+		goto nla_put_failure;
+	err = flow_to_nlattrs(&flow->key, skb);
+	if (err)
+		goto exit_unlock;
+	nla_nest_end(skb, nla);
+
+	nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
+	if (!nla || skb_tailroom(skb) < sf_acts->actions_len)
+		goto nla_put_failure;
+	memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len);
+	nla_nest_end(skb, nla);
+
+	spin_lock_bh(&flow->lock);
+	used = flow->used;
+	stats.n_packets = flow->packet_count;
+	stats.n_bytes = flow->byte_count;
+	tcp_flags = flow->tcp_flags;
+	spin_unlock_bh(&flow->lock);
+
+	if (used)
+		NLA_PUT_MSECS(skb, ODP_FLOW_ATTR_USED, used);
+
+	if (stats.n_packets)
+		NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);
+
+	if (tcp_flags)
+		NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);
+
+	if (state)
+		NLA_PUT_U64(skb, ODP_FLOW_ATTR_STATE, state);
+
+	if (skb->len > total_len)
+		goto nla_put_failure;
+
+	odp_flow->len = skb->len;
+	err = copy_to_user(dst, skb->data, skb->len) ? -EFAULT : 0;
+	goto exit_unlock;
+
+nla_put_failure:
+	err = -EMSGSIZE;
+exit_unlock:
+	rcu_read_unlock();
+	kfree_skb(skb);
+exit:
+	return err;
+}
+
+static struct sk_buff *copy_flow_from_user(struct odp_flow __user *uodp_flow,
+					   struct dp_flowcmd *flowcmd)
+{
+	struct nlattr *a[ODP_FLOW_ATTR_MAX + 1];
+	struct odp_flow *odp_flow;
+	struct sk_buff *skb;
+	u32 len;
+	int err;
+
+	if (get_user(len, &uodp_flow->len))
+		return ERR_PTR(-EFAULT);
+	if (len < sizeof(struct odp_flow))
+		return ERR_PTR(-EINVAL);
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	err = -EFAULT;
+	if (copy_from_user(__skb_put(skb, len), uodp_flow, len))
+		goto error_free_skb;
+
+	odp_flow = (struct odp_flow *)skb->data;
+	err = -EINVAL;
+	if (odp_flow->len != len)
+		goto error_free_skb;
+
+	flowcmd->nlmsg_flags = odp_flow->nlmsg_flags;
+	flowcmd->dp_idx = odp_flow->dp_idx;
+	flowcmd->total_len = odp_flow->total_len;
+
+	err = nla_parse(a, ODP_FLOW_ATTR_MAX,
+			(struct nlattr *)(skb->data + sizeof(struct odp_flow)),
+			skb->len - sizeof(struct odp_flow), flow_policy);
+	if (err)
+		goto error_free_skb;
+
+	/* ODP_FLOW_ATTR_KEY. */
+	if (a[ODP_FLOW_ATTR_KEY]) {
+		err = flow_from_nlattrs(&flowcmd->key, a[ODP_FLOW_ATTR_KEY]);
+		if (err)
+			goto error_free_skb;
+	} else
+		memset(&flowcmd->key, 0, sizeof(struct sw_flow_key));
+
+	/* ODP_FLOW_ATTR_ACTIONS. */
+	if (a[ODP_FLOW_ATTR_ACTIONS]) {
+		flowcmd->actions = nla_data(a[ODP_FLOW_ATTR_ACTIONS]);
+		flowcmd->actions_len = nla_len(a[ODP_FLOW_ATTR_ACTIONS]);
+		err = validate_actions(flowcmd->actions, flowcmd->actions_len);
+		if (err)
+			goto error_free_skb;
+	} else {
+		flowcmd->actions = NULL;
+		flowcmd->actions_len = 0;
+	}
+
+	flowcmd->clear = a[ODP_FLOW_ATTR_CLEAR] != NULL;
+
+	flowcmd->state = a[ODP_FLOW_ATTR_STATE] ? nla_get_u64(a[ODP_FLOW_ATTR_STATE]) : 0;
+
+	return skb;
+
+error_free_skb:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+
+static int new_flow(unsigned int cmd, struct odp_flow __user *uodp_flow)
+{
+	struct tbl_node *flow_node;
+	struct dp_flowcmd flowcmd;
+	struct sw_flow *flow;
+	struct sk_buff *skb;
+	struct datapath *dp;
+	struct tbl *table;
+	u32 hash;
+	int error;
+
+	skb = copy_flow_from_user(uodp_flow, &flowcmd);
+	error = PTR_ERR(skb);
+	if (IS_ERR(skb))
+		goto exit;
+
+	dp = get_dp_locked(flowcmd.dp_idx);
+	error = -ENODEV;
+	if (!dp)
+		goto error_kfree_skb;
+
+	hash = flow_hash(&flowcmd.key);
+	table = get_table_protected(dp);
+	flow_node = tbl_lookup(table, &flowcmd.key, hash, flow_cmp);
+	if (!flow_node) {
+		struct sw_flow_actions *acts;
+
+		/* Bail out if we're not allowed to create a new flow. */
+		error = -ENOENT;
+		if (cmd == ODP_FLOW_SET)
+			goto error_unlock_dp;
+
+		/* Expand table, if necessary, to make room. */
+		if (tbl_count(table) >= tbl_n_buckets(table)) {
+			error = expand_table(dp);
+			if (error)
+				goto error_unlock_dp;
+			table = get_table_protected(dp);
+		}
+
+		/* Allocate flow. */
+		flow = flow_alloc();
+		if (IS_ERR(flow)) {
+			error = PTR_ERR(flow);
+			goto error_unlock_dp;
+		}
+		flow->key = flowcmd.key;
+		clear_stats(flow);
+
+		/* Obtain actions. */
+		acts = get_actions(&flowcmd);
+		error = PTR_ERR(acts);
+		if (IS_ERR(acts))
+			goto error_free_flow;
+		rcu_assign_pointer(flow->sf_acts, acts);
+
+		error = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0);
+		if (error)
+			goto error_free_flow;
+
+		/* Put flow in bucket. */
+		error = tbl_insert(table, &flow->tbl_node, hash);
+		if (error)
+			goto error_free_flow;
+	} else {
+		/* We found a matching flow. */
+		struct sw_flow_actions *old_acts;
+
+		/* Bail out if we're not allowed to modify an existing flow.
+		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
+		 * because Generic Netlink treats the latter as a dump
+		 * request.  We also accept NLM_F_EXCL in case that bug ever
+		 * gets fixed.
+		 */
+		error = -EEXIST;
+		if (flowcmd.nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
+			goto error_kfree_skb;
+
+		/* Update actions. */
+		flow = flow_cast(flow_node);
+		old_acts = rcu_dereference_protected(flow->sf_acts,
+						     lockdep_is_held(&dp->mutex));
+		if (flowcmd.actions &&
+		    (old_acts->actions_len != flowcmd.actions_len ||
+		     memcmp(old_acts->actions, flowcmd.actions,
+			    flowcmd.actions_len))) {
+			struct sw_flow_actions *new_acts;
+
+			new_acts = get_actions(&flowcmd);
+			error = PTR_ERR(new_acts);
+			if (IS_ERR(new_acts))
+				goto error_kfree_skb;
+
+			rcu_assign_pointer(flow->sf_acts, new_acts);
+			flow_deferred_free_acts(old_acts);
+		}
+
+		error = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0);
+		if (error)
+			goto error_kfree_skb;
+
+		/* Clear stats. */
+		if (flowcmd.clear) {
+			spin_lock_bh(&flow->lock);
+			clear_stats(flow);
+			spin_unlock_bh(&flow->lock);
+		}
+	}
+	kfree_skb(skb);
+	mutex_unlock(&dp->mutex);
+	return 0;
+
+error_free_flow:
+	flow_put(flow);
+error_unlock_dp:
+	mutex_unlock(&dp->mutex);
+error_kfree_skb:
+	kfree_skb(skb);
+exit:
+	return error;
+}
+
+static int get_or_del_flow(unsigned int cmd, struct odp_flow __user *uodp_flow)
+{
+	struct tbl_node *flow_node;
+	struct dp_flowcmd flowcmd;
+	struct sw_flow *flow;
+	struct sk_buff *skb;
+	struct datapath *dp;
+	struct tbl *table;
+	int err;
+
+	skb = copy_flow_from_user(uodp_flow, &flowcmd);
+	err = PTR_ERR(skb);
+	if (IS_ERR(skb))
+		goto exit;
+
+	dp = get_dp_locked(flowcmd.dp_idx);
+	err = -ENODEV;
+	if (!dp)
+		goto exit_kfree_skb;
+
+	table = get_table_protected(dp);
+	flow_node = tbl_lookup(table, &flowcmd.key, flow_hash(&flowcmd.key), flow_cmp);
+	err = -ENOENT;
+	if (!flow_node)
+		goto exit_unlock_dp;
+
+	if (cmd == ODP_FLOW_DEL) {
+		err = tbl_remove(table, flow_node);
+		if (err)
+			goto exit_unlock_dp;
+	}
+
+	flow = flow_cast(flow_node);
+	err = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0);
+	if (!err && cmd == ODP_FLOW_DEL)
+		flow_deferred_free(flow);
+
+exit_unlock_dp:
+	mutex_unlock(&dp->mutex);
+exit_kfree_skb:
+	kfree_skb(skb);
+exit:
+	return err;
+}
+
+static int dump_flow(struct odp_flow __user *uodp_flow)
+{
+	struct tbl_node *flow_node;
+	struct dp_flowcmd flowcmd;
+	struct sw_flow *flow;
+	struct sk_buff *skb;
+	struct datapath *dp;
+	u32 bucket, obj;
+	int err;
+
+	skb = copy_flow_from_user(uodp_flow, &flowcmd);
+	err = PTR_ERR(skb);
+	if (IS_ERR(skb))
+		goto exit;
+
+	dp = get_dp_locked(flowcmd.dp_idx);
+	err = -ENODEV;
+	if (!dp)
+		goto exit_free;
+
+	bucket = flowcmd.state >> 32;
+	obj = flowcmd.state;
+	flow_node = tbl_next(dp->table, &bucket, &obj);
+	err = -ENODEV;
+	if (!flow_node)
+		goto exit_unlock_dp;
+
+	flow = flow_cast(flow_node);
+	err = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len,
+				((u64)bucket << 32) | obj);
+
+exit_unlock_dp:
+	mutex_unlock(&dp->mutex);
+exit_free:
+	kfree_skb(skb);
+exit:
+	return err;
+}
+
+static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
+	[ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
+	[ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
+};
+
+static int copy_datapath_to_user(void __user *dst, struct datapath *dp, uint32_t total_len)
+{
+	struct odp_datapath *odp_datapath;
+	struct sk_buff *skb;
+	struct nlattr *nla;
+	int err;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	err = -ENOMEM;
+	if (!skb)
+		goto exit;
+
+	odp_datapath = (struct odp_datapath*)__skb_put(skb, sizeof(struct odp_datapath));
+	odp_datapath->dp_idx = dp->dp_idx;
+	odp_datapath->total_len = total_len;
+
+	rcu_read_lock();
+	err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
+	rcu_read_unlock();
+	if (err)
+		goto nla_put_failure;
+
+	nla = nla_reserve(skb, ODP_DP_ATTR_STATS, sizeof(struct odp_stats));
+	if (!nla)
+		goto nla_put_failure;
+	get_dp_stats(dp, nla_data(nla));
+
+	NLA_PUT_U32(skb, ODP_DP_ATTR_IPV4_FRAGS,
+		    dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);
+
+	if (dp->sflow_probability)
+		NLA_PUT_U32(skb, ODP_DP_ATTR_SAMPLING, dp->sflow_probability);
+
+	if (skb->len > total_len)
+		goto nla_put_failure;
+
+	odp_datapath->len = skb->len;
+	err = copy_to_user(dst, skb->data, skb->len) ? -EFAULT : 0;
+	goto exit_free_skb;
+
+nla_put_failure:
+	err = -EMSGSIZE;
+exit_free_skb:
+	kfree_skb(skb);
+exit:
+	return err;
+}
+
+static struct sk_buff *copy_datapath_from_user(struct odp_datapath __user *uodp_datapath, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
+{
+	struct odp_datapath *odp_datapath;
+	struct sk_buff *skb;
+	u32 len;
+	int err;
+
+	if (get_user(len, &uodp_datapath->len))
+		return ERR_PTR(-EFAULT);
+	if (len < sizeof(struct odp_datapath))
+		return ERR_PTR(-EINVAL);
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	err = -EFAULT;
+	if (copy_from_user(__skb_put(skb, len), uodp_datapath, len))
+		goto error_free_skb;
+
+	odp_datapath = (struct odp_datapath *)skb->data;
+	err = -EINVAL;
+	if (odp_datapath->len != len)
+		goto error_free_skb;
+
+	err = nla_parse(a, ODP_DP_ATTR_MAX,
+			(struct nlattr *)(skb->data + sizeof(struct odp_datapath)),
+			skb->len - sizeof(struct odp_datapath), datapath_policy);
+	if (err)
+		goto error_free_skb;
+
+	if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
+		u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);
+
+		err = -EINVAL;
+		if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP)
+			goto error_free_skb;
+	}
+
+	err = VERIFY_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
+	if (err)
+		goto error_free_skb;
+
+	return skb;
+
+error_free_skb:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+
+/* Called with dp_mutex and optionally with RTNL lock also.
+ * Holds the returned datapath's mutex on return.
+ */
+static struct datapath *lookup_datapath(struct odp_datapath *odp_datapath, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
+{
+	WARN_ON_ONCE(!mutex_is_locked(&dp_mutex));
+
+	if (!a[ODP_DP_ATTR_NAME]) {
+		struct datapath *dp;
+
+		dp = get_dp(odp_datapath->dp_idx);
+		if (!dp)
+			return ERR_PTR(-ENODEV);
+		mutex_lock(&dp->mutex);
+		return dp;
+	} else {
+		struct datapath *dp;
+		struct vport *vport;
+		int dp_idx;
+
+		vport_lock();
+		vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
+		dp_idx = vport && vport->port_no == ODPP_LOCAL ? vport->dp->dp_idx : -1;
+		vport_unlock();
+
+		if (dp_idx < 0)
+			return ERR_PTR(-ENODEV);
+
+		dp = get_dp(dp_idx);
+		mutex_lock(&dp->mutex);
+		return dp;
+	}
+}
+
+static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
+{
+	if (a[ODP_DP_ATTR_IPV4_FRAGS])
+		dp->drop_frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]) == ODP_DP_FRAG_DROP;
+	if (a[ODP_DP_ATTR_SAMPLING])
+		dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]);
+}
+
+static int new_datapath(struct odp_datapath __user *uodp_datapath)
+{
+	struct nlattr *a[ODP_DP_ATTR_MAX + 1];
+	struct odp_datapath *odp_datapath;
+	struct vport_parms parms;
+	struct sk_buff *skb;
+	struct datapath *dp;
+	struct vport *vport;
+	int dp_idx;
+	int err;
+	int i;
+
+	skb = copy_datapath_from_user(uodp_datapath, a);
+	err = PTR_ERR(skb);
+	if (IS_ERR(skb))
+		goto err;
+	odp_datapath = (struct odp_datapath *)skb->data;
+
+	err = -EINVAL;
+	if (!a[ODP_DP_ATTR_NAME])
+		goto err_free_skb;
+
+	rtnl_lock();
+	mutex_lock(&dp_mutex);
+	err = -ENODEV;
+	if (!try_module_get(THIS_MODULE))
+		goto err_unlock_dp_mutex;
+
+	dp_idx = odp_datapath->dp_idx;
+	if (dp_idx < 0) {
+		err = -EFBIG;
+		for (dp_idx = 0; dp_idx < ARRAY_SIZE(dps); dp_idx++) {
+			if (get_dp(dp_idx))
+				continue;
+			err = 0;
+			break;
+		}
+	} else if (dp_idx < ARRAY_SIZE(dps))
+		err = get_dp(dp_idx) ? -EBUSY : 0;
+	else
+		err = -EINVAL;
+	if (err)
+		goto err_put_module;
+
+	err = -ENOMEM;
+	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+	if (dp == NULL)
+		goto err_put_module;
+	INIT_LIST_HEAD(&dp->port_list);
+	mutex_init(&dp->mutex);
+	mutex_lock(&dp->mutex);
+	dp->dp_idx = dp_idx;
+	for (i = 0; i < DP_N_QUEUES; i++)
+		skb_queue_head_init(&dp->queues[i]);
+	init_waitqueue_head(&dp->waitqueue);
+
+	/* Initialize kobject for bridge.  This will be added as
+	 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
+	dp->ifobj.kset = NULL;
+	kobject_init(&dp->ifobj, &dp_ktype);
+
+	/* Allocate table. */
+	err = -ENOMEM;
+	rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
+	if (!dp->table)
+		goto err_free_dp;
+
+	/* Set up our datapath device. */
+	parms.name = nla_data(a[ODP_DP_ATTR_NAME]);
+	parms.type = ODP_VPORT_TYPE_INTERNAL;
+	parms.options = NULL;
+	parms.dp = dp;
+	parms.port_no = ODPP_LOCAL;
+	vport = new_vport(&parms);
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		if (err == -EBUSY)
+			err = -EEXIST;
+
+		goto err_destroy_table;
+	}
+
+	dp->drop_frags = 0;
+	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
+	if (!dp->stats_percpu) {
+		err = -ENOMEM;
+		goto err_destroy_local_port;
+	}
+
+	change_datapath(dp, a);
+
+	rcu_assign_pointer(dps[dp_idx], dp);
+	dp_sysfs_add_dp(dp);
+
+	mutex_unlock(&dp->mutex);
+	mutex_unlock(&dp_mutex);
+	rtnl_unlock();
+
+	return 0;
+
+err_destroy_local_port:
+	dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
+err_destroy_table:
+	tbl_destroy(get_table_protected(dp), NULL);
+err_free_dp:
+	mutex_unlock(&dp->mutex);
+	kfree(dp);
+err_put_module:
+	module_put(THIS_MODULE);
+err_unlock_dp_mutex:
+	mutex_unlock(&dp_mutex);
+	rtnl_unlock();
+err_free_skb:
+	kfree_skb(skb);
+err:
+	return err;
+}
+
+static int del_datapath(struct odp_datapath __user *uodp_datapath)
+{
+	struct nlattr *a[ODP_DP_ATTR_MAX + 1];
+	struct datapath *dp;
+	struct sk_buff *skb;
+	int err;
+
+	skb = copy_datapath_from_user(uodp_datapath, a);
+	err = PTR_ERR(skb);
+	if (IS_ERR(skb))
+		goto exit;
+
+	rtnl_lock();
+	mutex_lock(&dp_mutex);
+	dp = lookup_datapath((struct odp_datapath *)skb->data, a);
+	err = PTR_ERR(dp);
+	if (IS_ERR(dp))
+		goto exit_free;
+
+	destroy_dp(dp);
+	err = 0;
+
+exit_free:
+	kfree_skb(skb);
+	mutex_unlock(&dp_mutex);
+	rtnl_unlock();
+exit:
+	return err;
+}
+
+static int set_datapath(struct odp_datapath __user *uodp_datapath)
+{
+	struct nlattr *a[ODP_DP_ATTR_MAX + 1];
+	struct datapath *dp;
+	struct sk_buff *skb;
+	int err;
+
+	skb = copy_datapath_from_user(uodp_datapath, a);
+	err = PTR_ERR(skb);
+	if (IS_ERR(skb))
+		goto exit;
+
+	mutex_lock(&dp_mutex);
+	dp = lookup_datapath((struct odp_datapath *)skb->data, a);
+	err = PTR_ERR(dp);
+	if (IS_ERR(dp))
+		goto exit_free;
+
+	change_datapath(dp, a);
+	mutex_unlock(&dp->mutex);
+	err = 0;
+
+exit_free:
+	kfree_skb(skb);
+	mutex_unlock(&dp_mutex);
+exit:
+	return err;
+}
+
+static int get_datapath(struct odp_datapath __user *uodp_datapath)
+{
+	struct nlattr *a[ODP_DP_ATTR_MAX + 1];
+	struct odp_datapath *odp_datapath;
+	struct datapath *dp;
+	struct sk_buff *skb;
+	int err;
+
+	skb = copy_datapath_from_user(uodp_datapath, a);
+	err = PTR_ERR(skb);
+	if (IS_ERR(skb))
+		goto exit;
+	odp_datapath = (struct odp_datapath *)skb->data;
+
+	mutex_lock(&dp_mutex);
+	dp = lookup_datapath(odp_datapath, a);
+	mutex_unlock(&dp_mutex);
+
+	err = PTR_ERR(dp);
+	if (IS_ERR(dp))
+		goto exit_free;
+
+	err = copy_datapath_to_user(uodp_datapath, dp, odp_datapath->total_len);
+	mutex_unlock(&dp->mutex);
+exit_free:
+	kfree_skb(skb);
+exit:
+	return err;
+}
+
+static int dump_datapath(struct odp_datapath __user *uodp_datapath)
+{
+	struct nlattr *a[ODP_DP_ATTR_MAX + 1];
+	struct odp_datapath *odp_datapath;
+	struct sk_buff *skb;
+	u32 dp_idx;
+	int err;
+
+	skb = copy_datapath_from_user(uodp_datapath, a);
+	err = PTR_ERR(skb);
+	if (IS_ERR(skb))
+		goto exit;
+	odp_datapath = (struct odp_datapath *)skb->data;
+
+	mutex_lock(&dp_mutex);
+	for (dp_idx = odp_datapath->dp_idx; dp_idx < ARRAY_SIZE(dps); dp_idx++) {
+		struct datapath *dp = get_dp(dp_idx);
+		if (!dp)
+			continue;
+
+		mutex_lock(&dp->mutex);
+		mutex_unlock(&dp_mutex);
+		err = copy_datapath_to_user(uodp_datapath, dp, odp_datapath->total_len);
+		mutex_unlock(&dp->mutex);
+		goto exit_free;
+	}
+	mutex_unlock(&dp_mutex);
+	err = -ENODEV;
+
+exit_free:
+	kfree_skb(skb);
+exit:
+	return err;
+}
+
 static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = {
 	[ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
 	[ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
@@ -1681,18 +1972,29 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
 {
 	int dp_idx = iminor(f->f_dentry->d_inode);
 	struct datapath *dp;
-	int drop_frags, listeners;
-	unsigned int sflow_probability;
+	int listeners;
 	int err;
 
 	/* Handle commands with special locking requirements up front. */
 	switch (cmd) {
-	case ODP_DP_CREATE:
-		err = create_dp(dp_idx, (char __user *)argp);
+	case ODP_DP_NEW:
+		err = new_datapath((struct odp_datapath __user *)argp);
+		goto exit;
+
+	case ODP_DP_GET:
+		err = get_datapath((struct odp_datapath __user *)argp);
+		goto exit;
+
+	case ODP_DP_DEL:
+		err = del_datapath((struct odp_datapath __user *)argp);
 		goto exit;
 
-	case ODP_DP_DESTROY:
-		err = destroy_dp(dp_idx);
+	case ODP_DP_SET:
+		err = set_datapath((struct odp_datapath __user *)argp);
+		goto exit;
+
+	case ODP_DP_DUMP:
+		err = dump_datapath((struct odp_datapath __user *)argp);
 		goto exit;
 
 	case ODP_VPORT_NEW:
@@ -1719,20 +2021,18 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
 		err = flush_flows(argp);
 		goto exit;
 
-	case ODP_FLOW_PUT:
-		err = put_flow((struct odp_flow_put __user *)argp);
-		goto exit;
-
-	case ODP_FLOW_DEL:
-		err = del_flow((struct odp_flow __user *)argp);
+	case ODP_FLOW_NEW:
+	case ODP_FLOW_SET:
+		err = new_flow(cmd, (struct odp_flow __user *)argp);
 		goto exit;
 
 	case ODP_FLOW_GET:
-		err = query_flow((struct odp_flow __user *)argp);
+	case ODP_FLOW_DEL:
+		err = get_or_del_flow(cmd, (struct odp_flow __user *)argp);
 		goto exit;
 
 	case ODP_FLOW_DUMP:
-		err = dump_flow((struct odp_flow_dump __user *)argp);
+		err = dump_flow((struct odp_flow __user *)argp);
 		goto exit;
 
 	case ODP_EXECUTE:
@@ -1746,25 +2046,6 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
 		goto exit;
 
 	switch (cmd) {
-	case ODP_DP_STATS:
-		err = get_dp_stats(dp, (struct odp_stats __user *)argp);
-		break;
-
-	case ODP_GET_DROP_FRAGS:
-		err = put_user(dp->drop_frags, (int __user *)argp);
-		break;
-
-	case ODP_SET_DROP_FRAGS:
-		err = get_user(drop_frags, (int __user *)argp);
-		if (err)
-			break;
-		err = -EINVAL;
-		if (drop_frags != 0 && drop_frags != 1)
-			break;
-		dp->drop_frags = drop_frags;
-		err = 0;
-		break;
-
 	case ODP_GET_LISTEN_MASK:
 		err = put_user(get_listen_mask(f), (int __user *)argp);
 		break;
@@ -1780,16 +2061,6 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
 		set_listen_mask(f, listeners);
 		break;
 
-	case ODP_GET_SFLOW_PROBABILITY:
-		err = put_user(dp->sflow_probability, (unsigned int __user *)argp);
-		break;
-
-	case ODP_SET_SFLOW_PROBABILITY:
-		err = get_user(sflow_probability, (unsigned int __user *)argp);
-		if (!err)
-			dp->sflow_probability = sflow_probability;
-		break;
-
 	default:
 		err = -ENOIOCTLCMD;
 		break;
@@ -1810,170 +2081,6 @@ static int dp_has_packet_of_interest(struct datapath *dp, int listeners)
 }
 
 #ifdef CONFIG_COMPAT
-static int compat_get_flow(struct odp_flow *flow, const struct compat_odp_flow __user *compat)
-{
-	compat_uptr_t key, actions;
-
-	if (!access_ok(VERIFY_READ, compat, sizeof(struct compat_odp_flow)) ||
-	    __copy_from_user(&flow->stats, &compat->stats, sizeof(struct odp_flow_stats)) ||
-	    __get_user(key, &compat->key) ||
-	    __get_user(flow->key_len, &compat->key_len) ||
-	    __get_user(actions, &compat->actions) ||
-	    __get_user(flow->actions_len, &compat->actions_len))
-		return -EFAULT;
-
-	flow->key = (struct nlattr __force *)compat_ptr(key);
-	flow->actions = (struct nlattr __force *)compat_ptr(actions);
-	return 0;
-}
-
-static int compat_put_flow(struct compat_odp_flow_put __user *ufp)
-{
-	struct odp_flow_stats stats;
-	struct odp_flow_put uf;
-	struct datapath *dp;
-	int error;
-
-	if (compat_get_flow(&uf.flow, &ufp->flow) ||
-	    get_user(uf.flags, &ufp->flags))
-		return -EFAULT;
-
-	dp = get_dp_locked(uf.flow.dp_idx);
-	if (!dp)
-		return -ENODEV;
-
-	error = do_put_flow(dp, &uf, &stats);
-	if (!error) {
-		if (copy_to_user(&ufp->flow.stats, &stats,
-				 sizeof(struct odp_flow_stats)))
-			error = -EFAULT;
-	}
-	mutex_unlock(&dp->mutex);
-
-	return error;
-}
-
-
-static int compat_answer_query(struct datapath *dp, struct sw_flow *flow,
-			       struct compat_odp_flow __user *ufp)
-{
-	compat_uptr_t actions;
-
-	if (get_user(actions, &ufp->actions))
-		return -EFAULT;
-
-	return do_answer_query(dp, flow, &ufp->stats,
-			       compat_ptr(actions), &ufp->actions_len);
-}
-
-static int compat_del_flow(struct compat_odp_flow __user *ufp)
-{
-	struct sw_flow *flow;
-	struct datapath *dp;
-	struct odp_flow uf;
-	int error;
-
-	if (compat_get_flow(&uf, ufp))
-		return -EFAULT;
-
-	dp = get_dp_locked(uf.dp_idx);
-	if (!dp)
-		return -ENODEV;
-
-	flow = do_del_flow(dp, (const struct nlattr __force __user *)uf.key, uf.key_len);
-	error = PTR_ERR(flow);
-	if (!IS_ERR(flow)) {
-		error = compat_answer_query(dp, flow, ufp);
-		flow_deferred_free(flow);
-	}
-	mutex_unlock(&dp->mutex);
-
-	return error;
-}
-
-static int compat_query_flow(struct compat_odp_flow __user *uflow)
-{
-	struct tbl_node *flow_node;
-	struct sw_flow_key key;
-	struct odp_flow flow;
-	struct datapath *dp;
-	int error;
-
-	if (compat_get_flow(&flow, uflow))
-		return -EFAULT;
-
-	dp = get_dp_locked(flow.dp_idx);
-	if (!dp)
-		return -ENODEV;
-
-	error = flow_copy_from_user(&key, (const struct nlattr __force __user *)flow.key, flow.key_len);
-	if (!error) {
-		struct tbl *table = get_table_protected(dp);
-		flow_node = tbl_lookup(table, &flow.key, flow_hash(&key), flow_cmp);
-		if (flow_node)
-			error = compat_answer_query(dp, flow_cast(flow_node), uflow);
-		else
-			error = -ENOENT;
-	}
-	mutex_unlock(&dp->mutex);
-
-	return error;
-}
-
-static int compat_dump_flow(struct compat_odp_flow_dump __user *udump)
-{
-	struct compat_odp_flow __user *uflow;
-	struct nlattr __user *ukey;
-	struct tbl_node *tbl_node;
-	struct compat_odp_flow_dump dump;
-	struct sw_flow *flow;
-	compat_uptr_t ukey32;
-	struct datapath *dp;
-	struct tbl *table;
-	u32 key_len;
-	int err;
-
-	err = -EFAULT;
-	if (copy_from_user(&dump, udump, sizeof(struct compat_odp_flow_dump)))
-		goto exit;
-	uflow =compat_ptr(dump.flow);
-
-	dp = get_dp_locked(dump.dp_idx);
-	err = -ENODEV;
-	if (!dp)
-		goto exit;
-
-	table = get_table_protected(dp);
-	tbl_node = tbl_next(table, &dump.state[0], &dump.state[1]);
-	if (!tbl_node) {
-		err = put_user(0, &uflow->key_len);
-		goto exit_unlock;
-	}
-	flow = flow_cast(tbl_node);
-
-	err = -EFAULT;
-	if (copy_to_user(udump->state, dump.state, 2 * sizeof(uint32_t)) ||
-	    get_user(ukey32, &uflow->key) ||
-	    get_user(key_len, &uflow->key_len))
-		goto exit_unlock;
-	ukey = compat_ptr(ukey32);
-
-	key_len = flow_copy_to_user(ukey, &flow->key, key_len);
-	err = key_len;
-	if (key_len < 0)
-		goto exit_unlock;
-	err = -EFAULT;
-	if (put_user(key_len, &uflow->key_len))
-		goto exit_unlock;
-
-	err = compat_answer_query(dp, flow, uflow);
-
-exit_unlock:
-	mutex_unlock(&dp->mutex);
-exit:
-	return err;
-}
-
 static int compat_execute(const struct compat_odp_execute __user *uexecute)
 {
 	struct odp_execute execute;
@@ -2005,39 +2112,30 @@ static int compat_execute(const struct compat_odp_execute __user *uexecute)
 static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned long argp)
 {
 	switch (cmd) {
-	case ODP_DP_DESTROY:
 	case ODP_FLOW_FLUSH:
 		/* Ioctls that don't need any translation at all. */
 		return openvswitch_ioctl(f, cmd, argp);
 
-	case ODP_DP_CREATE:
+	case ODP_DP_NEW:
+	case ODP_DP_GET:
+	case ODP_DP_DEL:
+	case ODP_DP_SET:
+	case ODP_DP_DUMP:
 	case ODP_VPORT_NEW:
 	case ODP_VPORT_DEL:
 	case ODP_VPORT_GET:
 	case ODP_VPORT_SET:
 	case ODP_VPORT_DUMP:
-	case ODP_DP_STATS:
-	case ODP_GET_DROP_FRAGS:
-	case ODP_SET_DROP_FRAGS:
+	case ODP_FLOW_NEW:
+	case ODP_FLOW_DEL:
+	case ODP_FLOW_GET:
+	case ODP_FLOW_SET:
+	case ODP_FLOW_DUMP:
 	case ODP_SET_LISTEN_MASK:
 	case ODP_GET_LISTEN_MASK:
-	case ODP_SET_SFLOW_PROBABILITY:
-	case ODP_GET_SFLOW_PROBABILITY:
 		/* Ioctls that just need their pointer argument extended. */
 		return openvswitch_ioctl(f, cmd, (unsigned long)compat_ptr(argp));
 
-	case ODP_FLOW_PUT32:
-		return compat_put_flow(compat_ptr(argp));
-
-	case ODP_FLOW_DEL32:
-		return compat_del_flow(compat_ptr(argp));
-
-	case ODP_FLOW_GET32:
-		return compat_query_flow(compat_ptr(argp));
-
-	case ODP_FLOW_DUMP32:
-		return compat_dump_flow(compat_ptr(argp));
-
 	case ODP_EXECUTE32:
 		return compat_execute(compat_ptr(argp));
 
diff --git a/datapath/flow.c b/datapath/flow.c
index 63f2752..0987fd2 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -399,15 +399,15 @@ int flow_cmp(const struct tbl_node *node, void *key2_)
 /**
  * flow_from_nlattrs - parses Netlink attributes into a flow key.
  * @swkey: receives the extracted flow key.
- * @key: start of %ODP_KEY_ATTR_* Netlink attribute sequence.
- * @key_len: number of bytes in @key.
+ * @key: Netlink attribute holding nested %ODP_KEY_ATTR_* Netlink attribute
+ * sequence.
  *
  * This state machine accepts the following forms, with [] for optional
  * elements and | for alternatives:
  *
  * [tun_id] in_port ethernet [8021q] [ethertype [IP [TCP|UDP|ICMP] | ARP]
  */
-static int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *key, u32 key_len)
+int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *attr)
 {
 	const struct nlattr *nla;
 	u16 prev_type;
@@ -417,7 +417,7 @@ static int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *key
 	swkey->dl_type = htons(ETH_P_802_2);
 
 	prev_type = ODP_KEY_ATTR_UNSPEC;
-	nla_for_each_attr(nla, key, key_len, rem) {
+	nla_for_each_nested(nla, attr, rem) {
 		static const u32 key_lens[ODP_KEY_ATTR_MAX + 1] = {
 			[ODP_KEY_ATTR_TUN_ID] = 8,
 			[ODP_KEY_ATTR_IN_PORT] = 4,
@@ -572,39 +572,43 @@ static int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *key
 	return -EINVAL;
 }
 
-u32 flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
+int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 {
 	struct odp_key_ethernet *eth_key;
-
-	if (skb_tailroom(skb) < FLOW_BUFSIZE)
-		return -EMSGSIZE;
+	struct nlattr *nla;
 
 	if (swkey->tun_id != cpu_to_be64(0))
-		nla_put_be64(skb, ODP_KEY_ATTR_TUN_ID, swkey->tun_id);
+		NLA_PUT_BE64(skb, ODP_KEY_ATTR_TUN_ID, swkey->tun_id);
 
-	nla_put_u32(skb, ODP_KEY_ATTR_IN_PORT, swkey->in_port);
+	NLA_PUT_U32(skb, ODP_KEY_ATTR_IN_PORT, swkey->in_port);
 
-	eth_key = nla_data(__nla_reserve(skb, ODP_KEY_ATTR_ETHERNET, sizeof(*eth_key)));
+	nla = nla_reserve(skb, ODP_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+	if (!nla)
+		goto nla_put_failure;
+	eth_key = nla_data(nla);
 	memcpy(eth_key->eth_src, swkey->dl_src, ETH_ALEN);
 	memcpy(eth_key->eth_dst, swkey->dl_dst, ETH_ALEN);
 
 	if (swkey->dl_tci != htons(0)) {
-		struct odp_key_8021q *q_key;
+		struct odp_key_8021q q_key;
 
-		q_key = nla_data(__nla_reserve(skb, ODP_KEY_ATTR_8021Q, sizeof(*q_key)));
-		q_key->q_tpid = htons(ETH_P_8021Q);
-		q_key->q_tci = swkey->dl_tci & ~htons(VLAN_TAG_PRESENT);
+		q_key.q_tpid = htons(ETH_P_8021Q);
+		q_key.q_tci = swkey->dl_tci & ~htons(VLAN_TAG_PRESENT);
+		NLA_PUT(skb, ODP_KEY_ATTR_8021Q, sizeof(q_key), &q_key);
 	}
 
 	if (swkey->dl_type == htons(ETH_P_802_2))
-		goto exit;
+		return 0;
 
-	nla_put_be16(skb, ODP_KEY_ATTR_ETHERTYPE, swkey->dl_type);
+	NLA_PUT_BE16(skb, ODP_KEY_ATTR_ETHERTYPE, swkey->dl_type);
 
 	if (swkey->dl_type == htons(ETH_P_IP)) {
 		struct odp_key_ipv4 *ipv4_key;
 
-		ipv4_key = nla_data(__nla_reserve(skb, ODP_KEY_ATTR_IPV4, sizeof(*ipv4_key)));
+		nla = nla_reserve(skb, ODP_KEY_ATTR_IPV4, sizeof(*ipv4_key));
+		if (!nla)
+			goto nla_put_failure;
+		ipv4_key = nla_data(nla);
 		ipv4_key->ipv4_src = swkey->nw_src;
 		ipv4_key->ipv4_dst = swkey->nw_dst;
 		ipv4_key->ipv4_proto = swkey->nw_proto;
@@ -613,63 +617,47 @@ u32 flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 		if (swkey->nw_proto == IPPROTO_TCP) {
 			struct odp_key_tcp *tcp_key;
 
-			tcp_key = nla_data(__nla_reserve(skb, ODP_KEY_ATTR_TCP, sizeof(*tcp_key)));
+			nla = nla_reserve(skb, ODP_KEY_ATTR_TCP, sizeof(*tcp_key));
+			if (!nla)
+				goto nla_put_failure;
+			tcp_key = nla_data(nla);
 			tcp_key->tcp_src = swkey->tp_src;
 			tcp_key->tcp_dst = swkey->tp_dst;
 		} else if (swkey->nw_proto == IPPROTO_UDP) {
 			struct odp_key_udp *udp_key;
 
-			udp_key = nla_data(__nla_reserve(skb, ODP_KEY_ATTR_UDP, sizeof(*udp_key)));
+			nla = nla_reserve(skb, ODP_KEY_ATTR_UDP, sizeof(*udp_key));
+			if (!nla)
+				goto nla_put_failure;
+			udp_key = nla_data(nla);
 			udp_key->udp_src = swkey->tp_src;
 			udp_key->udp_dst = swkey->tp_dst;
 		} else if (swkey->nw_proto == IPPROTO_ICMP) {
 			struct odp_key_icmp *icmp_key;
 
-			icmp_key = nla_data(__nla_reserve(skb, ODP_KEY_ATTR_ICMP, sizeof(*icmp_key)));
+			nla = nla_reserve(skb, ODP_KEY_ATTR_ICMP, sizeof(*icmp_key));
+			if (!nla)
+				goto nla_put_failure;
+			icmp_key = nla_data(nla);
 			icmp_key->icmp_type = ntohs(swkey->tp_src);
 			icmp_key->icmp_code = ntohs(swkey->tp_dst);
 		}
 	} else if (swkey->dl_type == htons(ETH_P_ARP)) {
 		struct odp_key_arp *arp_key;
 
-		arp_key = nla_data(__nla_reserve(skb, ODP_KEY_ATTR_ARP, sizeof(*arp_key)));
+		nla = nla_reserve(skb, ODP_KEY_ATTR_ARP, sizeof(*arp_key));
+		if (!nla)
+			goto nla_put_failure;
+		arp_key = nla_data(nla);
 		arp_key->arp_sip = swkey->nw_src;
 		arp_key->arp_tip = swkey->nw_dst;
 		arp_key->arp_op = htons(swkey->nw_proto);
 	}
 
-exit:
-	return skb->len;
-}
-
-int flow_copy_from_user(struct sw_flow_key *swkey, const struct nlattr __user *ukey, u32 ukey_len)
-{
-	char key[FLOW_BUFSIZE] __aligned(NLA_ALIGNTO);
-
-	if (ukey_len > FLOW_BUFSIZE || ukey_len % NLA_ALIGNTO)
-		return -EINVAL;
-
-	if (copy_from_user(key, ukey, ukey_len))
-		return -EFAULT;
-
-	return flow_from_nlattrs(swkey, (const struct nlattr *)key, ukey_len);
-}
-
-int flow_copy_to_user(struct nlattr __user *ukey, const struct sw_flow_key *swkey, u32 ukey_len)
-{
-	struct sk_buff *skb;
-	int retval;
-
-	skb = alloc_skb(FLOW_BUFSIZE, GFP_KERNEL);
-	if (!skb)
-		return -ENOMEM;
-
-	retval = flow_to_nlattrs(swkey, skb);
-	if (copy_to_user(ukey, skb->data, min(skb->len, ukey_len)))
-		retval = -EFAULT;
-	kfree_skb(skb);
+	return 0;
 
-	return retval;
+nla_put_failure:
+	return -EMSGSIZE;
 }
 
 /* Initializes the flow module.
diff --git a/datapath/flow.h b/datapath/flow.h
index 5e5ae5e..f9aa44a 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010 Nicira Networks.
+ * Copyright (c) 2009, 2010, 2011 Nicira Networks.
  * Distributed under the terms of the GNU GPL version 2.
  *
  * Significant portions of this file may be copied from parts of the Linux
@@ -100,9 +100,8 @@ int flow_cmp(const struct tbl_node *, void *target);
  */
 #define FLOW_BUFSIZE 96
 
-u32 flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
-int flow_copy_from_user(struct sw_flow_key *, const struct nlattr __user *ukey, u32 key_len);
-int flow_copy_to_user(struct nlattr __user *ukey, const struct sw_flow_key *, u32 key_len);
+int flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
+int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *);
 
 static inline struct sw_flow *flow_cast(const struct tbl_node *node)
 {
diff --git a/datapath/odp-compat.h b/datapath/odp-compat.h
index ae997e6..b18c31a 100644
--- a/datapath/odp-compat.h
+++ b/datapath/odp-compat.h
@@ -15,32 +15,7 @@
 #include "openvswitch/datapath-protocol.h"
 #include <linux/compat.h>
 
-#define ODP_FLOW_GET32		_IOWR('O', 13, struct compat_odp_flow)
-#define ODP_FLOW_PUT32		_IOWR('O', 14, struct compat_odp_flow)
-#define ODP_FLOW_DUMP32		_IOWR('O', 15, struct compat_odp_flow_dump)
-#define ODP_FLOW_DEL32		_IOWR('O', 17, struct compat_odp_flow)
 #define ODP_EXECUTE32		_IOR('O', 18, struct compat_odp_execute)
-#define ODP_FLOW_DEL32		_IOWR('O', 17, struct compat_odp_flow)
-
-struct compat_odp_flow {
-	uint32_t dp_idx;
-	struct odp_flow_stats stats;
-	compat_uptr_t key;
-	u32 key_len;
-	compat_uptr_t actions;
-	u32 actions_len;
-};
-
-struct compat_odp_flow_put {
-	struct compat_odp_flow flow;
-	u32 flags;
-};
-
-struct compat_odp_flow_dump {
-	uint32_t dp_idx;
-	compat_uptr_t flow;
-	uint32_t state[2];
-};
 
 struct compat_odp_execute {
 	uint32_t dp_idx;
diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h
index cf81cb3..23cf4ef 100644
--- a/include/openvswitch/datapath-protocol.h
+++ b/include/openvswitch/datapath-protocol.h
@@ -70,14 +70,11 @@
 #include <linux/if_link.h>
 #include <linux/netlink.h>
 
-#define ODP_MAX 256             /* Maximum number of datapaths. */
-
-#define ODP_DP_CREATE           _IO('O', 0)
-#define ODP_DP_DESTROY          _IO('O', 1)
-#define ODP_DP_STATS            _IOW('O', 2, struct odp_stats)
-
-#define ODP_GET_DROP_FRAGS      _IOW('O', 3, int)
-#define ODP_SET_DROP_FRAGS      _IOR('O', 4, int)
+#define ODP_DP_NEW              _IOWR('O', 0, struct odp_datapath)
+#define ODP_DP_DEL              _IOR('O', 1, struct odp_datapath)
+#define ODP_DP_GET              _IOWR('O', 2, struct odp_datapath)
+#define ODP_DP_SET		_IOWR('O', 3, struct odp_datapath)
+#define ODP_DP_DUMP		_IOWR('O', 4, struct odp_datapath)
 
 #define ODP_GET_LISTEN_MASK     _IOW('O', 5, int)
 #define ODP_SET_LISTEN_MASK     _IOR('O', 6, int)
@@ -88,16 +85,48 @@
 #define ODP_VPORT_SET           _IOR('O', 22, struct odp_vport)
 #define ODP_VPORT_DUMP          _IOWR('O', 10, struct odp_vport)
 
-#define ODP_FLOW_GET            _IOWR('O', 13, struct odp_flow)
-#define ODP_FLOW_PUT            _IOWR('O', 14, struct odp_flow)
-#define ODP_FLOW_DUMP           _IOWR('O', 15, struct odp_flow_dump)
-#define ODP_FLOW_FLUSH          _IO('O', 16)
-#define ODP_FLOW_DEL            _IOWR('O', 17, struct odp_flow)
+#define ODP_FLOW_NEW            _IOWR('O', 13, struct odp_flow)
+#define ODP_FLOW_DEL            _IOWR('O', 14, struct odp_flow)
+#define ODP_FLOW_GET            _IOWR('O', 15, struct odp_flow)
+#define ODP_FLOW_SET            _IOWR('O', 16, struct odp_flow)
+#define ODP_FLOW_DUMP           _IOWR('O', 17, struct odp_flow)
+#define ODP_FLOW_FLUSH          _IO('O', 19)
 
 #define ODP_EXECUTE             _IOR('O', 18, struct odp_execute)
 
-#define ODP_SET_SFLOW_PROBABILITY _IOR('O', 19, int)
-#define ODP_GET_SFLOW_PROBABILITY _IOW('O', 20, int)
+/**
+ * struct odp_datapath - header with basic information about a datapath.
+ * @dp_idx: Datapath index (-1 to make a request not specific to a datapath).
+ * @len: Length of this structure plus the Netlink attributes following it.
+ * @total_len: Total space available for kernel reply to request.
+ *
+ * Followed by &struct nlattr attributes, whose types are drawn from
+ * %ODP_DP_ATTR_*, up to a length of @len bytes including the &struct
+ * odp_datapath header.
+ */
+struct odp_datapath {
+	int32_t dp_idx;
+	uint32_t len;
+	uint32_t total_len;
+};
+
+enum odp_datapath_type {
+	ODP_DP_ATTR_UNSPEC,
+	ODP_DP_ATTR_NAME,       /* name of dp_ifidx netdev */
+	ODP_DP_ATTR_STATS,      /* struct odp_stats */
+	ODP_DP_ATTR_IPV4_FRAGS,	/* 32-bit enum odp_frag_handling */
+	ODP_DP_ATTR_SAMPLING,   /* 32-bit fraction of packets to sample. */
+	__ODP_DP_ATTR_MAX
+};
+
+#define ODP_DP_ATTR_MAX (__ODP_DP_ATTR_MAX - 1)
+
+/* Values for ODP_DP_ATTR_IPV4_FRAGS. */
+enum odp_frag_handling {
+	ODP_DP_FRAG_UNSPEC,
+	ODP_DP_FRAG_ZERO,	/* Treat IP fragments as transport port 0. */
+	ODP_DP_FRAG_DROP	/* Drop IP fragments. */
+};
 
 struct odp_stats {
     uint64_t n_frags;           /* Number of dropped IP fragments. */
@@ -210,10 +239,6 @@ enum {
 struct odp_flow_stats {
     uint64_t n_packets;         /* Number of matched packets. */
     uint64_t n_bytes;           /* Number of matched bytes. */
-    uint64_t used_sec;          /* Time last used, in system monotonic time. */
-    uint32_t used_nsec;
-    uint8_t  tcp_flags;
-    uint8_t  reserved;
 };
 
 enum odp_key_type {
@@ -271,41 +296,36 @@ struct odp_key_arp {
 	ovs_be16 arp_op;
 };
 
+/**
+ * struct odp_flow - header with basic information about a flow.
+ * @dp_idx: Datapath index.
+ * @len: Length of this structure plus the Netlink attributes following it.
+ * @total_len: Total space available for kernel reply to request.
+ *
+ * Followed by &struct nlattr attributes, whose types are drawn from
+ * %ODP_FLOW_ATTR_*, up to a length of @len bytes including the &struct
+ * odp_flow header.
+ */
 struct odp_flow {
-    uint32_t dp_idx;
-    struct odp_flow_stats stats;
-    struct nlattr *key;
-    uint32_t key_len;
-    struct nlattr *actions;
-    uint32_t actions_len;
+	uint32_t nlmsg_flags;
+	uint32_t dp_idx;
+	uint32_t len;
+	uint32_t total_len;
 };
 
-/* Flags for ODP_FLOW_PUT. */
-#define ODPPF_CREATE        (1 << 0) /* Allow creating a new flow. */
-#define ODPPF_MODIFY        (1 << 1) /* Allow modifying an existing flow. */
-#define ODPPF_ZERO_STATS    (1 << 2) /* Zero the stats of an existing flow. */
-
-/* ODP_FLOW_PUT argument. */
-struct odp_flow_put {
-    struct odp_flow flow;
-    uint32_t flags;
+enum odp_flow_type {
+	ODP_FLOW_ATTR_UNSPEC,
+	ODP_FLOW_ATTR_KEY,       /* Sequence of ODP_KEY_ATTR_* attributes. */
+	ODP_FLOW_ATTR_ACTIONS,   /* Sequence of nested ODPAT_* attributes. */
+	ODP_FLOW_ATTR_STATS,     /* struct odp_flow_stats. */
+	ODP_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
+	ODP_FLOW_ATTR_USED,      /* u64 msecs last used in monotonic time. */
+	ODP_FLOW_ATTR_CLEAR,     /* Flag to clear stats, tcp_flags, used. */
+	ODP_FLOW_ATTR_STATE,     /* u64 state for ODP_FLOW_DUMP. */
+	__ODP_FLOW_ATTR_MAX
 };
 
-/* ODP_FLOW_DUMP argument.
- *
- * This is used to iterate through the flow table flow-by-flow.  Each
- * ODP_FLOW_DUMP call either stores a new odp_flow into 'flow' or stores 0 into
- * flow->key_len to indicate that the end of the table has been reached, and
- * updates 'state' in-place.
- *
- * Before the first call, zero 'state'.  The format of 'state' is otherwise
- * unspecified.
- */
-struct odp_flow_dump {
-	uint32_t dp_idx;
-	struct odp_flow *flow;
-	uint32_t state[2];
-};
+#define ODP_FLOW_ATTR_MAX (__ODP_FLOW_ATTR_MAX - 1)
 
 /* Action types. */
 enum odp_action_type {
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 3c11be2..ea2f424 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -47,11 +47,64 @@
 #include "rtnetlink-link.h"
 #include "shash.h"
 #include "svec.h"
+#include "unaligned.h"
 #include "util.h"
 #include "vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(dpif_linux);
 
+struct dpif_linux_dp {
+    /* ioctl command argument. */
+    int cmd;
+
+    /* struct odp_datapath header. */
+    uint32_t dp_idx;
+
+    /* Attributes. */
+    const char *name;                  /* ODP_DP_ATTR_NAME. */
+    struct odp_stats stats;            /* ODP_DP_ATTR_STATS. */
+    enum odp_frag_handling ipv4_frags; /* ODP_DP_ATTR_IPV4_FRAGS. */
+    const uint32_t *sampling;          /* ODP_DP_ATTR_SAMPLING. */
+};
+
+static void dpif_linux_dp_init(struct dpif_linux_dp *);
+static int dpif_linux_dp_transact(const struct dpif_linux_dp *request,
+                                  struct dpif_linux_dp *reply,
+                                  struct ofpbuf **bufp);
+static int dpif_linux_dp_get(const struct dpif *, struct dpif_linux_dp *reply,
+                             struct ofpbuf **bufp);
+
+struct dpif_linux_flow {
+    /* ioctl command argument. */
+    int cmd;
+
+    /* struct odp_flow header. */
+    unsigned int nlmsg_flags;
+    uint32_t dp_idx;
+
+    /* Attributes.
+     *
+     * The 'stats', 'used', and 'state' members point to 64-bit data that might
+     * only be aligned on 32-bit boundaries, so get_unaligned_u64() should be
+     * used to access their values. */
+    const struct nlattr *key;           /* ODP_FLOW_ATTR_KEY. */
+    size_t key_len;
+    const struct nlattr *actions;       /* ODP_FLOW_ATTR_ACTIONS. */
+    size_t actions_len;
+    const struct odp_flow_stats *stats; /* ODP_FLOW_ATTR_STATS. */
+    const uint8_t *tcp_flags;           /* ODP_FLOW_ATTR_TCP_FLAGS. */
+    const uint64_t *used;               /* ODP_FLOW_ATTR_USED. */
+    bool clear;                         /* ODP_FLOW_ATTR_CLEAR. */
+    const uint64_t *state;              /* ODP_FLOW_ATTR_STATE. */
+};
+
+static void dpif_linux_flow_init(struct dpif_linux_flow *);
+static int dpif_linux_flow_transact(const struct dpif_linux_flow *request,
+                                    struct dpif_linux_flow *reply,
+                                    struct ofpbuf **bufp);
+static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *,
+                                      struct dpif_flow_stats *);
+
 /* Datapath interface for the openvswitch Linux kernel module. */
 struct dpif_linux {
     struct dpif dpif;
@@ -74,7 +127,6 @@ static int do_ioctl(const struct dpif *, int cmd, const void *arg);
 static int open_dpif(const struct dpif_linux_vport *local_vport,
                      struct dpif **);
 static int get_openvswitch_major(void);
-static int create_minor(const char *name, int minor);
 static int open_minor(int minor, int *fdp);
 static int make_openvswitch_device(int minor, char **fnp);
 static void dpif_linux_port_changed(const struct rtnetlink_link_change *,
@@ -90,9 +142,8 @@ dpif_linux_cast(const struct dpif *dpif)
 static int
 dpif_linux_enumerate(struct svec *all_dps)
 {
+    uint32_t dp_idx;
     int major;
-    int error;
-    int i;
 
     /* Check that the Open vSwitch module is loaded. */
     major = get_openvswitch_major();
@@ -100,22 +151,28 @@ dpif_linux_enumerate(struct svec *all_dps)
         return -major;
     }
 
-    error = 0;
-    for (i = 0; i < ODP_MAX; i++) {
-        struct dpif *dpif;
+    dp_idx = 0;
+    for (;;) {
+        struct dpif_linux_dp request, reply;
+        struct ofpbuf *buf;
         char devname[16];
-        int retval;
-
-        sprintf(devname, "dp%d", i);
-        retval = dpif_open(devname, "system", &dpif);
-        if (!retval) {
-            svec_add(all_dps, devname);
-            dpif_uninit(dpif, true);
-        } else if (retval != ENODEV && !error) {
-            error = retval;
+        int error;
+
+        dpif_linux_dp_init(&request);
+        request.dp_idx = dp_idx;
+        request.cmd = ODP_DP_DUMP;
+
+        error = dpif_linux_dp_transact(&request, &reply, &buf);
+        if (error) {
+            return error == ENODEV ? 0 : error;
         }
+        ofpbuf_delete(buf);
+
+        sprintf(devname, "dp%d", reply.dp_idx);
+        svec_add(all_dps, devname);
+
+        dp_idx = reply.dp_idx + 1;
     }
-    return error;
 }
 
 static int
@@ -130,27 +187,20 @@ dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name,
     minor = !strncmp(name, "dp", 2)
             && isdigit((unsigned char)name[2]) ? atoi(name + 2) : -1;
     if (create) {
-        if (minor >= 0) {
-            error = create_minor(name, minor);
-            if (error) {
-                return error;
-            }
-        } else {
-            /* Scan for unused minor number. */
-            for (minor = 0; ; minor++) {
-                if (minor >= ODP_MAX) {
-                    /* All datapath numbers in use. */
-                    return ENOBUFS;
-                }
+        struct dpif_linux_dp request, reply;
+        struct ofpbuf *buf;
+        int error;
 
-                error = create_minor(name, minor);
-                if (!error) {
-                    break;
-                } else if (error != EBUSY) {
-                    return error;
-                }
-            }
+        dpif_linux_dp_init(&request);
+        request.cmd = ODP_DP_NEW;
+        request.dp_idx = minor;
+        request.name = name;
+        error = dpif_linux_dp_transact(&request, &reply, &buf);
+        if (error) {
+            return error;
         }
+        minor = reply.dp_idx;
+        ofpbuf_delete(buf);
     }
 
     dpif_linux_vport_init(&request);
@@ -245,25 +295,41 @@ dpif_linux_get_all_names(const struct dpif *dpif_, struct svec *all_names)
 static int
 dpif_linux_destroy(struct dpif *dpif_)
 {
-    return do_ioctl(dpif_, ODP_DP_DESTROY, NULL);
+    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
+    struct dpif_linux_dp dp;
+
+    dpif_linux_dp_init(&dp);
+    dp.cmd = ODP_DP_DEL;
+    dp.dp_idx = dpif->minor;
+    return dpif_linux_dp_transact(&dp, NULL, NULL);
 }
 
 static int
 dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats)
 {
-    memset(stats, 0, sizeof *stats);
-    return do_ioctl(dpif_, ODP_DP_STATS, stats);
+    struct dpif_linux_dp dp;
+    struct ofpbuf *buf;
+    int error;
+
+    error = dpif_linux_dp_get(dpif_, &dp, &buf);
+    if (!error) {
+        *stats = dp.stats;
+        ofpbuf_delete(buf);
+    }
+    return error;
 }
 
 static int
 dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp)
 {
-    int drop_frags;
+    struct dpif_linux_dp dp;
+    struct ofpbuf *buf;
     int error;
 
-    error = do_ioctl(dpif_, ODP_GET_DROP_FRAGS, &drop_frags);
+    error = dpif_linux_dp_get(dpif_, &dp, &buf);
     if (!error) {
-        *drop_fragsp = drop_frags & 1;
+        *drop_fragsp = dp.ipv4_frags == ODP_DP_FRAG_DROP;
+        ofpbuf_delete(buf);
     }
     return error;
 }
@@ -271,8 +337,14 @@ dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp)
 static int
 dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags)
 {
-    int drop_frags_int = drop_frags;
-    return do_ioctl(dpif_, ODP_SET_DROP_FRAGS, &drop_frags_int);
+    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
+    struct dpif_linux_dp dp;
+
+    dpif_linux_dp_init(&dp);
+    dp.cmd = ODP_DP_SET;
+    dp.dp_idx = dpif->minor;
+    dp.ipv4_frags = drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO;
+    return dpif_linux_dp_transact(&dp, NULL, NULL);
 }
 
 static int
@@ -460,48 +532,32 @@ dpif_linux_port_poll_wait(const struct dpif *dpif_)
     }
 }
 
-static void
-odp_flow_stats_to_dpif_flow_stats(const struct odp_flow_stats *ofs,
-                                  struct dpif_flow_stats *dfs)
-{
-    dfs->n_packets = ofs->n_packets;
-    dfs->n_bytes = ofs->n_bytes;
-    dfs->used = ofs->used_sec * 1000 + ofs->used_nsec / 1000000;
-    dfs->tcp_flags = ofs->tcp_flags;
-}
-
 static int
 dpif_linux_flow_get(const struct dpif *dpif_,
                     const struct nlattr *key, size_t key_len,
                     struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
 {
     struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-    struct ofpbuf *actions = NULL;
-    struct odp_flow odp_flow;
+    struct dpif_linux_flow request, reply;
+    struct ofpbuf *buf;
     int error;
 
-    memset(&odp_flow, 0, sizeof odp_flow);
-    odp_flow.dp_idx = dpif->minor;
-    odp_flow.key = (struct nlattr *) key;
-    odp_flow.key_len = key_len;
-    if (actionsp) {
-        actions = *actionsp = ofpbuf_new(65536);
-        odp_flow.actions = actions->base;
-        odp_flow.actions_len = actions->allocated;
-    }
-
-    error = do_ioctl(dpif_, ODP_FLOW_GET, &odp_flow);
+    dpif_linux_flow_init(&request);
+    request.cmd = ODP_FLOW_GET;
+    request.dp_idx = dpif->minor;
+    request.key = key;
+    request.key_len = key_len;
+    error = dpif_linux_flow_transact(&request, &reply, &buf);
     if (!error) {
         if (stats) {
-            odp_flow_stats_to_dpif_flow_stats(&odp_flow.stats, stats);
-        }
-        if (actions) {
-            actions->size = odp_flow.actions_len;
-            ofpbuf_trim(actions);
+            dpif_linux_flow_get_stats(&reply, stats);
         }
-    } else {
-        if (actions) {
-            ofpbuf_delete(actions);
+        if (actionsp) {
+            buf->data = (void *) reply.actions;
+            buf->size = reply.actions_len;
+            *actionsp = buf;
+        } else {
+            ofpbuf_delete(buf);
         }
     }
     return error;
@@ -514,28 +570,27 @@ dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags,
                     struct dpif_flow_stats *stats)
 {
     struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-    struct odp_flow_put put;
+    struct dpif_linux_flow request, reply;
+    struct ofpbuf *buf;
     int error;
 
-    memset(&put, 0, sizeof put);
-    put.flow.dp_idx = dpif->minor;
-    put.flow.key = (struct nlattr *) key;
-    put.flow.key_len = key_len;
-    put.flow.actions = (struct nlattr *) actions;
-    put.flow.actions_len = actions_len;
-    put.flags = 0;
-    if (flags & DPIF_FP_CREATE) {
-        put.flags |= ODPPF_CREATE;
-    }
-    if (flags & DPIF_FP_MODIFY) {
-        put.flags |= ODPPF_MODIFY;
-    }
+    dpif_linux_flow_init(&request);
+    request.cmd = flags & DPIF_FP_CREATE ? ODP_FLOW_NEW : ODP_FLOW_SET;
+    request.dp_idx = dpif->minor;
+    request.key = key;
+    request.key_len = key_len;
+    request.actions = actions;
+    request.actions_len = actions_len;
     if (flags & DPIF_FP_ZERO_STATS) {
-        put.flags |= ODPPF_ZERO_STATS;
+        request.clear = true;
     }
-    error = do_ioctl(dpif_, ODP_FLOW_PUT, &put);
+    request.nlmsg_flags = flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
+    error = dpif_linux_flow_transact(&request,
+                                     stats ? &reply : NULL,
+                                     stats ? &buf : NULL);
     if (!error && stats) {
-        odp_flow_stats_to_dpif_flow_stats(&put.flow.stats, stats);
+        dpif_linux_flow_get_stats(&reply, stats);
+        ofpbuf_delete(buf);
     }
     return error;
 }
@@ -546,84 +601,81 @@ dpif_linux_flow_del(struct dpif *dpif_,
                     struct dpif_flow_stats *stats)
 {
     struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-    struct odp_flow odp_flow;
+    struct dpif_linux_flow request, reply;
+    struct ofpbuf *buf;
     int error;
 
-    memset(&odp_flow, 0, sizeof odp_flow);
-    odp_flow.dp_idx = dpif->minor;
-    odp_flow.key = (struct nlattr *) key;
-    odp_flow.key_len = key_len;
-    error = do_ioctl(dpif_, ODP_FLOW_DEL, &odp_flow);
+    dpif_linux_flow_init(&request);
+    request.cmd = ODP_FLOW_DEL;
+    request.dp_idx = dpif->minor;
+    request.key = key;
+    request.key_len = key_len;
+    error = dpif_linux_flow_transact(&request,
+                                     stats ? &reply : NULL,
+                                     stats ? &buf : NULL);
     if (!error && stats) {
-        odp_flow_stats_to_dpif_flow_stats(&odp_flow.stats, stats);
+        dpif_linux_flow_get_stats(&reply, stats);
+        ofpbuf_delete(buf);
     }
     return error;
 }
 
+
 struct dpif_linux_flow_state {
-    struct odp_flow_dump dump;
-    struct odp_flow flow;
-    uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S];
-    uint32_t actionsbuf[65536 / sizeof(uint32_t)];
+    struct dpif_linux_flow flow;
+    struct ofpbuf *buf;
     struct dpif_flow_stats stats;
 };
 
 static int
-dpif_linux_flow_dump_start(const struct dpif *dpif_, void **statep)
+dpif_linux_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
 {
-    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-    struct dpif_linux_flow_state *state;
-
-    *statep = state = xmalloc(sizeof *state);
-    state->dump.dp_idx = dpif->minor;
-    state->dump.state[0] = 0;
-    state->dump.state[1] = 0;
-    state->dump.flow = &state->flow;
+    *statep = xzalloc(sizeof(struct dpif_linux_flow_state));
     return 0;
 }
 
 static int
-dpif_linux_flow_dump_next(const struct dpif *dpif, void *state_,
+dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_,
                           const struct nlattr **key, size_t *key_len,
                           const struct nlattr **actions, size_t *actions_len,
                           const struct dpif_flow_stats **stats)
 {
+    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
     struct dpif_linux_flow_state *state = state_;
+    struct ofpbuf *old_buf = state->buf;
+    struct dpif_linux_flow request;
     int error;
 
-    memset(&state->flow, 0, sizeof state->flow);
-    state->flow.key = (struct nlattr *) state->keybuf;
-    state->flow.key_len = sizeof state->keybuf;
-    if (actions) {
-        state->flow.actions = (struct nlattr *) state->actionsbuf;
-        state->flow.actions_len = sizeof state->actionsbuf;
-    }
+    dpif_linux_flow_init(&request);
+    request.cmd = ODP_FLOW_DUMP;
+    request.dp_idx = dpif->minor;
+    request.state = state->flow.state;
+    error = dpif_linux_flow_transact(&request, &state->flow, &state->buf);
+    ofpbuf_delete(old_buf);
 
-    error = do_ioctl(dpif, ODP_FLOW_DUMP, &state->dump);
     if (!error) {
-        if (!state->flow.key_len) {
-            return EOF;
-        }
         if (key) {
-            *key = (const struct nlattr *) state->keybuf;
+            *key = state->flow.key;
             *key_len = state->flow.key_len;
         }
         if (actions) {
-            *actions = (const struct nlattr *) state->actionsbuf;
+            *actions = state->flow.actions;
             *actions_len = state->flow.actions_len;
         }
         if (stats) {
-            odp_flow_stats_to_dpif_flow_stats(&state->flow.stats,
-                                              &state->stats);
+            dpif_linux_flow_get_stats(&state->flow, &state->stats);
             *stats = &state->stats;
         }
     }
-    return error;
+    return error == ENODEV ? EOF : error;
 }
 
 static int
-dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state)
+dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
 {
+    struct dpif_linux_flow_state *state = state_;
+
+    ofpbuf_delete(state->buf);
     free(state);
     return 0;
 }
@@ -661,13 +713,29 @@ static int
 dpif_linux_get_sflow_probability(const struct dpif *dpif_,
                                  uint32_t *probability)
 {
-    return do_ioctl(dpif_, ODP_GET_SFLOW_PROBABILITY, probability);
+    struct dpif_linux_dp dp;
+    struct ofpbuf *buf;
+    int error;
+
+    error = dpif_linux_dp_get(dpif_, &dp, &buf);
+    if (!error) {
+        *probability = dp.sampling ? *dp.sampling : 0;
+        ofpbuf_delete(buf);
+    }
+    return error;
 }
 
 static int
 dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability)
 {
-    return do_ioctl(dpif_, ODP_SET_SFLOW_PROBABILITY, &probability);
+    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
+    struct dpif_linux_dp dp;
+
+    dpif_linux_dp_init(&dp);
+    dp.cmd = ODP_DP_SET;
+    dp.dp_idx = dpif->minor;
+    dp.sampling = &probability;
+    return dpif_linux_dp_transact(&dp, NULL, NULL);
 }
 
 static int
@@ -986,22 +1054,6 @@ get_major(const char *target)
 }
 
 static int
-create_minor(const char *name, int minor)
-{
-    int error;
-    int fd;
-
-    error = open_minor(minor, &fd);
-    if (error) {
-        return error;
-    }
-
-    error = ioctl(fd, ODP_DP_CREATE, name) ? errno : 0;
-    close(fd);
-    return error;
-}
-
-static int
 open_minor(int minor, int *fdp)
 {
     int error;
@@ -1042,6 +1094,24 @@ dpif_linux_port_changed(const struct rtnetlink_link_change *change,
         dpif->change_error = true;
     }
 }
+
+static int
+get_dp0_fd(int *dp0_fdp)
+{
+    static int dp0_fd = -1;
+    if (dp0_fd < 0) {
+        int error;
+        int fd;
+
+        error = open_minor(0, &fd);
+        if (error) {
+            return error;
+        }
+        dp0_fd = fd;
+    }
+    *dp0_fdp = dp0_fd;
+    return 0;
+}
 
 /* Parses the contents of 'buf', which contains a "struct odp_vport" followed
  * by Netlink attributes, into 'vport'.  Returns 0 if successful, otherwise a
@@ -1183,25 +1253,21 @@ dpif_linux_vport_transact(const struct dpif_linux_vport *request,
                           struct dpif_linux_vport *reply,
                           struct ofpbuf **bufp)
 {
-    static int dp0_fd = -1;
     struct ofpbuf *buf = NULL;
     int error;
+    int fd;
 
     assert((reply != NULL) == (bufp != NULL));
-    if (dp0_fd < 0) {
-        int fd;
 
-        error = open_minor(0, &fd);
-        if (error) {
-            goto error;
-        }
-        dp0_fd = fd;
+    error = get_dp0_fd(&fd);
+    if (error) {
+        goto error;
     }
 
     buf = ofpbuf_new(1024);
     dpif_linux_vport_to_ofpbuf(request, buf);
 
-    error = ioctl(dp0_fd, request->cmd, buf->data) ? errno : 0;
+    error = ioctl(fd, request->cmd, buf->data) ? errno : 0;
     if (error) {
         goto error;
     }
@@ -1242,4 +1308,322 @@ dpif_linux_vport_get(const char *name, struct dpif_linux_vport *reply,
 
     return dpif_linux_vport_transact(&request, reply, bufp);
 }
+
+/* Parses the contents of 'buf', which contains a "struct odp_datapath"
+ * followed by Netlink attributes, into 'dp'.  Returns 0 if successful,
+ * otherwise a positive errno value.
+ *
+ * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
+ * while 'dp' is still in use. */
+static int
+dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf)
+{
+    static const struct nl_policy odp_datapath_policy[] = {
+        [ODP_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
+        [ODP_DP_ATTR_STATS] = { .type = NL_A_UNSPEC,
+                                .min_len = sizeof(struct odp_stats),
+                                .max_len = sizeof(struct odp_stats),
+                                .optional = true },
+        [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NL_A_U32, .optional = true },
+        [ODP_DP_ATTR_SAMPLING] = { .type = NL_A_U32, .optional = true },
+    };
+
+    struct odp_datapath *odp_dp;
+    struct nlattr *a[ARRAY_SIZE(odp_datapath_policy)];
+
+    dpif_linux_dp_init(dp);
+
+    if (!nl_policy_parse(buf, sizeof *odp_dp, odp_datapath_policy,
+                         a, ARRAY_SIZE(odp_datapath_policy))) {
+        return EINVAL;
+    }
+    odp_dp = buf->data;
+
+    dp->dp_idx = odp_dp->dp_idx;
+    dp->name = nl_attr_get_string(a[ODP_DP_ATTR_NAME]);
+    if (a[ODP_DP_ATTR_STATS]) {
+        /* Can't use structure assignment because Netlink doesn't ensure
+         * sufficient alignment for 64-bit members. */
+        memcpy(&dp->stats, nl_attr_get(a[ODP_DP_ATTR_STATS]),
+               sizeof dp->stats);
+    }
+    if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
+        dp->ipv4_frags = nl_attr_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);
+    }
+    if (a[ODP_DP_ATTR_SAMPLING]) {
+        dp->sampling = nl_attr_get(a[ODP_DP_ATTR_SAMPLING]);
+    }
+    return 0;
+}
+
+/* Appends to 'buf' (which must initially be empty) a "struct odp_datapath"
+ * followed by Netlink attributes corresponding to 'dp'. */
+static void
+dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf)
+{
+    struct odp_datapath *odp_dp;
+
+    ofpbuf_reserve(buf, sizeof odp_dp);
+
+    if (dp->name) {
+        nl_msg_put_string(buf, ODP_DP_ATTR_NAME, dp->name);
+    }
+
+    /* Skip ODP_DP_ATTR_STATS since we never have a reason to serialize it. */
+
+    if (dp->ipv4_frags) {
+        nl_msg_put_u32(buf, ODP_DP_ATTR_IPV4_FRAGS, dp->ipv4_frags);
+    }
+
+    if (dp->sampling) {
+        nl_msg_put_u32(buf, ODP_DP_ATTR_SAMPLING, *dp->sampling);
+    }
+
+    odp_dp = ofpbuf_push_uninit(buf, sizeof *odp_dp);
+    odp_dp->dp_idx = dp->dp_idx;
+    odp_dp->len = buf->size;
+    odp_dp->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
+}
+
+/* Clears 'dp' to "empty" values. */
+void
+dpif_linux_dp_init(struct dpif_linux_dp *dp)
+{
+    memset(dp, 0, sizeof *dp);
+    dp->dp_idx = -1;
+}
+
+/* Executes 'request' in the kernel datapath.  If the command fails, returns a
+ * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
+ * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
+ * result of the command is expected to be an odp_datapath also, which is
+ * decoded and stored in '*reply' and '*bufp'.  The caller must free '*bufp'
+ * when the reply is no longer needed ('reply' will contain pointers into
+ * '*bufp'). */
+int
+dpif_linux_dp_transact(const struct dpif_linux_dp *request,
+                       struct dpif_linux_dp *reply, struct ofpbuf **bufp)
+{
+    struct ofpbuf *buf = NULL;
+    int error;
+    int fd;
+
+    assert((reply != NULL) == (bufp != NULL));
+
+    error = get_dp0_fd(&fd);
+    if (error) {
+        goto error;
+    }
+
+    buf = ofpbuf_new(1024);
+    dpif_linux_dp_to_ofpbuf(request, buf);
+
+    error = ioctl(fd, request->cmd, buf->data) ? errno : 0;
+    if (error) {
+        goto error;
+    }
+
+    if (bufp) {
+        buf->size = ((struct odp_datapath *) buf->data)->len;
+        error = dpif_linux_dp_from_ofpbuf(reply, buf);
+        if (error) {
+            goto error;
+        }
+        *bufp = buf;
+    } else {
+        ofpbuf_delete(buf);
+    }
+    return 0;
+
+error:
+    ofpbuf_delete(buf);
+    if (bufp) {
+        memset(reply, 0, sizeof *reply);
+        *bufp = NULL;
+    }
+    return error;
+}
+
+/* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
+ * The caller must free '*bufp' when the reply is no longer needed ('reply'
+ * will contain pointers into '*bufp').  */
+int
+dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply,
+                  struct ofpbuf **bufp)
+{
+    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
+    struct dpif_linux_dp request;
+
+    dpif_linux_dp_init(&request);
+    request.cmd = ODP_DP_GET;
+    request.dp_idx = dpif->minor;
+
+    return dpif_linux_dp_transact(&request, reply, bufp);
+}
+
+/* Parses the contents of 'buf', which contains a "struct odp_flow" followed by
+ * Netlink attributes, into 'flow'.  Returns 0 if successful, otherwise a
+ * positive errno value.
+ *
+ * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
+ * while 'flow' is still in use. */
+static int
+dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow,
+                            const struct ofpbuf *buf)
+{
+    static const struct nl_policy odp_flow_policy[] = {
+        [ODP_FLOW_ATTR_KEY] = { .type = NL_A_NESTED },
+        [ODP_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
+        [ODP_FLOW_ATTR_STATS] = { .type = NL_A_UNSPEC,
+                                  .min_len = sizeof(struct odp_flow_stats),
+                                  .max_len = sizeof(struct odp_flow_stats),
+                                  .optional = true },
+        [ODP_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
+        [ODP_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
+        /* The kernel never uses ODP_FLOW_ATTR_CLEAR. */
+        [ODP_FLOW_ATTR_STATE] = { .type = NL_A_U64, .optional = true },
+    };
+
+    struct odp_flow *odp_flow;
+    struct nlattr *a[ARRAY_SIZE(odp_flow_policy)];
+
+    dpif_linux_flow_init(flow);
+
+    if (!nl_policy_parse(buf, sizeof *odp_flow, odp_flow_policy,
+                         a, ARRAY_SIZE(odp_flow_policy))) {
+        return EINVAL;
+    }
+    odp_flow = buf->data;
+
+    flow->nlmsg_flags = odp_flow->nlmsg_flags;
+    flow->dp_idx = odp_flow->dp_idx;
+    flow->key = nl_attr_get(a[ODP_FLOW_ATTR_KEY]);
+    flow->key_len = nl_attr_get_size(a[ODP_FLOW_ATTR_KEY]);
+    if (a[ODP_FLOW_ATTR_ACTIONS]) {
+        flow->actions = nl_attr_get(a[ODP_FLOW_ATTR_ACTIONS]);
+        flow->actions_len = nl_attr_get_size(a[ODP_FLOW_ATTR_ACTIONS]);
+    }
+    if (a[ODP_FLOW_ATTR_STATS]) {
+        flow->stats = nl_attr_get(a[ODP_FLOW_ATTR_STATS]);
+    }
+    if (a[ODP_FLOW_ATTR_TCP_FLAGS]) {
+        flow->tcp_flags = nl_attr_get(a[ODP_FLOW_ATTR_TCP_FLAGS]);
+    }
+    if (a[ODP_FLOW_ATTR_STATE]) {
+        flow->state = nl_attr_get(a[ODP_FLOW_ATTR_STATE]);
+    }
+    return 0;
+}
+
+/* Appends to 'buf' (which must initially be empty) a "struct odp_flow"
+ * followed by Netlink attributes corresponding to 'flow'. */
+static void
+dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow,
+                          struct ofpbuf *buf)
+{
+    struct odp_flow *odp_flow;
+
+    ofpbuf_reserve(buf, sizeof odp_flow);
+
+    if (flow->key_len) {
+        nl_msg_put_unspec(buf, ODP_FLOW_ATTR_KEY, flow->key, flow->key_len);
+    }
+
+    if (flow->actions_len) {
+        nl_msg_put_unspec(buf, ODP_FLOW_ATTR_ACTIONS,
+                          flow->actions, flow->actions_len);
+    }
+
+    /* We never need to send these to the kernel. */
+    assert(!flow->stats);
+    assert(!flow->tcp_flags);
+    assert(!flow->used);
+
+    if (flow->clear) {
+        nl_msg_put_flag(buf, ODP_FLOW_ATTR_CLEAR);
+    }
+
+    if (flow->state) {
+        nl_msg_put_u64(buf, ODP_FLOW_ATTR_STATE,
+                       get_unaligned_u64(flow->state));
+    }
+
+    odp_flow = ofpbuf_push_uninit(buf, sizeof *odp_flow);
+    odp_flow->nlmsg_flags = flow->nlmsg_flags;
+    odp_flow->dp_idx = flow->dp_idx;
+    odp_flow->len = buf->size;
+    odp_flow->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
+}
+
+/* Clears 'flow' to "empty" values. */
+void
+dpif_linux_flow_init(struct dpif_linux_flow *flow)
+{
+    memset(flow, 0, sizeof *flow);
+}
+
+/* Executes 'request' in the kernel datapath.  If the command fails, returns a
+ * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
+ * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
+ * result of the command is expected to be an odp_flow also, which is decoded
+ * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
+ * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
+int
+dpif_linux_flow_transact(const struct dpif_linux_flow *request,
+                         struct dpif_linux_flow *reply, struct ofpbuf **bufp)
+{
+    struct ofpbuf *buf = NULL;
+    int error;
+    int fd;
+
+    assert((reply != NULL) == (bufp != NULL));
+
+    error = get_dp0_fd(&fd);
+    if (error) {
+        goto error;
+    }
+
+    buf = ofpbuf_new(1024);
+    dpif_linux_flow_to_ofpbuf(request, buf);
+
+    error = ioctl(fd, request->cmd, buf->data) ? errno : 0;
+    if (error) {
+        goto error;
+    }
+
+    if (bufp) {
+        buf->size = ((struct odp_flow *) buf->data)->len;
+        error = dpif_linux_flow_from_ofpbuf(reply, buf);
+        if (error) {
+            goto error;
+        }
+        *bufp = buf;
+    } else {
+        ofpbuf_delete(buf);
+    }
+    return 0;
+
+error:
+    ofpbuf_delete(buf);
+    if (bufp) {
+        memset(reply, 0, sizeof *reply);
+        *bufp = NULL;
+    }
+    return error;
+}
+
+static void
+dpif_linux_flow_get_stats(const struct dpif_linux_flow *flow,
+                          struct dpif_flow_stats *stats)
+{
+    if (flow->stats) {
+        stats->n_packets = get_unaligned_u64(&flow->stats->n_packets);
+        stats->n_bytes = get_unaligned_u64(&flow->stats->n_bytes);
+    } else {
+        stats->n_packets = 0;
+        stats->n_bytes = 0;
+    }
+    stats->used = flow->used ? get_unaligned_u64(flow->used) : 0;
+    stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
+}
 
-- 
1.7.1
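
As the comment on struct dpif_linux_flow above points out, Netlink guarantees only 4-byte alignment for attribute payloads, so the 64-bit values carried by ODP_FLOW_ATTR_STATS, ODP_FLOW_ATTR_USED, and ODP_FLOW_ATTR_STATE may not be naturally aligned.  A minimal sketch of a memcpy-based accessor in the spirit of lib/unaligned.h's get_unaligned_u64() follows; it is illustrative only and not part of the patch.

/* Sketch: read a 64-bit value that may sit on a 32-bit boundary.
 * The real get_unaligned_u64() in lib/unaligned.h plays this role. */
#include <stdint.h>
#include <string.h>

static inline uint64_t
example_get_unaligned_u64(const uint64_t *p)
{
    uint64_t value;

    memcpy(&value, p, sizeof value);  /* Byte-wise copy: no alignment needed. */
    return value;
}

dpif_linux_flow_get_stats() at the end of the patch uses the real helper in exactly this way when it copies the per-flow counters out of the reply buffer.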