[ovs-dev] [netlink v4 41/52] datapath: Convert datapath operations to use Netlink framing.
Ben Pfaff
blp at nicira.com
Wed Jan 12 05:49:53 UTC 2011
Signed-off-by: Ben Pfaff <blp at nicira.com>
---
datapath/datapath.c | 565 +++++++++++++++++++++----------
datapath/vport.c | 1 +
include/openvswitch/datapath-protocol.h | 47 ++-
lib/dpif-linux.c | 340 +++++++++++++++----
4 files changed, 695 insertions(+), 258 deletions(-)
diff --git a/datapath/datapath.c b/datapath/datapath.c
index ff2a6b8..bc64f89 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -68,7 +68,7 @@ EXPORT_SYMBOL(dp_ioctl_hook);
* It is safe to access the datapath and vport structures with just
* dp_mutex.
*/
-static struct datapath __rcu *dps[ODP_MAX];
+static struct datapath __rcu *dps[256];
static DEFINE_MUTEX(dp_mutex);
static struct vport *new_vport(const struct vport_parms *);
@@ -76,7 +76,7 @@ static struct vport *new_vport(const struct vport_parms *);
/* Must be called with rcu_read_lock or dp_mutex. */
struct datapath *get_dp(int dp_idx)
{
- if (dp_idx < 0 || dp_idx >= ODP_MAX)
+ if (dp_idx < 0 || dp_idx >= ARRAY_SIZE(dps))
return NULL;
return rcu_dereference_check(dps[dp_idx], rcu_read_lock_held() ||
lockdep_is_held(&dp_mutex));
@@ -208,113 +208,6 @@ static struct kobj_type dp_ktype = {
.release = release_dp
};
-static int create_dp(int dp_idx, const char __user *devnamep)
-{
- struct vport_parms parms;
- char devname[IFNAMSIZ];
- struct vport *vport;
- struct datapath *dp;
- int err;
- int i;
-
- if (devnamep) {
- int retval = strncpy_from_user(devname, devnamep, IFNAMSIZ);
- if (retval < 0) {
- err = -EFAULT;
- goto err;
- } else if (retval >= IFNAMSIZ) {
- err = -ENAMETOOLONG;
- goto err;
- }
- } else {
- snprintf(devname, sizeof devname, "of%d", dp_idx);
- }
-
- rtnl_lock();
- mutex_lock(&dp_mutex);
- err = -ENODEV;
- if (!try_module_get(THIS_MODULE))
- goto err_unlock;
-
- /* Exit early if a datapath with that number already exists.
- * (We don't use -EEXIST because that's ambiguous with 'devname'
- * conflicting with an existing network device name.) */
- err = -EBUSY;
- if (get_dp(dp_idx))
- goto err_put_module;
-
- err = -ENOMEM;
- dp = kzalloc(sizeof *dp, GFP_KERNEL);
- if (dp == NULL)
- goto err_put_module;
- INIT_LIST_HEAD(&dp->port_list);
- mutex_init(&dp->mutex);
- mutex_lock(&dp->mutex);
- dp->dp_idx = dp_idx;
- for (i = 0; i < DP_N_QUEUES; i++)
- skb_queue_head_init(&dp->queues[i]);
- init_waitqueue_head(&dp->waitqueue);
-
- /* Initialize kobject for bridge. This will be added as
- * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
- dp->ifobj.kset = NULL;
- kobject_init(&dp->ifobj, &dp_ktype);
-
- /* Allocate table. */
- err = -ENOMEM;
- rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
- if (!dp->table)
- goto err_free_dp;
-
- /* Set up our datapath device. */
- parms.name = devname;
- parms.type = ODPVT_INTERNAL;
- parms.options = NULL;
- parms.dp = dp;
- parms.port_no = ODPP_LOCAL;
- vport_lock();
- vport = new_vport(&parms);
- vport_unlock();
- if (IS_ERR(vport)) {
- err = PTR_ERR(vport);
- if (err == -EBUSY)
- err = -EEXIST;
-
- goto err_destroy_table;
- }
-
- dp->drop_frags = 0;
- dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
- if (!dp->stats_percpu) {
- err = -ENOMEM;
- goto err_destroy_local_port;
- }
-
- rcu_assign_pointer(dps[dp_idx], dp);
- dp_sysfs_add_dp(dp);
-
- mutex_unlock(&dp->mutex);
- mutex_unlock(&dp_mutex);
- rtnl_unlock();
-
- return 0;
-
-err_destroy_local_port:
- dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
-err_destroy_table:
- tbl_destroy(get_table_protected(dp), NULL);
-err_free_dp:
- mutex_unlock(&dp->mutex);
- kfree(dp);
-err_put_module:
- module_put(THIS_MODULE);
-err_unlock:
- mutex_unlock(&dp_mutex);
- rtnl_unlock();
-err:
- return err;
-}
-
static void destroy_dp_rcu(struct rcu_head *rcu)
{
struct datapath *dp = container_of(rcu, struct datapath, rcu);
@@ -328,22 +221,11 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
kobject_put(&dp->ifobj);
}
-static int destroy_dp(int dp_idx)
+/* Caller must hold RTNL, dp_mutex, and dp->mutex. */
+static void destroy_dp(struct datapath *dp)
{
- struct datapath *dp;
- int err = 0;
struct vport *p, *n;
- rtnl_lock();
- mutex_lock(&dp_mutex);
- dp = get_dp(dp_idx);
- if (!dp) {
- err = -ENODEV;
- goto out;
- }
-
- mutex_lock(&dp->mutex);
-
list_for_each_entry_safe (p, n, &dp->port_list, node)
if (p->port_no != ODPP_LOCAL)
dp_detach_port(p);
@@ -355,11 +237,6 @@ static int destroy_dp(int dp_idx)
mutex_unlock(&dp->mutex);
call_rcu(&dp->rcu, destroy_dp_rcu);
module_put(THIS_MODULE);
-
-out:
- mutex_unlock(&dp_mutex);
- rtnl_unlock();
- return err;
}
/* Called with RTNL lock, dp->mutex, and vport_lock. */
@@ -1200,12 +1077,11 @@ static int execute_packet(const struct odp_execute __user *executep)
return error;
}
-static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
+static void get_dp_stats(struct datapath *dp, struct odp_stats *stats)
{
- struct odp_stats stats;
int i;
- stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0;
+ stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
for_each_possible_cpu(i) {
const struct dp_stats_percpu *percpu_stats;
struct dp_stats_percpu local_stats;
@@ -1218,12 +1094,11 @@ static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
local_stats = *percpu_stats;
} while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
- stats.n_frags += local_stats.n_frags;
- stats.n_hit += local_stats.n_hit;
- stats.n_missed += local_stats.n_missed;
- stats.n_lost += local_stats.n_lost;
+ stats->n_frags += local_stats.n_frags;
+ stats->n_hit += local_stats.n_hit;
+ stats->n_missed += local_stats.n_missed;
+ stats->n_lost += local_stats.n_lost;
}
- return copy_to_user(statsp, &stats, sizeof stats) ? -EFAULT : 0;
}
/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports */
@@ -1277,6 +1152,365 @@ static void set_listen_mask(struct file *f, int listen_mask)
f->private_data = (void*)(long)listen_mask;
}
+static struct nla_policy datapath_policy[ODPDT_MAX + 1] = { /* Policy handed to nla_parse() by copy_datapath_from_user(). */
+ [ODPDT_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, /* String attribute with a length limit of IFNAMSIZ - 1. */
+ [ODPDT_IPV4_FRAGS] = { .type = NLA_U32 }, /* Value is range-checked separately in copy_datapath_from_user(). */
+ [ODPDT_SAMPLING] = { .type = NLA_U32 }, /* Stored as dp->sflow_probability by change_datapath(). */
+}; /* ODPDT_STATS has no entry: the kernel writes it into replies but never reads it from a request. */
+
+/* Builds a reply describing 'dp' -- a struct odp_datapath header followed by
+ * ODPDT_* attributes -- and copies it to userspace at 'dst'.
+ *
+ * 'total_len' is the number of bytes that userspace made available at 'dst'.
+ *
+ * Returns 0 if successful, -ENOMEM if the reply buffer cannot be allocated,
+ * -EMSGSIZE if the reply does not fit in the buffer or in 'total_len' bytes,
+ * or -EFAULT if the copy to userspace fails. */
+static int copy_datapath_to_user(void __user *dst, struct datapath *dp, uint32_t total_len)
+{
+	struct odp_datapath *odp_datapath;
+	struct sk_buff *skb;
+	struct nlattr *nla;
+	int err;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	err = -ENOMEM;
+	if (!skb)
+		goto exit;
+
+	/* Only the construction of the reply runs inside the RCU read-side
+	 * critical section.  NOTE(review): presumably dp_name() is what needs
+	 * RCU protection here -- confirm. */
+	rcu_read_lock();
+	odp_datapath = (struct odp_datapath*)__skb_put(skb, sizeof(struct odp_datapath));
+	odp_datapath->dp_idx = dp->dp_idx;
+	odp_datapath->total_len = total_len;
+
+	/* The NLA_PUT_*() macros jump to nla_put_failure when 'skb' is full. */
+	NLA_PUT_STRING(skb, ODPDT_NAME, dp_name(dp));
+
+	nla = nla_reserve(skb, ODPDT_STATS, sizeof(struct odp_stats));
+	if (!nla)
+		goto nla_put_failure;
+	get_dp_stats(dp, nla_data(nla));
+
+	NLA_PUT_U32(skb, ODPDT_IPV4_FRAGS, dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);
+
+	/* A zero sampling probability is reported by omitting the attribute. */
+	if (dp->sflow_probability)
+		NLA_PUT_U32(skb, ODPDT_SAMPLING, dp->sflow_probability);
+
+	if (skb->len > total_len)
+		goto nla_put_failure;
+
+	odp_datapath->len = skb->len;
+
+	/* copy_to_user() may fault and sleep, which is not allowed inside an
+	 * RCU read-side critical section, so leave it first.  The reply is
+	 * already complete in 'skb' and no longer refers to 'dp'. */
+	rcu_read_unlock();
+	err = copy_to_user(dst, skb->data, skb->len) ? -EFAULT : 0;
+	goto exit_free;
+
+nla_put_failure:
+	rcu_read_unlock();
+	err = -EMSGSIZE;
+exit_free:
+	kfree_skb(skb);
+exit:
+	return err;
+}
+
+/* Copies a datapath request (struct odp_datapath plus trailing ODPDT_*
+ * attributes) in from userspace and parses its attributes into 'a'.
+ *
+ * On success returns an skb holding the request; the entries of 'a' point
+ * into it, so the caller must keep the skb until it is done with 'a' and then
+ * release it with kfree_skb().  On failure returns an ERR_PTR(): -EFAULT if
+ * userspace memory cannot be read, -EINVAL if the length or the
+ * ODPDT_IPV4_FRAGS value is bad, -ENOMEM if allocation fails, or whatever
+ * nla_parse() or VERIFY_NUL_STRING() reports. */
+static struct sk_buff *copy_datapath_from_user(struct odp_datapath __user *uodp_datapath, struct nlattr *a[ODPDT_MAX + 1])
+{
+	const size_t hdr_len = sizeof(struct odp_datapath);
+	struct odp_datapath *hdr;
+	struct sk_buff *skb;
+	u32 ulen;
+	int err;
+
+	/* Peek at the length field to learn how much to copy in. */
+	if (get_user(ulen, &uodp_datapath->len))
+		return ERR_PTR(-EFAULT);
+	if (ulen < hdr_len)
+		return ERR_PTR(-EINVAL);
+
+	skb = alloc_skb(ulen, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	if (copy_from_user(__skb_put(skb, ulen), uodp_datapath, ulen)) {
+		err = -EFAULT;
+		goto error_free_skb;
+	}
+
+	/* The copied-in header must agree with the length read above. */
+	hdr = (struct odp_datapath *)skb->data;
+	if (hdr->len != ulen) {
+		err = -EINVAL;
+		goto error_free_skb;
+	}
+
+	err = nla_parse(a, ODPDT_MAX, (struct nlattr *)(skb->data + hdr_len),
+			skb->len - hdr_len, datapath_policy);
+	if (err)
+		goto error_free_skb;
+
+	/* Only the two real fragment-handling modes are acceptable. */
+	if (a[ODPDT_IPV4_FRAGS]) {
+		switch (nla_get_u32(a[ODPDT_IPV4_FRAGS])) {
+		case ODP_DP_FRAG_ZERO:
+		case ODP_DP_FRAG_DROP:
+			break;
+		default:
+			err = -EINVAL;
+			goto error_free_skb;
+		}
+	}
+
+	err = VERIFY_NUL_STRING(a[ODPDT_NAME]);
+	if (err)
+		goto error_free_skb;
+
+	return skb;
+
+error_free_skb:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+
+/* Finds the datapath that a request refers to: by the name of its local port
+ * if ODPDT_NAME is present in 'a', otherwise by 'odp_datapath->dp_idx'.
+ *
+ * The caller must hold dp_mutex.  On success returns the datapath with its
+ * dp->mutex held; the caller is responsible for releasing it.  Returns
+ * ERR_PTR(-ENODEV) if no such datapath exists. */
+static struct datapath *lookup_datapath(struct odp_datapath *odp_datapath, struct nlattr *a[ODPDT_MAX + 1])
+{
+	struct datapath *dp;
+
+	WARN_ON_ONCE(!mutex_is_locked(&dp_mutex));
+
+	if (a[ODPDT_NAME]) {
+		struct vport *vport;
+		int dp_idx;
+
+		/* A name identifies a datapath only if it belongs to that
+		 * datapath's local port. */
+		vport_lock();
+		vport = vport_locate(nla_data(a[ODPDT_NAME]));
+		if (vport && vport->port_no == ODPP_LOCAL)
+			dp_idx = vport->dp->dp_idx;
+		else
+			dp_idx = -1;
+		vport_unlock();
+
+		if (dp_idx < 0)
+			return ERR_PTR(-ENODEV);
+
+		dp = get_dp(dp_idx);
+	} else {
+		dp = get_dp(odp_datapath->dp_idx);
+		if (!dp)
+			return ERR_PTR(-ENODEV);
+	}
+
+	mutex_lock(&dp->mutex);
+	return dp;
+}
+
+/* Applies the settable attributes present in 'a' to 'dp'.  Attributes that
+ * are absent leave the corresponding setting untouched. */
+static void change_datapath(struct datapath *dp, struct nlattr *a[ODPDT_MAX + 1])
+{
+	struct nlattr *frags = a[ODPDT_IPV4_FRAGS];
+	struct nlattr *sampling = a[ODPDT_SAMPLING];
+
+	if (frags)
+		dp->drop_frags = nla_get_u32(frags) == ODP_DP_FRAG_DROP;
+
+	if (sampling)
+		dp->sflow_probability = nla_get_u32(sampling);
+}
+
+/* Handles ODP_DP_NEW: creates a datapath from the request at
+ * 'uodp_datapath'.
+ *
+ * ODPDT_NAME is mandatory and names the datapath's local port.  A negative
+ * dp_idx in the request asks for the first free index; otherwise the given
+ * index is used.  Any ODPDT_IPV4_FRAGS and ODPDT_SAMPLING attributes are
+ * applied to the new datapath.
+ *
+ * Returns 0 if successful, otherwise a negative errno value:
+ * -EINVAL if the name is missing or the index is out of range, -EFBIG if
+ * every index is in use, -EBUSY if the requested index is taken, -EEXIST if
+ * the local port cannot be created because new_vport() reports -EBUSY,
+ * -ENODEV if the module reference cannot be taken, -ENOMEM on allocation
+ * failure, or an error from copy_datapath_from_user() or new_vport(). */
+static int new_datapath(struct odp_datapath __user *uodp_datapath)
+{
+	struct nlattr *a[ODPDT_MAX + 1];
+	struct odp_datapath *odp_datapath;
+	struct vport_parms parms;
+	struct sk_buff *skb;
+	struct datapath *dp;
+	struct vport *vport;
+	int dp_idx;
+	int err;
+	int i;
+
+	skb = copy_datapath_from_user(uodp_datapath, a);
+	err = PTR_ERR(skb);
+	if (IS_ERR(skb))
+		goto err;
+	odp_datapath = (struct odp_datapath *)skb->data;
+
+	err = -EINVAL;
+	if (!a[ODPDT_NAME])
+		goto err_free_skb;
+
+	rtnl_lock();
+	mutex_lock(&dp_mutex);
+	err = -ENODEV;
+	if (!try_module_get(THIS_MODULE))
+		goto err_unlock_dp_mutex;
+
+	/* Choose the datapath index: first free slot if the request did not
+	 * name one, otherwise the requested slot if it is valid and free. */
+	dp_idx = odp_datapath->dp_idx;
+	if (dp_idx < 0) {
+		err = -EFBIG;
+		for (dp_idx = 0; dp_idx < ARRAY_SIZE(dps); dp_idx++) {
+			if (get_dp(dp_idx))
+				continue;
+			err = 0;
+			break;
+		}
+	} else if (dp_idx < ARRAY_SIZE(dps))
+		err = get_dp(dp_idx) ? -EBUSY : 0;
+	else
+		err = -EINVAL;
+	if (err)
+		goto err_put_module;
+
+	err = -ENOMEM;
+	dp = kzalloc(sizeof *dp, GFP_KERNEL);
+	if (dp == NULL)
+		goto err_put_module;
+	INIT_LIST_HEAD(&dp->port_list);
+	mutex_init(&dp->mutex);
+	mutex_lock(&dp->mutex);
+	dp->dp_idx = dp_idx;
+	for (i = 0; i < DP_N_QUEUES; i++)
+		skb_queue_head_init(&dp->queues[i]);
+	init_waitqueue_head(&dp->waitqueue);
+
+	/* Initialize kobject for bridge.  This will be added as
+	 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
+	dp->ifobj.kset = NULL;
+	kobject_init(&dp->ifobj, &dp_ktype);
+
+	/* Allocate table. */
+	err = -ENOMEM;
+	rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
+	if (!dp->table)
+		goto err_free_dp;
+
+	/* Set up our datapath device. */
+	parms.name = nla_data(a[ODPDT_NAME]);
+	parms.type = ODPVT_INTERNAL;
+	parms.options = NULL;
+	parms.dp = dp;
+	parms.port_no = ODPP_LOCAL;
+	vport_lock();
+	vport = new_vport(&parms);
+	vport_unlock();
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		if (err == -EBUSY)
+			err = -EEXIST;
+
+		goto err_destroy_table;
+	}
+
+	dp->drop_frags = 0;
+	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
+	if (!dp->stats_percpu) {
+		err = -ENOMEM;
+		goto err_destroy_local_port;
+	}
+
+	change_datapath(dp, a);
+
+	rcu_assign_pointer(dps[dp_idx], dp);
+	dp_sysfs_add_dp(dp);
+
+	mutex_unlock(&dp->mutex);
+	mutex_unlock(&dp_mutex);
+	rtnl_unlock();
+
+	/* The request buffer, and the attributes in 'a' that point into it,
+	 * are no longer needed.  Free it here too, not only on the error
+	 * paths, or every successful create leaks one skb. */
+	kfree_skb(skb);
+
+	return 0;
+
+err_destroy_local_port:
+	dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
+err_destroy_table:
+	tbl_destroy(get_table_protected(dp), NULL);
+err_free_dp:
+	mutex_unlock(&dp->mutex);
+	kfree(dp);
+err_put_module:
+	module_put(THIS_MODULE);
+err_unlock_dp_mutex:
+	mutex_unlock(&dp_mutex);
+	rtnl_unlock();
+err_free_skb:
+	kfree_skb(skb);
+err:
+	return err;
+}
+
+/* Handles ODP_DP_DEL and ODP_DP_SET on the datapath identified by the request
+ * at 'uodp_datapath'.
+ *
+ * For ODP_DP_DEL the datapath is destroyed; for any other 'cmd' the settable
+ * attributes in the request are applied to it.  Returns 0 if successful,
+ * otherwise a negative errno value from copy_datapath_from_user() or
+ * lookup_datapath(). */
+static int modify_datapath(unsigned int cmd, struct odp_datapath __user *uodp_datapath)
+{
+	struct nlattr *a[ODPDT_MAX + 1];
+	struct sk_buff *skb;
+	struct datapath *dp;
+	int err = 0;
+
+	skb = copy_datapath_from_user(uodp_datapath, a);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	rtnl_lock();
+	mutex_lock(&dp_mutex);
+
+	/* On success lookup_datapath() returns with dp->mutex held. */
+	dp = lookup_datapath((struct odp_datapath *)skb->data, a);
+	if (IS_ERR(dp)) {
+		err = PTR_ERR(dp);
+	} else if (cmd == ODP_DP_DEL) {
+		/* destroy_dp() releases dp->mutex itself. */
+		destroy_dp(dp);
+	} else {
+		change_datapath(dp, a);
+		mutex_unlock(&dp->mutex);
+	}
+
+	kfree_skb(skb);
+	mutex_unlock(&dp_mutex);
+	rtnl_unlock();
+	return err;
+}
+
+/* Handles ODP_DP_GET: looks up the datapath identified by the request at
+ * 'uodp_datapath' and writes a description of it back to the same userspace
+ * buffer, limited to the request's total_len bytes.
+ *
+ * Returns 0 if successful, otherwise a negative errno value from
+ * copy_datapath_from_user(), lookup_datapath() or copy_datapath_to_user(). */
+static int get_datapath(struct odp_datapath __user *uodp_datapath)
+{
+	struct nlattr *a[ODPDT_MAX + 1];
+	struct odp_datapath *request;
+	struct sk_buff *skb;
+	struct datapath *dp;
+	int err;
+
+	skb = copy_datapath_from_user(uodp_datapath, a);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+	request = (struct odp_datapath *)skb->data;
+
+	/* dp_mutex is needed only for the lookup itself; on success the
+	 * datapath comes back with its own dp->mutex held. */
+	mutex_lock(&dp_mutex);
+	dp = lookup_datapath(request, a);
+	mutex_unlock(&dp_mutex);
+
+	if (IS_ERR(dp)) {
+		err = PTR_ERR(dp);
+	} else {
+		err = copy_datapath_to_user(uodp_datapath, dp, request->total_len);
+		mutex_unlock(&dp->mutex);
+	}
+
+	kfree_skb(skb);
+	return err;
+}
+
+/* Handles ODP_DP_DUMP: reports the first existing datapath whose index is at
+ * least the dp_idx given in the request at 'uodp_datapath', writing its
+ * description back to the same userspace buffer.
+ *
+ * A negative dp_idx (the "not specific to a datapath" value) starts the
+ * search at index 0.  Userspace iterates by re-issuing the request with
+ * dp_idx set to one more than the index in the previous reply.
+ *
+ * Returns 0 if successful, -ENODEV if there is no such datapath, or another
+ * negative errno value from copy_datapath_from_user() or
+ * copy_datapath_to_user(). */
+static int dump_datapath(struct odp_datapath __user *uodp_datapath)
+{
+	struct nlattr *a[ODPDT_MAX + 1];
+	struct odp_datapath *odp_datapath;
+	struct sk_buff *skb;
+	u32 dp_idx;
+	int err;
+
+	skb = copy_datapath_from_user(uodp_datapath, a);
+	err = PTR_ERR(skb);
+	if (IS_ERR(skb))
+		goto exit;
+	odp_datapath = (struct odp_datapath *)skb->data;
+
+	/* odp_datapath->dp_idx is signed.  Assigning -1 straight to the
+	 * unsigned loop variable would yield 0xffffffff and skip the loop
+	 * entirely, so clamp negative values to the first index. */
+	dp_idx = odp_datapath->dp_idx < 0 ? 0 : odp_datapath->dp_idx;
+
+	mutex_lock(&dp_mutex);
+	for (; dp_idx < ARRAY_SIZE(dps); dp_idx++) {
+		struct datapath *dp = get_dp(dp_idx);
+		if (!dp)
+			continue;
+
+		/* Take dp->mutex before dropping dp_mutex so the datapath is
+		 * held continuously while the reply is built. */
+		mutex_lock(&dp->mutex);
+		mutex_unlock(&dp_mutex);
+		err = copy_datapath_to_user(uodp_datapath, dp, odp_datapath->total_len);
+		mutex_unlock(&dp->mutex);
+		goto exit_free;
+	}
+	mutex_unlock(&dp_mutex);
+	err = -ENODEV;
+
+exit_free:
+	kfree_skb(skb);
+exit:
+	return err;
+}
+
static struct nla_policy vport_policy[ODPPT_MAX + 1] = {
[ODPPT_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
[ODPPT_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
@@ -1632,18 +1866,26 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
{
int dp_idx = iminor(f->f_dentry->d_inode);
struct datapath *dp;
- int drop_frags, listeners;
- unsigned int sflow_probability;
+ int listeners;
int err;
/* Handle commands with special locking requirements up front. */
switch (cmd) {
- case ODP_DP_CREATE:
- err = create_dp(dp_idx, (char __user *)argp);
+ case ODP_DP_NEW:
+ err = new_datapath((struct odp_datapath __user *)argp);
+ goto exit;
+
+ case ODP_DP_GET:
+ err = get_datapath((struct odp_datapath __user *)argp);
+ goto exit;
+
+ case ODP_DP_DEL:
+ case ODP_DP_SET:
+ err = modify_datapath(cmd, (struct odp_datapath __user *)argp);
goto exit;
- case ODP_DP_DESTROY:
- err = destroy_dp(dp_idx);
+ case ODP_DP_DUMP:
+ err = dump_datapath((struct odp_datapath __user *)argp);
goto exit;
case ODP_VPORT_NEW:
@@ -1694,25 +1936,6 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
goto exit;
switch (cmd) {
- case ODP_DP_STATS:
- err = get_dp_stats(dp, (struct odp_stats __user *)argp);
- break;
-
- case ODP_GET_DROP_FRAGS:
- err = put_user(dp->drop_frags, (int __user *)argp);
- break;
-
- case ODP_SET_DROP_FRAGS:
- err = get_user(drop_frags, (int __user *)argp);
- if (err)
- break;
- err = -EINVAL;
- if (drop_frags != 0 && drop_frags != 1)
- break;
- dp->drop_frags = drop_frags;
- err = 0;
- break;
-
case ODP_GET_LISTEN_MASK:
err = put_user(get_listen_mask(f), (int __user *)argp);
break;
@@ -1728,16 +1951,6 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
set_listen_mask(f, listeners);
break;
- case ODP_GET_SFLOW_PROBABILITY:
- err = put_user(dp->sflow_probability, (unsigned int __user *)argp);
- break;
-
- case ODP_SET_SFLOW_PROBABILITY:
- err = get_user(sflow_probability, (unsigned int __user *)argp);
- if (!err)
- dp->sflow_probability = sflow_probability;
- break;
-
default:
err = -ENOIOCTLCMD;
break;
@@ -1922,24 +2135,22 @@ static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned
int err;
switch (cmd) {
- case ODP_DP_DESTROY:
case ODP_FLOW_FLUSH:
/* Ioctls that don't need any translation at all. */
return openvswitch_ioctl(f, cmd, argp);
- case ODP_DP_CREATE:
+ case ODP_DP_NEW:
+ case ODP_DP_GET:
+ case ODP_DP_DEL:
+ case ODP_DP_SET:
+ case ODP_DP_DUMP:
case ODP_VPORT_NEW:
case ODP_VPORT_DEL:
case ODP_VPORT_GET:
case ODP_VPORT_SET:
case ODP_VPORT_DUMP:
- case ODP_DP_STATS:
- case ODP_GET_DROP_FRAGS:
- case ODP_SET_DROP_FRAGS:
case ODP_SET_LISTEN_MASK:
case ODP_GET_LISTEN_MASK:
- case ODP_SET_SFLOW_PROBABILITY:
- case ODP_GET_SFLOW_PROBABILITY:
/* Ioctls that just need their pointer argument extended. */
return openvswitch_ioctl(f, cmd, (unsigned long)compat_ptr(argp));
}
diff --git a/datapath/vport.c b/datapath/vport.c
index 0fa4dd6..4db1f01 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -87,6 +87,7 @@ do { \
} \
} while (0)
+
/**
* vport_init - initialize vport subsystem
*
diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h
index fbd32d1..cb4ab26 100644
--- a/include/openvswitch/datapath-protocol.h
+++ b/include/openvswitch/datapath-protocol.h
@@ -70,14 +70,11 @@
#include <linux/if_link.h>
#include <linux/netlink.h>
-#define ODP_MAX 256 /* Maximum number of datapaths. */
-
-#define ODP_DP_CREATE _IO('O', 0)
-#define ODP_DP_DESTROY _IO('O', 1)
-#define ODP_DP_STATS _IOW('O', 2, struct odp_stats)
-
-#define ODP_GET_DROP_FRAGS _IOW('O', 3, int)
-#define ODP_SET_DROP_FRAGS _IOR('O', 4, int)
+#define ODP_DP_NEW _IOWR('O', 0, struct odp_datapath)
+#define ODP_DP_DEL _IOR('O', 1, struct odp_datapath)
+#define ODP_DP_GET _IOWR('O', 2, struct odp_datapath)
+#define ODP_DP_SET _IOWR('O', 3, struct odp_datapath)
+#define ODP_DP_DUMP _IOWR('O', 4, struct odp_datapath)
#define ODP_GET_LISTEN_MASK _IOW('O', 5, int)
#define ODP_SET_LISTEN_MASK _IOR('O', 6, int)
@@ -96,8 +93,38 @@
#define ODP_EXECUTE _IOR('O', 18, struct odp_execute)
-#define ODP_SET_SFLOW_PROBABILITY _IOR('O', 19, int)
-#define ODP_GET_SFLOW_PROBABILITY _IOW('O', 20, int)
+/**
+ * struct odp_datapath - header with basic information about a datapath.
+ * @dp_idx: Datapath index (-1 to make a request not specific to a datapath).
+ * @len: Length of this structure plus the Netlink attributes following it.
+ * @total_len: Total space available for kernel reply to request.
+ *
+ * Followed by &struct nlattr attributes, whose types are drawn from %ODPDT_*,
+ * up to a length of @len bytes including the &struct odp_datapath header.
+ *
+ * The kernel rejects a request with -EINVAL if @len is smaller than this
+ * structure, and fails a reply with -EMSGSIZE if it would need more than
+ * @total_len bytes.  In a reply, @len is the number of bytes written back.
+ */
+struct odp_datapath {
+ int32_t dp_idx;
+ uint32_t len;
+ uint32_t total_len;
+};
+
+enum odp_datapath_type { /* Netlink attribute types that may follow struct odp_datapath. */
+ ODPDT_UNSPEC, /* value 0; presumably never sent as a real attribute -- TODO confirm */
+ ODPDT_NAME, /* name of dp_ifidx netdev */
+ ODPDT_STATS, /* struct odp_stats */
+ ODPDT_IPV4_FRAGS, /* 32-bit enum odp_frag_handling */
+ ODPDT_SAMPLING, /* 32-bit fraction of packets to sample */
+ __ODPDT_MAX /* one past the last attribute type; ODPDT_MAX is derived from it */
+};
+
+#define ODPDT_MAX (__ODPDT_MAX - 1)
+
+/* Values for ODPDT_IPV4_FRAGS. */
+enum odp_frag_handling {
+ ODP_DP_FRAG_UNSPEC, /* Not a valid setting: the kernel accepts only ZERO or DROP. */
+ ODP_DP_FRAG_ZERO, /* Treat IP fragments as transport port 0. */
+ ODP_DP_FRAG_DROP /* Drop IP fragments. */
+};
struct odp_stats {
uint64_t n_frags; /* Number of dropped IP fragments. */
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 852b468..38a06ea 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -47,11 +47,32 @@
#include "rtnetlink-link.h"
#include "shash.h"
#include "svec.h"
+#include "unaligned.h"
#include "util.h"
#include "vlog.h"
VLOG_DEFINE_THIS_MODULE(dpif_linux);
+/* Userspace view of one datapath request or reply: the ioctl to issue, the
+ * struct odp_datapath header fields, and the decoded ODPDT_* attributes.
+ *
+ * dpif_linux_dp_init() yields the "empty" value, in which 'dp_idx' is -1 and
+ * every attribute is absent (null pointer or zero). */
+struct dpif_linux_dp {
+    /* ioctl command argument. */
+    int cmd;
+
+    /* struct odp_datapath header.
+     *
+     * Signed, like the int32_t 'dp_idx' in struct odp_datapath, because -1
+     * is a meaningful value ("not specific to a datapath") and the field is
+     * formatted with "%d". */
+    int32_t dp_idx;
+
+    /* Attributes. */
+    const char *name;                  /* ODPDT_NAME. */
+    struct odp_stats stats;            /* ODPDT_STATS. */
+    enum odp_frag_handling ipv4_frags; /* ODPDT_IPV4_FRAGS. */
+    const uint32_t *sampling;          /* ODPDT_SAMPLING. */
+};
+
+static void dpif_linux_dp_init(struct dpif_linux_dp *);
+static int dpif_linux_dp_transact(const struct dpif_linux_dp *request,
+ struct dpif_linux_dp *reply,
+ struct ofpbuf **bufp);
+static int dpif_linux_dp_get(const struct dpif *, struct dpif_linux_dp *reply,
+ struct ofpbuf **bufp);
/* Datapath interface for the openvswitch Linux kernel module. */
struct dpif_linux {
struct dpif dpif;
@@ -75,7 +96,6 @@ static int lookup_internal_device(const char *name, int *dp_idx, int *port_no);
static int open_dpif(const struct dpif_linux_vport *local_vport,
struct dpif **);
static int get_openvswitch_major(void);
-static int create_minor(const char *name, int minor);
static int open_minor(int minor, int *fdp);
static int make_openvswitch_device(int minor, char **fnp);
static void dpif_linux_port_changed(const struct rtnetlink_link_change *,
@@ -91,9 +111,10 @@ dpif_linux_cast(const struct dpif *dpif)
static int
dpif_linux_enumerate(struct svec *all_dps)
{
+ struct dpif_linux_dp request, reply;
+ struct ofpbuf *buf;
int major;
- int error;
- int i;
+ int err;
/* Check that the Open vSwitch module is loaded. */
major = get_openvswitch_major();
@@ -101,22 +122,19 @@ dpif_linux_enumerate(struct svec *all_dps)
return -major;
}
- error = 0;
- for (i = 0; i < ODP_MAX; i++) {
- struct dpif *dpif;
+ dpif_linux_dp_init(&request);
+ request.cmd = ODP_DP_DUMP;
+ for (;
+ !(err = dpif_linux_dp_transact(&request, &reply, &buf));
+ request.dp_idx = reply.dp_idx + 1) {
char devname[16];
- int retval;
-
- sprintf(devname, "dp%d", i);
- retval = dpif_open(devname, "system", &dpif);
- if (!retval) {
- svec_add(all_dps, devname);
- dpif_uninit(dpif, true);
- } else if (retval != ENODEV && !error) {
- error = retval;
- }
+
+ sprintf(devname, "dp%d", reply.dp_idx);
+ svec_add(all_dps, devname);
+
+ ofpbuf_delete(buf);
}
- return error;
+ return err == ENODEV ? 0 : err;
}
static int
@@ -131,27 +149,20 @@ dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name,
minor = !strncmp(name, "dp", 2)
&& isdigit((unsigned char)name[2]) ? atoi(name + 2) : -1;
if (create) {
- if (minor >= 0) {
- error = create_minor(name, minor);
- if (error) {
- return error;
- }
- } else {
- /* Scan for unused minor number. */
- for (minor = 0; ; minor++) {
- if (minor >= ODP_MAX) {
- /* All datapath numbers in use. */
- return ENOBUFS;
- }
+ struct dpif_linux_dp request, reply;
+ struct ofpbuf *buf;
+ int error;
- error = create_minor(name, minor);
- if (!error) {
- break;
- } else if (error != EBUSY) {
- return error;
- }
- }
+ dpif_linux_dp_init(&request);
+ request.cmd = ODP_DP_NEW;
+ request.dp_idx = minor;
+ request.name = name;
+ error = dpif_linux_dp_transact(&request, &reply, &buf);
+ if (error) {
+ return error;
}
+ minor = reply.dp_idx;
+ ofpbuf_delete(buf);
}
dpif_linux_vport_init(&request);
@@ -245,25 +256,41 @@ dpif_linux_get_all_names(const struct dpif *dpif_, struct svec *all_names)
static int
dpif_linux_destroy(struct dpif *dpif_)
{
- return do_ioctl(dpif_, ODP_DP_DESTROY, NULL);
+ struct dpif_linux *dpif = dpif_linux_cast(dpif_);
+ struct dpif_linux_dp dp;
+
+ dpif_linux_dp_init(&dp);
+ dp.cmd = ODP_DP_DEL;
+ dp.dp_idx = dpif->minor;
+ return dpif_linux_dp_transact(&dp, NULL, NULL);
}
static int
dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats)
{
- memset(stats, 0, sizeof *stats);
- return do_ioctl(dpif_, ODP_DP_STATS, stats);
+ struct dpif_linux_dp dp;
+ struct ofpbuf *buf;
+ int error;
+
+ error = dpif_linux_dp_get(dpif_, &dp, &buf);
+ if (!error) {
+ *stats = dp.stats;
+ ofpbuf_delete(buf);
+ }
+ return error;
}
static int
dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp)
{
- int drop_frags;
+ struct dpif_linux_dp dp;
+ struct ofpbuf *buf;
int error;
- error = do_ioctl(dpif_, ODP_GET_DROP_FRAGS, &drop_frags);
+ error = dpif_linux_dp_get(dpif_, &dp, &buf);
if (!error) {
- *drop_fragsp = drop_frags & 1;
+ *drop_fragsp = dp.ipv4_frags == ODP_DP_FRAG_DROP;
+ ofpbuf_delete(buf);
}
return error;
}
@@ -271,8 +298,14 @@ dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp)
static int
dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags)
{
- int drop_frags_int = drop_frags;
- return do_ioctl(dpif_, ODP_SET_DROP_FRAGS, &drop_frags_int);
+ struct dpif_linux *dpif = dpif_linux_cast(dpif_);
+ struct dpif_linux_dp dp;
+
+ dpif_linux_dp_init(&dp);
+ dp.cmd = ODP_DP_SET;
+ dp.dp_idx = dpif->minor;
+ dp.ipv4_frags = drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO;
+ return dpif_linux_dp_transact(&dp, NULL, NULL);
}
static int
@@ -659,13 +692,29 @@ static int
dpif_linux_get_sflow_probability(const struct dpif *dpif_,
uint32_t *probability)
{
- return do_ioctl(dpif_, ODP_GET_SFLOW_PROBABILITY, probability);
+ struct dpif_linux_dp dp;
+ struct ofpbuf *buf;
+ int error;
+
+ error = dpif_linux_dp_get(dpif_, &dp, &buf);
+ if (!error) {
+ *probability = dp.sampling ? *dp.sampling : 0;
+ ofpbuf_delete(buf);
+ }
+ return error;
}
static int
dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability)
{
- return do_ioctl(dpif_, ODP_SET_SFLOW_PROBABILITY, &probability);
+ struct dpif_linux *dpif = dpif_linux_cast(dpif_);
+ struct dpif_linux_dp dp;
+
+ dpif_linux_dp_init(&dp);
+ dp.cmd = ODP_DP_SET;
+ dp.dp_idx = dpif->minor;
+ dp.sampling = &probability;
+ return dpif_linux_dp_transact(&dp, NULL, NULL);
}
static int
@@ -1002,22 +1051,6 @@ get_major(const char *target)
}
static int
-create_minor(const char *name, int minor)
-{
- int error;
- int fd;
-
- error = open_minor(minor, &fd);
- if (error) {
- return error;
- }
-
- error = ioctl(fd, ODP_DP_CREATE, name) ? errno : 0;
- close(fd);
- return error;
-}
-
-static int
open_minor(int minor, int *fdp)
{
int error;
@@ -1058,6 +1091,24 @@ dpif_linux_port_changed(const struct rtnetlink_link_change *change,
dpif->change_error = true;
}
}
+
+/* Stores in '*dp0_fdp' a file descriptor for datapath minor 0, opening it
+ * with open_minor() on first use and reusing the same descriptor on every
+ * later call.  Returns 0 if successful, otherwise the error reported by
+ * open_minor(), in which case '*dp0_fdp' is left untouched. */
+static int
+get_dp0_fd(int *dp0_fdp)
+{
+    static int cached_fd = -1;
+
+    if (cached_fd < 0) {
+        int new_fd;
+        int error = open_minor(0, &new_fd);
+
+        if (error) {
+            return error;
+        }
+        cached_fd = new_fd;
+    }
+
+    *dp0_fdp = cached_fd;
+    return 0;
+}
/* Parses the contents of 'buf', which contains a "struct odp_vport" followed
* by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
@@ -1188,25 +1239,21 @@ dpif_linux_vport_transact(const struct dpif_linux_vport *request,
struct dpif_linux_vport *reply,
struct ofpbuf **bufp)
{
- static int dp0_fd = -1;
struct ofpbuf *buf = NULL;
int error;
+ int fd;
assert((reply != NULL) == (bufp != NULL));
- if (dp0_fd < 0) {
- int fd;
- error = open_minor(0, &fd);
- if (error) {
- goto error;
- }
- dp0_fd = fd;
+ error = get_dp0_fd(&fd);
+ if (error) {
+ goto error;
}
buf = ofpbuf_new(1024);
dpif_linux_vport_to_ofpbuf(request, buf);
- error = ioctl(dp0_fd, request->cmd, buf->data) ? errno : 0;
+ error = ioctl(fd, request->cmd, buf->data) ? errno : 0;
if (error) {
goto error;
}
@@ -1247,4 +1294,155 @@ dpif_linux_vport_get(const char *name, struct dpif_linux_vport *reply,
return dpif_linux_vport_transact(&request, reply, bufp);
}
+
+/* Decodes 'buf', which holds a "struct odp_datapath" followed by Netlink
+ * attributes, into 'dp'.  Returns 0 if successful, otherwise a positive errno
+ * value (EINVAL if the attributes do not satisfy the policy below).
+ *
+ * 'dp' is always reset to its empty state first.  On success it holds
+ * pointers into 'buf', so the caller must not free 'buf' while 'dp' is still
+ * in use. */
+static int
+dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf)
+{
+    static const struct nl_policy odp_datapath_policy[] = {
+        [ODPDT_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
+        [ODPDT_STATS] = { .type = NL_A_UNSPEC,
+                          .min_len = sizeof(struct odp_stats),
+                          .max_len = sizeof(struct odp_stats),
+                          .optional = true },
+        [ODPDT_IPV4_FRAGS] = { .type = NL_A_U32, .optional = true },
+        [ODPDT_SAMPLING] = { .type = NL_A_U32, .optional = true },
+    };
+
+    struct nlattr *attrs[ARRAY_SIZE(odp_datapath_policy)];
+    const struct odp_datapath *header;
+
+    dpif_linux_dp_init(dp);
+
+    /* The attributes start right after the fixed-size header. */
+    if (!nl_policy_parse(buf, sizeof *header, odp_datapath_policy,
+                         attrs, ARRAY_SIZE(odp_datapath_policy))) {
+        return EINVAL;
+    }
+
+    header = buf->data;
+    dp->dp_idx = header->dp_idx;
+
+    /* ODPDT_NAME is the only attribute the policy does not mark optional. */
+    dp->name = nl_attr_get_string(attrs[ODPDT_NAME]);
+
+    if (attrs[ODPDT_STATS]) {
+        /* Copy byte-wise rather than by structure assignment: Netlink does
+         * not guarantee enough alignment for the 64-bit members. */
+        memcpy(&dp->stats, nl_attr_get(attrs[ODPDT_STATS]), sizeof dp->stats);
+    }
+
+    if (attrs[ODPDT_IPV4_FRAGS]) {
+        dp->ipv4_frags = nl_attr_get_u32(attrs[ODPDT_IPV4_FRAGS]);
+    }
+
+    if (attrs[ODPDT_SAMPLING]) {
+        dp->sampling = nl_attr_get(attrs[ODPDT_SAMPLING]);
+    }
+
+    return 0;
+}
+
+/* Appends to 'buf' (which must initially be empty) a "struct odp_datapath"
+ * followed by Netlink attributes corresponding to 'dp'.
+ *
+ * Only attributes that are set in 'dp' are emitted: a null 'name', an
+ * 'ipv4_frags' of zero, or a null 'sampling' omits the corresponding
+ * attribute. */
+static void
+dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf)
+{
+    struct odp_datapath *odp_dp;
+
+    /* Leave headroom for the header, which is pushed on last, once the
+     * total length is known.  This has to be the size of the structure,
+     * not of the pointer to it. */
+    ofpbuf_reserve(buf, sizeof *odp_dp);
+
+    if (dp->name) {
+        nl_msg_put_string(buf, ODPDT_NAME, dp->name);
+    }
+
+    /* Skip ODPDT_STATS since we never have a reason to serialize it. */
+
+    if (dp->ipv4_frags) {
+        nl_msg_put_u32(buf, ODPDT_IPV4_FRAGS, dp->ipv4_frags);
+    }
+
+    if (dp->sampling) {
+        nl_msg_put_u32(buf, ODPDT_SAMPLING, *dp->sampling);
+    }
+
+    odp_dp = ofpbuf_push_uninit(buf, sizeof *odp_dp);
+    odp_dp->dp_idx = dp->dp_idx;
+    odp_dp->len = buf->size;
+    /* Offer the kernel all the space from the header to the end of 'buf'
+     * for its reply. */
+    odp_dp->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
+}
+
+/* Resets 'dp' to its "empty" state: every field zeroed, except 'dp_idx',
+ * which is set to -1. */
+static void
+dpif_linux_dp_init(struct dpif_linux_dp *dp)
+{
+    memset(dp, 0, sizeof(struct dpif_linux_dp));
+    dp->dp_idx = -1;
+}
+
+/* Sends 'request' to the kernel datapath through the ioctl named by
+ * 'request->cmd'.  Returns 0 if successful, otherwise a positive errno value.
+ *
+ * 'reply' and 'bufp' must be both null or both nonnull.  If they are null,
+ * nothing beyond the ioctl is done.  If they are nonnull, the kernel's answer
+ * is expected to be an odp_datapath too; it is decoded into '*reply', and
+ * '*bufp' receives the buffer that backs it.  The caller must free '*bufp'
+ * when the reply is no longer needed ('reply' will contain pointers into
+ * '*bufp').  On failure '*reply' is zeroed and '*bufp' is set to null. */
+static int
+dpif_linux_dp_transact(const struct dpif_linux_dp *request,
+                       struct dpif_linux_dp *reply, struct ofpbuf **bufp)
+{
+    struct ofpbuf *msg = NULL;
+    int error;
+    int fd;
+
+    assert((reply != NULL) == (bufp != NULL));
+
+    error = get_dp0_fd(&fd);
+    if (!error) {
+        msg = ofpbuf_new(1024);
+        dpif_linux_dp_to_ofpbuf(request, msg);
+        if (ioctl(fd, request->cmd, msg->data)) {
+            error = errno;
+        }
+    }
+
+    if (!error && bufp) {
+        /* The kernel wrote its reply over the request; trust the length it
+         * recorded in the header. */
+        msg->size = ((struct odp_datapath *) msg->data)->len;
+        error = dpif_linux_dp_from_ofpbuf(reply, msg);
+        if (!error) {
+            *bufp = msg;
+            return 0;
+        }
+    }
+
+    /* Either the caller wants no reply or something failed: in both cases
+     * the buffer is ours to free ('msg' may still be null here). */
+    ofpbuf_delete(msg);
+    if (error && bufp) {
+        memset(reply, 0, sizeof *reply);
+        *bufp = NULL;
+    }
+    return error;
+}
+
+/* Issues ODP_DP_GET for the datapath behind 'dpif_', identified by its minor
+ * number, and stores the answer in '*reply' and '*bufp'.  Returns 0 if
+ * successful, otherwise a positive errno value.
+ *
+ * The caller must free '*bufp' when the reply is no longer needed ('reply'
+ * will contain pointers into '*bufp'). */
+static int
+dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply,
+                  struct ofpbuf **bufp)
+{
+    struct dpif_linux_dp query;
+
+    dpif_linux_dp_init(&query);
+    query.cmd = ODP_DP_GET;
+    query.dp_idx = dpif_linux_cast(dpif_)->minor;
+
+    return dpif_linux_dp_transact(&query, reply, bufp);
+}
--
1.7.1
More information about the dev
mailing list