[ovs-dev] [PATCH v3] datapath: Add support for namespace.
Pravin B Shelar
pshelar at nicira.com
Fri Jan 6 03:59:25 UTC 2012
v2-v3:
- Simplified dp-destroy / net exit.
- moved rt_genid() to tunnel.c
v1-v2:
- Removed get/put_net().
- Simplified net registration compat code.
- Fixed capwap, removed VPORT_F_INIT_NS.
- Various coding style fixes.
--8<--------------------------cut here-------------------------->8-
The following patch adds support for Linux net-namespaces. Now we can
have an independent OVS instance in each net-ns.
Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
Issue #7821
---
datapath/Modules.mk | 1 +
datapath/brcompat.c | 55 +++---
datapath/datapath.c | 227 ++++++++++++++-------
datapath/datapath.h | 23 ++-
datapath/dp_notify.c | 7 +-
datapath/genl_exec.c | 2 +
datapath/linux/Modules.mk | 2 +
datapath/linux/compat/include/net/genetlink.h | 11 +
datapath/linux/compat/include/net/net_namespace.h | 43 ++++-
datapath/linux/compat/include/net/netns/generic.h | 15 ++
datapath/linux/compat/net_namespace.c | 76 +++++++
datapath/tunnel.c | 190 +++++++++--------
datapath/tunnel.h | 28 +++-
datapath/vport-capwap.c | 88 +++++----
datapath/vport-capwap.h | 31 +++
datapath/vport-gre.c | 12 +-
datapath/vport-internal_dev.c | 8 +-
datapath/vport-netdev.c | 2 +-
datapath/vport-patch.c | 23 ++-
datapath/vport.c | 53 +++++-
datapath/vport.h | 16 ++-
21 files changed, 650 insertions(+), 263 deletions(-)
create mode 100644 datapath/linux/compat/include/net/netns/generic.h
create mode 100644 datapath/linux/compat/net_namespace.c
create mode 100644 datapath/vport-capwap.h
diff --git a/datapath/Modules.mk b/datapath/Modules.mk
index 4d17568..96e7f7d 100644
--- a/datapath/Modules.mk
+++ b/datapath/Modules.mk
@@ -38,6 +38,7 @@ openvswitch_headers = \
tunnel.h \
vlan.h \
vport.h \
+ vport-capwap.h \
vport-generic.h \
vport-internal_dev.h \
vport-netdev.h
diff --git a/datapath/brcompat.c b/datapath/brcompat.c
index 10a75ec..ae4ccb7 100644
--- a/datapath/brcompat.c
+++ b/datapath/brcompat.c
@@ -50,9 +50,10 @@ static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
static struct sk_buff *brc_reply; /* Reply from userspace. */
static u32 brc_seq; /* Sequence number for current op. */
-static struct sk_buff *brc_send_command(struct sk_buff *,
+static struct sk_buff *brc_send_command(struct net *,
+ struct sk_buff *,
struct nlattr **attrs);
-static int brc_send_simple_command(struct sk_buff *);
+static int brc_send_simple_command(struct net *, struct sk_buff *);
static struct sk_buff *brc_make_request(int op, const char *bridge,
const char *port)
@@ -74,13 +75,13 @@ error:
return NULL;
}
-static int brc_send_simple_command(struct sk_buff *request)
+static int brc_send_simple_command(struct net *net, struct sk_buff *request)
{
struct nlattr *attrs[BRC_GENL_A_MAX + 1];
struct sk_buff *reply;
int error;
- reply = brc_send_command(request, attrs);
+ reply = brc_send_command(net, request, attrs);
if (IS_ERR(reply))
return PTR_ERR(reply);
@@ -89,7 +90,7 @@ static int brc_send_simple_command(struct sk_buff *request)
return -error;
}
-static int brc_add_del_bridge(char __user *uname, int add)
+static int brc_add_del_bridge(struct net *net, char __user *uname, int add)
{
struct sk_buff *request;
char name[IFNAMSIZ];
@@ -106,10 +107,11 @@ static int brc_add_del_bridge(char __user *uname, int add)
if (!request)
return -ENOMEM;
- return brc_send_simple_command(request);
+ return brc_send_simple_command(net, request);
}
-static int brc_get_indices(int op, const char *br_name,
+static int brc_get_indices(struct net *net,
+ int op, const char *br_name,
int __user *uindices, int n)
{
struct nlattr *attrs[BRC_GENL_A_MAX + 1];
@@ -127,7 +129,7 @@ static int brc_get_indices(int op, const char *br_name,
if (!request)
return -ENOMEM;
- reply = brc_send_command(request, attrs);
+ reply = brc_send_command(net, request, attrs);
ret = PTR_ERR(reply);
if (IS_ERR(reply))
goto exit;
@@ -155,13 +157,13 @@ exit:
}
/* Called with br_ioctl_mutex. */
-static int brc_get_bridges(int __user *uindices, int n)
+static int brc_get_bridges(struct net *net, int __user *uindices, int n)
{
- return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
+ return brc_get_indices(net, BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
}
/* Legacy deviceless bridge ioctl's. Called with br_ioctl_mutex. */
-static int old_deviceless(void __user *uarg)
+static int old_deviceless(struct net *net, void __user *uarg)
{
unsigned long args[3];
@@ -170,12 +172,12 @@ static int old_deviceless(void __user *uarg)
switch (args[0]) {
case BRCTL_GET_BRIDGES:
- return brc_get_bridges((int __user *)args[1], args[2]);
+ return brc_get_bridges(net, (int __user *)args[1], args[2]);
case BRCTL_ADD_BRIDGE:
- return brc_add_del_bridge((void __user *)args[1], 1);
+ return brc_add_del_bridge(net, (void __user *)args[1], 1);
case BRCTL_DEL_BRIDGE:
- return brc_add_del_bridge((void __user *)args[1], 0);
+ return brc_add_del_bridge(net, (void __user *)args[1], 0);
}
return -EOPNOTSUPP;
@@ -185,19 +187,21 @@ static int old_deviceless(void __user *uarg)
static int
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
+{
+ struct net *net = NULL;
#else
brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
-#endif
{
+#endif
switch (cmd) {
case SIOCGIFBR:
case SIOCSIFBR:
- return old_deviceless(uarg);
+ return old_deviceless(net, uarg);
case SIOCBRADDBR:
- return brc_add_del_bridge(uarg, 1);
+ return brc_add_del_bridge(net, uarg, 1);
case SIOCBRDELBR:
- return brc_add_del_bridge(uarg, 0);
+ return brc_add_del_bridge(net, uarg, 0);
}
return -EOPNOTSUPP;
@@ -212,7 +216,7 @@ static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- port = __dev_get_by_index(&init_net, port_ifindex);
+ port = __dev_get_by_index(dev_net(dev), port_ifindex);
if (!port)
return -EINVAL;
@@ -224,7 +228,7 @@ static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
return -ENOMEM;
rtnl_unlock();
- err = brc_send_simple_command(request);
+ err = brc_send_simple_command(dev_net(dev), request);
rtnl_lock();
return err;
@@ -255,7 +259,7 @@ static int brc_get_port_list(struct net_device *dev, int __user *uindices,
int retval;
rtnl_unlock();
- retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name,
+ retval = brc_get_indices(dev_net(dev), BRC_GENL_C_GET_PORTS, dev->name,
uindices, num);
rtnl_lock();
@@ -288,7 +292,7 @@ static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
rtnl_unlock();
- reply = brc_send_command(request, attrs);
+ reply = brc_send_command(dev_net(dev), request, attrs);
retval = PTR_ERR(reply);
if (IS_ERR(reply))
goto exit;
@@ -378,6 +382,7 @@ static struct genl_family brc_genl_family = {
.name = BRC_GENL_FAMILY_NAME,
.version = 1,
.maxattr = BRC_GENL_A_MAX,
+ SET_NETNSOK
};
static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
@@ -456,7 +461,8 @@ static struct genl_ops brc_genl_ops[] = {
},
};
-static struct sk_buff *brc_send_command(struct sk_buff *request,
+static struct sk_buff *brc_send_command(struct net *net,
+ struct sk_buff *request,
struct nlattr **attrs)
{
unsigned long int flags;
@@ -475,7 +481,8 @@ static struct sk_buff *brc_send_command(struct sk_buff *request,
nlmsg_end(request, nlmsg_hdr(request));
/* Send message. */
- error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
+ error = genlmsg_multicast_netns(net, request, 0,
+ brc_mc_group.id, GFP_KERNEL);
if (error < 0)
goto error;
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 17871e4..76bf8f5 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -68,6 +68,8 @@
static void rehash_flow_table(struct work_struct *work);
static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
+int ovs_net_id __read_mostly;
+
int (*ovs_dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(ovs_dp_ioctl_hook);
@@ -88,25 +90,21 @@ EXPORT_SYMBOL(ovs_dp_ioctl_hook);
* each other.
*/
-/* Global list of datapaths to enable dumping them all out.
- * Protected by genl_mutex.
- */
-static LIST_HEAD(dps);
-
static struct vport *new_vport(const struct vport_parms *);
-static int queue_gso_packets(int dp_ifindex, struct sk_buff *,
+static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
const struct dp_upcall_info *);
-static int queue_userspace_packet(int dp_ifindex, struct sk_buff *,
+static int queue_userspace_packet(struct net *, int dp_ifindex,
+ struct sk_buff *,
const struct dp_upcall_info *);
/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
-static struct datapath *get_dp(int dp_ifindex)
+static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
struct datapath *dp = NULL;
struct net_device *dev;
rcu_read_lock();
- dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
+ dev = dev_get_by_index_rcu(net, dp_ifindex);
if (dev) {
struct vport *vport = ovs_internal_dev_get_vport(dev);
if (vport)
@@ -218,11 +216,11 @@ static void dp_ifinfo_notify(int event, struct vport *port)
}
}
- rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+ rtnl_notify(skb, port->net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
return;
err:
- rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
+ rtnl_set_sk_err(port->net, RTNLGRP_LINK, err);
out:
kfree_skb(skb);
}
@@ -243,6 +241,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
free_percpu(dp->stats_percpu);
+ release_net(dp->net);
kobject_put(&dp->ifobj);
}
@@ -338,7 +337,8 @@ static struct genl_family dp_packet_genl_family = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_PACKET_FAMILY,
.version = OVS_PACKET_VERSION,
- .maxattr = OVS_PACKET_ATTR_MAX
+ .maxattr = OVS_PACKET_ATTR_MAX,
+ SET_NETNSOK
};
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
@@ -362,9 +362,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
forward_ip_summed(skb, true);
if (!skb_is_gso(skb))
- err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+ err = queue_userspace_packet(dp->net, dp_ifindex, skb, upcall_info);
else
- err = queue_gso_packets(dp_ifindex, skb, upcall_info);
+ err = queue_gso_packets(dp->net, dp_ifindex, skb, upcall_info);
if (err)
goto err;
@@ -380,7 +380,8 @@ err:
return err;
}
-static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
+static int queue_gso_packets(struct net *net, int dp_ifindex,
+ struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
{
struct dp_upcall_info later_info;
@@ -395,7 +396,7 @@ static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
/* Queue all of the segments. */
skb = segs;
do {
- err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+ err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
if (err)
break;
@@ -425,7 +426,8 @@ static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
return err;
}
-static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
+static int queue_userspace_packet(struct net *net, int dp_ifindex,
+ struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
{
struct ovs_header *upcall;
@@ -480,7 +482,7 @@ static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
skb_copy_and_csum_dev(skb, nla_data(nla));
- err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
+ err = genlmsg_unicast(net, user_skb, upcall_info->pid);
out:
kfree_skb(nskb);
@@ -488,15 +490,10 @@ out:
}
/* Called with genl_mutex. */
-static int flush_flows(int dp_ifindex)
+static int flush_flows(struct datapath *dp)
{
struct flow_table *old_table;
struct flow_table *new_table;
- struct datapath *dp;
-
- dp = get_dp(dp_ifindex);
- if (!dp)
- return -ENODEV;
old_table = genl_dereference(dp->table);
new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
@@ -780,7 +777,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
packet->priority = flow->key.phy.priority;
rcu_read_lock();
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
goto err_unlock;
@@ -854,7 +851,8 @@ static struct genl_family dp_flow_genl_family = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_FLOW_FAMILY,
.version = OVS_FLOW_VERSION,
- .maxattr = OVS_FLOW_ATTR_MAX
+ .maxattr = OVS_FLOW_ATTR_MAX,
+ SET_NETNSOK
};
static struct genl_multicast_group ovs_dp_flow_multicast_group = {
@@ -1003,7 +1001,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto error;
}
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
error = -ENODEV;
if (!dp)
goto error;
@@ -1104,7 +1102,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
ovs_dp_flow_multicast_group.id, info->nlhdr,
GFP_KERNEL);
else
- netlink_set_err(INIT_NET_GENL_SOCK, 0,
+ netlink_set_err(GENL_SOCK(dp->net), 0,
ovs_dp_flow_multicast_group.id,
PTR_ERR(reply));
return 0;
@@ -1133,7 +1131,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp)
return -ENODEV;
@@ -1162,16 +1160,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
int err;
int key_len;
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp)
+ return -ENODEV;
+
if (!a[OVS_FLOW_ATTR_KEY])
- return flush_flows(ovs_header->dp_ifindex);
+ return flush_flows(dp);
+
err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
if (err)
return err;
- dp = get_dp(ovs_header->dp_ifindex);
- if (!dp)
- return -ENODEV;
-
table = genl_dereference(dp->table);
flow = ovs_flow_tbl_lookup(table, &key, key_len);
if (!flow)
@@ -1200,7 +1199,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct datapath *dp;
struct flow_table *table;
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp)
return -ENODEV;
@@ -1264,7 +1263,8 @@ static struct genl_family dp_datapath_genl_family = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_DATAPATH_FAMILY,
.version = OVS_DATAPATH_VERSION,
- .maxattr = OVS_DP_ATTR_MAX
+ .maxattr = OVS_DP_ATTR_MAX,
+ SET_NETNSOK
};
static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
@@ -1326,18 +1326,19 @@ static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
}
/* Called with genl_mutex and optionally with RTNL lock also. */
-static struct datapath *lookup_datapath(struct ovs_header *ovs_header,
+static struct datapath *lookup_datapath(struct net *net,
+ struct ovs_header *ovs_header,
struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
struct datapath *dp;
if (!a[OVS_DP_ATTR_NAME])
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(net, ovs_header->dp_ifindex);
else {
struct vport *vport;
rcu_read_lock();
- vport = ovs_vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
+ vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
rcu_read_unlock();
}
@@ -1351,6 +1352,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *reply;
struct datapath *dp;
struct vport *vport;
+ struct ovs_net *ovs_net;
int err;
err = -EINVAL;
@@ -1388,6 +1390,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -ENOMEM;
goto err_destroy_table;
}
+ dp->net = hold_net(sock_net(skb->sk));
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1412,7 +1415,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(reply))
goto err_destroy_local_port;
- list_add_tail(&dp->list_node, &dps);
+ ovs_net = net_generic(dp->net, ovs_net_id);
+ list_add_tail(&dp->list_node, &ovs_net->dps);
ovs_dp_sysfs_add_dp(dp);
rtnl_unlock();
@@ -1437,10 +1441,21 @@ err_unlock_rtnl:
err:
return err;
}
+static void __dp_destroy(struct datapath *dp)
+{
+ struct vport *vport, *next_vport;
+ list_for_each_entry_safe(vport, next_vport, &dp->port_list, node)
+ if (vport->port_no != OVSP_LOCAL)
+ ovs_dp_detach_port(vport);
+
+ ovs_dp_sysfs_del_dp(dp);
+ list_del(&dp->list_node);
+ ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
+
+}
static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
- struct vport *vport, *next_vport;
struct sk_buff *reply;
struct datapath *dp;
int err;
@@ -1450,7 +1465,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto exit;
rtnl_lock();
- dp = lookup_datapath(info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
goto exit_unlock;
@@ -1461,13 +1476,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(reply))
goto exit_unlock;
- list_for_each_entry_safe(vport, next_vport, &dp->port_list, node)
- if (vport->port_no != OVSP_LOCAL)
- ovs_dp_detach_port(vport);
-
- ovs_dp_sysfs_del_dp(dp);
- list_del(&dp->list_node);
- ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
+ __dp_destroy(dp);
/* rtnl_unlock() will wait until all the references to devices that
* are pending unregistration have been dropped. We do it here to
@@ -1501,7 +1510,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- dp = lookup_datapath(info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
if (IS_ERR(dp))
return PTR_ERR(dp);
@@ -1509,7 +1518,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
info->snd_seq, OVS_DP_CMD_NEW);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
- netlink_set_err(INIT_NET_GENL_SOCK, 0,
+ netlink_set_err(GENL_SOCK(dp->net), 0,
ovs_dp_datapath_multicast_group.id, err);
return 0;
}
@@ -1531,7 +1540,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- dp = lookup_datapath(info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
if (IS_ERR(dp))
return PTR_ERR(dp);
@@ -1545,11 +1554,12 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
struct datapath *dp;
int skip = cb->args[0];
int i = 0;
- list_for_each_entry(dp, &dps, list_node) {
+ list_for_each_entry(dp, &ovs_net->dps, list_node) {
if (i < skip)
continue;
if (ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
@@ -1608,7 +1618,8 @@ static struct genl_family dp_vport_genl_family = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_VPORT_FAMILY,
.version = OVS_VPORT_VERSION,
- .maxattr = OVS_VPORT_ATTR_MAX
+ .maxattr = OVS_VPORT_ATTR_MAX,
+ SET_NETNSOK
};
struct genl_multicast_group ovs_dp_vport_multicast_group = {
@@ -1680,14 +1691,15 @@ static int ovs_vport_cmd_validate(struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
}
/* Called with RTNL lock or RCU read lock. */
-static struct vport *lookup_vport(struct ovs_header *ovs_header,
+static struct vport *lookup_vport(struct net *net,
+ struct ovs_header *ovs_header,
struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
struct datapath *dp;
struct vport *vport;
if (a[OVS_VPORT_ATTR_NAME]) {
- vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
+ vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
if (!vport)
return ERR_PTR(-ENODEV);
return vport;
@@ -1697,7 +1709,7 @@ static struct vport *lookup_vport(struct ovs_header *ovs_header,
if (port_no >= DP_MAX_PORTS)
return ERR_PTR(-EFBIG);
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(net, ovs_header->dp_ifindex);
if (!dp)
return ERR_PTR(-ENODEV);
@@ -1745,7 +1757,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto exit;
rtnl_lock();
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
goto exit_unlock;
@@ -1820,7 +1832,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
goto exit;
rtnl_lock();
- vport = lookup_vport(info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock;
@@ -1841,7 +1853,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
OVS_VPORT_CMD_NEW);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
- netlink_set_err(INIT_NET_GENL_SOCK, 0,
+ netlink_set_err(GENL_SOCK(sock_net(skb->sk)), 0,
ovs_dp_vport_multicast_group.id, err);
return 0;
}
@@ -1867,7 +1879,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto exit;
rtnl_lock();
- vport = lookup_vport(info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock;
@@ -1907,7 +1919,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto exit;
rcu_read_lock();
- vport = lookup_vport(ovs_header, a);
+ vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock;
@@ -1935,7 +1947,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
u32 port_no;
int retval;
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp)
return -ENODEV;
@@ -2047,15 +2059,20 @@ error:
static int __rehash_flow_table(void *dummy)
{
struct datapath *dp;
+ struct net *net;
- list_for_each_entry(dp, &dps, list_node) {
- struct flow_table *old_table = genl_dereference(dp->table);
- struct flow_table *new_table;
+ for_each_net(net) {
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
- new_table = ovs_flow_tbl_rehash(old_table);
- if (!IS_ERR(new_table)) {
- rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_deferred_destroy(old_table);
+ list_for_each_entry(dp, &ovs_net->dps, list_node) {
+ struct flow_table *old_table = genl_dereference(dp->table);
+ struct flow_table *new_table;
+
+ new_table = ovs_flow_tbl_rehash(old_table);
+ if (!IS_ERR(new_table)) {
+ rcu_assign_pointer(dp->table, new_table);
+ ovs_flow_tbl_deferred_destroy(old_table);
+ }
}
}
return 0;
@@ -2067,6 +2084,58 @@ static void rehash_flow_table(struct work_struct *work)
schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
}
+static void dp_destroy_all(struct ovs_net *ovs_net)
+{
+ struct datapath *dp, *dp_next;
+ LIST_HEAD(dp_kill_list);
+
+ rtnl_lock();
+ list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) {
+ __dp_destroy(dp);
+ list_add_tail(&dp->list_node, &dp_kill_list);
+ }
+ rtnl_unlock();
+
+ list_for_each_entry(dp, &dp_kill_list, list_node) {
+ call_rcu(&dp->rcu, destroy_dp_rcu);
+ module_put(THIS_MODULE);
+ }
+}
+
+static int ovs_init_net(struct net *net)
+{
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+ int err;
+
+ err = ovs_tnl_init_net(net);
+ if (err)
+ return err;
+
+ err = ovs_vport_init_net(net);
+ if (err) {
+ ovs_tnl_exit_net(net);
+ return err;
+ }
+ INIT_LIST_HEAD(&ovs_net->dps);
+ return 0;
+}
+
+static void ovs_exit_net(struct net *net)
+{
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+ dp_destroy_all(ovs_net);
+ ovs_vport_exit_net(net);
+ ovs_tnl_exit_net(net);
+}
+
+static struct pernet_operations ovs_net_ops = {
+ .init = ovs_init_net,
+ .exit = ovs_exit_net,
+ .id = &ovs_net_id,
+ .size = sizeof(struct ovs_net),
+};
+
static int __init dp_init(void)
{
struct sk_buff *dummy_skb;
@@ -2085,21 +2154,21 @@ static int __init dp_init(void)
if (err)
goto error_genl_exec;
- err = ovs_tnl_init();
- if (err)
- goto error_wq;
-
err = ovs_flow_init();
if (err)
- goto error_tnl_exit;
+ goto error_wq;
err = ovs_vport_init();
if (err)
goto error_flow_exit;
+ err = register_pernet_device(&ovs_net_ops);
+ if (err < 0)
+ goto error_vport_exit;
+
err = register_netdevice_notifier(&ovs_dp_device_notifier);
if (err)
- goto error_vport_exit;
+ goto error_netns_exit;
err = dp_register_genl();
if (err < 0)
@@ -2111,12 +2180,12 @@ static int __init dp_init(void)
error_unreg_notifier:
unregister_netdevice_notifier(&ovs_dp_device_notifier);
+error_netns_exit:
+ unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
ovs_vport_exit();
error_flow_exit:
ovs_flow_exit();
-error_tnl_exit:
- ovs_tnl_exit();
error_wq:
ovs_workqueues_exit();
error_genl_exec:
@@ -2131,9 +2200,9 @@ static void dp_cleanup(void)
rcu_barrier();
dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
unregister_netdevice_notifier(&ovs_dp_device_notifier);
+ unregister_pernet_device(&ovs_net_ops);
ovs_vport_exit();
ovs_flow_exit();
- ovs_tnl_exit();
ovs_workqueues_exit();
genl_exec_exit();
}
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 27151b9..d5a8e41 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -29,10 +29,11 @@
#include "checksum.h"
#include "compat.h"
-#include "flow.h"
#include "dp_sysfs.h"
+#include "flow.h"
+#include "tunnel.h"
#include "vlan.h"
-
+#include "vport.h"
struct vport;
#define DP_MAX_PORTS 1024
@@ -69,6 +70,7 @@ struct dp_stats_percpu {
* @port_list: List of all ports in @ports in arbitrary order. RTNL required
* to iterate or modify.
* @stats_percpu: Per-CPU datapath statistics.
+ * @net: Reference to net namespace.
*
* Context: See the comment on locking at the top of datapath.c for additional
* locking information.
@@ -87,6 +89,9 @@ struct datapath {
/* Stats. */
struct dp_stats_percpu __percpu *stats_percpu;
+
+ /* Network namespace ref. */
+ struct net *net;
};
/**
@@ -131,6 +136,17 @@ struct dp_upcall_info {
u32 pid;
};
+struct ovs_net {
+ /* Per Network namespace list of datapaths to enable dumping them
+ * all out. Protected by genl_mutex.
+ */
+ struct list_head dps;
+ /* Per network namespace data for tnl. */
+ struct tnl_net tnl_net;
+ /* Per network namespace data for vport. */
+ struct vport_net vport_net;
+};
+
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_multicast_group ovs_dp_vport_multicast_group;
extern int (*ovs_dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
@@ -145,4 +161,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
u8 cmd);
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+
+extern int ovs_net_id;
+
#endif /* datapath.h */
diff --git a/datapath/dp_notify.c b/datapath/dp_notify.c
index d040d46..34654c6 100644
--- a/datapath/dp_notify.c
+++ b/datapath/dp_notify.c
@@ -46,14 +46,15 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event,
OVS_VPORT_CMD_DEL);
ovs_dp_detach_port(vport);
if (IS_ERR(notify)) {
- netlink_set_err(INIT_NET_GENL_SOCK, 0,
+ netlink_set_err(GENL_SOCK(vport->net), 0,
ovs_dp_vport_multicast_group.id,
PTR_ERR(notify));
break;
}
- genlmsg_multicast(notify, 0, ovs_dp_vport_multicast_group.id,
- GFP_KERNEL);
+ genlmsg_multicast_netns(vport->net, notify, 0,
+ ovs_dp_vport_multicast_group.id,
+ GFP_KERNEL);
}
break;
diff --git a/datapath/genl_exec.c b/datapath/genl_exec.c
index e579529..66c7f94 100644
--- a/datapath/genl_exec.c
+++ b/datapath/genl_exec.c
@@ -100,6 +100,8 @@ int genl_exec(genl_exec_func_t func, void *data)
genl_exec_function = func;
genl_exec_data = data;
+
+ /* There is no need to send msg to current namespace. */
ret = genlmsg_unicast(&init_net, genlmsg_skb, 0);
if (!ret) {
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 40c3927..ec66dd8 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -7,6 +7,7 @@ openvswitch_sources += \
linux/compat/ip_output-openvswitch.c \
linux/compat/kmemdup.c \
linux/compat/netdevice.c \
+ linux/compat/net_namespace.c \
linux/compat/reciprocal_div.c \
linux/compat/skbuff-openvswitch.c \
linux/compat/time.c \
@@ -62,6 +63,7 @@ openvswitch_headers += \
linux/compat/include/net/netlink.h \
linux/compat/include/net/protocol.h \
linux/compat/include/net/route.h \
+ linux/compat/include/net/netns/generic.h \
linux/compat/genetlink.inc
both_modules += brcompat
diff --git a/datapath/linux/compat/include/net/genetlink.h b/datapath/linux/compat/include/net/genetlink.h
index a1ff7c1..4e7f1b6 100644
--- a/datapath/linux/compat/include/net/genetlink.h
+++ b/datapath/linux/compat/include/net/genetlink.h
@@ -101,6 +101,10 @@ static inline int genlmsg_multicast_flags(struct sk_buff *skb, u32 pid,
}
#endif /* linux kernel < 2.6.19 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
+#define genlmsg_multicast_netns(net, skb, pid, grp, flags) \
+ genlmsg_multicast(skb, pid, grp, flags)
+#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
@@ -170,4 +174,11 @@ static inline struct net *genl_info_net(struct genl_info *info)
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
#define genlmsg_unicast(ignore_net, skb, pid) genlmsg_unicast(skb, pid)
#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
+#define SET_NETNSOK
+#else
+#define SET_NETNSOK .netnsok = true,
+#endif
+
#endif /* genetlink.h */
diff --git a/datapath/linux/compat/include/net/net_namespace.h b/datapath/linux/compat/include/net/net_namespace.h
index 9ce9fcd..1c25cc6 100644
--- a/datapath/linux/compat/include/net/net_namespace.h
+++ b/datapath/linux/compat/include/net/net_namespace.h
@@ -5,11 +5,48 @@
/* <net/net_namespace.h> exists, go ahead and include it. */
#include_next <net/net_namespace.h>
#endif
-
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
-#define INIT_NET_GENL_SOCK init_net.genl_sock
+#define GENL_SOCK(net) ((net)->genl_sock)
#else
-#define INIT_NET_GENL_SOCK genl_sock
+#define GENL_SOCK(net) (genl_sock)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
+struct net;
+
+static inline struct net *sock_net(const struct sock *sk)
+{
+ return NULL;
+}
+
+#define for_each_net(net) { net = NULL; }
+
+static inline struct net *hold_net(struct net *net)
+{
+ return net;
+}
+
+static inline void release_net(struct net *net)
+{
+}
+
+#endif
+
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,32)
+#define pernet_operations rpl_pernet_operations
+struct pernet_operations {
+ int (*init)(struct net *net);
+ void (*exit)(struct net *net);
+ int *id;
+ size_t size;
+};
+
+#define register_pernet_device rpl_register_pernet_device
+extern int register_pernet_device(struct pernet_operations *);
+
+#define unregister_pernet_device rpl_unregister_pernet_device
+extern void unregister_pernet_device(struct pernet_operations *);
#endif
#endif /* net/net_namespace.h wrapper */
diff --git a/datapath/linux/compat/include/net/netns/generic.h b/datapath/linux/compat/include/net/netns/generic.h
new file mode 100644
index 0000000..fd44470
--- /dev/null
+++ b/datapath/linux/compat/include/net/netns/generic.h
@@ -0,0 +1,15 @@
+#ifndef __NET_NET_NETNS_GENERIC_WRAPPER_H
+#define __NET_NET_NETNS_GENERIC_WRAPPER_H 1
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
+/* <net/netns/generic.h> exists, go ahead and include it. */
+#include_next <net/netns/generic.h>
+#else
+extern void *__ovs_net;
+static inline void *net_generic(const struct net *net, int id)
+{
+ return __ovs_net;
+}
+#endif
+
+#endif /* net/netns/generic.h wrapper */
diff --git a/datapath/linux/compat/net_namespace.c b/datapath/linux/compat/net_namespace.c
new file mode 100644
index 0000000..31c4a6e
--- /dev/null
+++ b/datapath/linux/compat/net_namespace.c
@@ -0,0 +1,76 @@
+#include <linux/if_vlan.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32)
+
+#undef pernet_operations
+struct pernet_operations pnet_compat;
+
+struct rpl_pernet_operations *pnet;
+static int __net_init ovs_init_net(struct net *net)
+{
+ int err;
+ void *ovs_net = kzalloc(pnet->size, GFP_KERNEL);
+
+ if (!ovs_net)
+ return -ENOMEM;
+
+ err = net_assign_generic(net, *pnet->id, ovs_net);
+ if (err) {
+ kfree(ovs_net);
+ return err;
+ }
+
+ err = pnet->init(net);
+ if (err) {
+ kfree(ovs_net);
+ return err;
+ }
+
+ return 0;
+}
+
+static void __net_exit ovs_exit_net(struct net *net)
+{
+ void *ovs_net = net_generic(net, *pnet->id);
+
+ pnet->exit(net);
+ net_assign_generic(net, *pnet->id, NULL);
+ kfree(ovs_net);
+}
+
+int rpl_register_pernet_device(struct rpl_pernet_operations *rpl_pnet)
+{
+ pnet = rpl_pnet;
+ pnet_compat.init = ovs_init_net;
+ pnet_compat.exit = ovs_exit_net;
+ return register_pernet_gen_subsys(pnet->id, &pnet_compat);
+}
+
+void rpl_unregister_pernet_device(struct rpl_pernet_operations *pnet)
+{
+ unregister_pernet_gen_subsys(*pnet->id, &pnet_compat);
+}
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
+void *__ovs_net;
+int rpl_register_pernet_device(struct rpl_pernet_operations *rpl_pnet)
+{
+ __ovs_net = kzalloc(rpl_pnet->size, GFP_KERNEL);
+ if (!__ovs_net)
+ return -ENOMEM;
+
+ rpl_pnet->init(NULL);
+ return 0;
+}
+
+void rpl_unregister_pernet_device(struct rpl_pernet_operations *rpl_pnet)
+{
+ rpl_pnet->exit(NULL);
+ kfree(__ovs_net);
+}
+#endif
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 33d2fe9..1a66db3 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -16,6 +16,8 @@
* 02110-1301, USA
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
@@ -82,23 +84,12 @@
#define CACHE_DATA_ALIGN 16
#define PORT_TABLE_SIZE 1024
-static struct hlist_head *port_table __read_mostly;
-static int port_table_count;
static void cache_cleaner(struct work_struct *work);
static DECLARE_DELAYED_WORK(cache_cleaner_wq, cache_cleaner);
-/*
- * These are just used as an optimization: they don't require any kind of
- * synchronization because we could have just as easily read the value before
- * the port change happened.
- */
-static unsigned int key_local_remote_ports __read_mostly;
-static unsigned int key_remote_ports __read_mostly;
-static unsigned int key_multicast_ports __read_mostly;
-static unsigned int local_remote_ports __read_mostly;
-static unsigned int remote_ports __read_mostly;
-static unsigned int multicast_ports __read_mostly;
+/* Port table count across all namespaces. */
+static int port_table_count;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
#define rt_dst(rt) (rt->dst)
@@ -124,6 +115,12 @@ static struct vport *tnl_vport_to_vport(const struct tnl_vport *tnl_vport)
return vport_from_priv(tnl_vport);
}
+static struct tnl_net *get_tnl_net(struct net *net)
+{
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+ return &ovs_net->tnl_net;
+}
+
/* This is analogous to rtnl_dereference for the tunnel cache. It checks that
* cache_lock is held, so it is only for update side code.
*/
@@ -164,12 +161,13 @@ static void free_cache_rcu(struct rcu_head *rcu)
* within an RCU callback. Fortunately this part doesn't require waiting for
* an RCU grace period.
*/
-static void free_mutable_rtnl(struct tnl_mutable_config *mutable)
+static void free_mutable_rtnl(struct net *net,
+ struct tnl_mutable_config *mutable)
{
ASSERT_RTNL();
if (ipv4_is_multicast(mutable->key.daddr) && mutable->mlink) {
struct in_device *in_dev;
- in_dev = inetdev_by_index(&init_net, mutable->mlink);
+ in_dev = inetdev_by_index(net, mutable->mlink);
if (in_dev)
ip_mc_dec_group(in_dev, mutable->key.daddr);
}
@@ -184,7 +182,7 @@ static void assign_config_rcu(struct vport *vport,
old_config = rtnl_dereference(tnl_vport->mutable);
rcu_assign_pointer(tnl_vport->mutable, new_config);
- free_mutable_rtnl(old_config);
+ free_mutable_rtnl(vport->net, old_config);
call_rcu(&old_config->rcu, free_config_rcu);
}
@@ -200,24 +198,25 @@ static void assign_cache_rcu(struct vport *vport, struct tnl_cache *new_cache)
call_rcu(&old_cache->rcu, free_cache_rcu);
}
-static unsigned int *find_port_pool(const struct tnl_mutable_config *mutable)
+static unsigned int *find_port_pool(struct tnl_net *tnl_net,
+ const struct tnl_mutable_config *mutable)
{
bool is_multicast = ipv4_is_multicast(mutable->key.daddr);
if (mutable->flags & TNL_F_IN_KEY_MATCH) {
if (mutable->key.saddr)
- return &local_remote_ports;
+ return &tnl_net->local_remote_ports;
else if (is_multicast)
- return &multicast_ports;
+ return &tnl_net->multicast_ports;
else
- return &remote_ports;
+ return &tnl_net->remote_ports;
} else {
if (mutable->key.saddr)
- return &key_local_remote_ports;
+ return &tnl_net->key_local_remote_ports;
else if (is_multicast)
- return &key_multicast_ports;
+ return &tnl_net->key_multicast_ports;
else
- return &key_remote_ports;
+ return &tnl_net->key_remote_ports;
}
}
@@ -226,14 +225,15 @@ static u32 port_hash(const struct port_lookup_key *key)
return jhash2((u32 *)key, (PORT_KEY_LEN / sizeof(u32)), 0);
}
-static struct hlist_head *find_bucket(u32 hash)
+static struct hlist_head *find_bucket(struct tnl_net *tnl_net, u32 hash)
{
- return &port_table[(hash & (PORT_TABLE_SIZE - 1))];
+ return &tnl_net->port_table[(hash & (PORT_TABLE_SIZE - 1))];
}
static void port_table_add_port(struct vport *vport)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ struct tnl_net *tnl_net = get_tnl_net(vport->net);
const struct tnl_mutable_config *mutable;
u32 hash;
@@ -242,30 +242,32 @@ static void port_table_add_port(struct vport *vport)
mutable = rtnl_dereference(tnl_vport->mutable);
hash = port_hash(&mutable->key);
- hlist_add_head_rcu(&tnl_vport->hash_node, find_bucket(hash));
+ hlist_add_head_rcu(&tnl_vport->hash_node, find_bucket(tnl_net, hash));
port_table_count++;
- (*find_port_pool(rtnl_dereference(tnl_vport->mutable)))++;
+ (*find_port_pool(tnl_net, rtnl_dereference(tnl_vport->mutable)))++;
}
static void port_table_move_port(struct vport *vport,
struct tnl_mutable_config *new_mutable)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ struct tnl_net *tnl_net = get_tnl_net(vport->net);
u32 hash;
hash = port_hash(&new_mutable->key);
hlist_del_init_rcu(&tnl_vport->hash_node);
- hlist_add_head_rcu(&tnl_vport->hash_node, find_bucket(hash));
+ hlist_add_head_rcu(&tnl_vport->hash_node, find_bucket(tnl_net, hash));
- (*find_port_pool(rtnl_dereference(tnl_vport->mutable)))--;
+ (*find_port_pool(tnl_net, rtnl_dereference(tnl_vport->mutable)))--;
assign_config_rcu(vport, new_mutable);
- (*find_port_pool(rtnl_dereference(tnl_vport->mutable)))++;
+ (*find_port_pool(tnl_net, rtnl_dereference(tnl_vport->mutable)))++;
}
static void port_table_remove_port(struct vport *vport)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ struct tnl_net *tnl_net = get_tnl_net(vport->net);
hlist_del_init_rcu(&tnl_vport->hash_node);
@@ -273,10 +275,11 @@ static void port_table_remove_port(struct vport *vport)
if (port_table_count == 0)
cancel_delayed_work_sync(&cache_cleaner_wq);
- (*find_port_pool(rtnl_dereference(tnl_vport->mutable)))--;
+ (*find_port_pool(tnl_net, rtnl_dereference(tnl_vport->mutable)))--;
}
-static struct vport *port_table_lookup(struct port_lookup_key *key,
+static struct vport *port_table_lookup(struct tnl_net *tnl_net,
+ struct port_lookup_key *key,
const struct tnl_mutable_config **pmutable)
{
struct hlist_node *n;
@@ -284,7 +287,7 @@ static struct vport *port_table_lookup(struct port_lookup_key *key,
u32 hash = port_hash(key);
struct tnl_vport *tnl_vport;
- bucket = find_bucket(hash);
+ bucket = find_bucket(tnl_net, hash);
hlist_for_each_entry_rcu(tnl_vport, n, bucket, hash_node) {
struct tnl_mutable_config *mutable;
@@ -299,10 +302,11 @@ static struct vport *port_table_lookup(struct port_lookup_key *key,
return NULL;
}
-struct vport *ovs_tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
- int tunnel_type,
+struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
+ __be64 key, int tunnel_type,
const struct tnl_mutable_config **mutable)
{
+ struct tnl_net *tnl_net = get_tnl_net(net);
struct port_lookup_key lookup;
struct vport *vport;
bool is_multicast = ipv4_is_multicast(saddr);
@@ -313,14 +317,14 @@ struct vport *ovs_tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
/* First try for exact match on in_key. */
lookup.in_key = key;
lookup.tunnel_type = tunnel_type | TNL_T_KEY_EXACT;
- if (!is_multicast && key_local_remote_ports) {
- vport = port_table_lookup(&lookup, mutable);
+ if (!is_multicast && tnl_net->key_local_remote_ports) {
+ vport = port_table_lookup(tnl_net, &lookup, mutable);
if (vport)
return vport;
}
- if (key_remote_ports) {
+ if (tnl_net->key_remote_ports) {
lookup.saddr = 0;
- vport = port_table_lookup(&lookup, mutable);
+ vport = port_table_lookup(tnl_net, &lookup, mutable);
if (vport)
return vport;
@@ -330,14 +334,14 @@ struct vport *ovs_tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
/* Then try matches that wildcard in_key. */
lookup.in_key = 0;
lookup.tunnel_type = tunnel_type | TNL_T_KEY_MATCH;
- if (!is_multicast && local_remote_ports) {
- vport = port_table_lookup(&lookup, mutable);
+ if (!is_multicast && tnl_net->local_remote_ports) {
+ vport = port_table_lookup(tnl_net, &lookup, mutable);
if (vport)
return vport;
}
- if (remote_ports) {
+ if (tnl_net->remote_ports) {
lookup.saddr = 0;
- vport = port_table_lookup(&lookup, mutable);
+ vport = port_table_lookup(tnl_net, &lookup, mutable);
if (vport)
return vport;
}
@@ -345,17 +349,17 @@ struct vport *ovs_tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
if (is_multicast) {
lookup.saddr = 0;
lookup.daddr = saddr;
- if (key_multicast_ports) {
+ if (tnl_net->key_multicast_ports) {
lookup.tunnel_type = tunnel_type | TNL_T_KEY_EXACT;
lookup.in_key = key;
- vport = port_table_lookup(&lookup, mutable);
+ vport = port_table_lookup(tnl_net, &lookup, mutable);
if (vport)
return vport;
}
- if (multicast_ports) {
+ if (tnl_net->multicast_ports) {
lookup.tunnel_type = tunnel_type | TNL_T_KEY_MATCH;
lookup.in_key = 0;
- vport = port_table_lookup(&lookup, mutable);
+ vport = port_table_lookup(tnl_net, &lookup, mutable);
if (vport)
return vport;
}
@@ -811,6 +815,13 @@ static void *get_cached_header(const struct tnl_cache *cache)
return (void *)cache + ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN);
}
+#ifdef HAVE_RT_GENID
+static inline int rt_genid(struct net *net)
+{
+ return atomic_read(&net->ipv4.rt_genid);
+}
+#endif
+
static bool check_cache_valid(const struct tnl_cache *cache,
const struct tnl_mutable_config *mutable)
{
@@ -825,7 +836,7 @@ static bool check_cache_valid(const struct tnl_cache *cache,
time_before(jiffies, cache->expiration) &&
#endif
#ifdef HAVE_RT_GENID
- atomic_read(&init_net.ipv4.rt_genid) == cache->rt->rt_genid &&
+ rt_genid(dev_net(rt_dst(cache->rt).dev)) == cache->rt->rt_genid &&
#endif
#ifdef HAVE_HH_SEQ
hh->hh_lock.sequence == cache->hh_seq &&
@@ -850,19 +861,25 @@ static void __cache_cleaner(struct tnl_vport *tnl_vport)
static void cache_cleaner(struct work_struct *work)
{
+ struct net *net;
int i;
schedule_cache_cleaner();
rcu_read_lock();
- for (i = 0; i < PORT_TABLE_SIZE; i++) {
- struct hlist_node *n;
- struct hlist_head *bucket;
- struct tnl_vport *tnl_vport;
-
- bucket = &port_table[i];
- hlist_for_each_entry_rcu(tnl_vport, n, bucket, hash_node)
- __cache_cleaner(tnl_vport);
+ for_each_net(net) {
+ struct tnl_net *tnl_net = get_tnl_net(net);
+
+ for (i = 0; i < PORT_TABLE_SIZE; i++) {
+
+ struct hlist_node *n;
+ struct hlist_head *bucket;
+ struct tnl_vport *tnl_vport;
+
+ bucket = &tnl_net->port_table[i];
+ hlist_for_each_entry_rcu(tnl_vport, n, bucket, hash_node)
+ __cache_cleaner(tnl_vport);
+ }
}
rcu_read_unlock();
}
@@ -989,7 +1006,8 @@ unlock:
return cache;
}
-static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
+static struct rtable *__find_route(struct net *net,
+ const struct tnl_mutable_config *mutable,
u8 ipproto, u8 tos)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
@@ -1000,7 +1018,7 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
.proto = ipproto };
struct rtable *rt;
- if (unlikely(ip_route_output_key(&init_net, &rt, &fl)))
+ if (unlikely(ip_route_output_key(net, &rt, &fl)))
return ERR_PTR(-EADDRNOTAVAIL);
return rt;
@@ -1010,7 +1028,7 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
.flowi4_tos = tos,
.flowi4_proto = ipproto };
- return ip_route_output_key(&init_net, &fl);
+ return ip_route_output_key(net, &fl);
#endif
}
@@ -1031,7 +1049,8 @@ static struct rtable *find_route(struct vport *vport,
} else {
struct rtable *rt;
- rt = __find_route(mutable, tnl_vport->tnl_ops->ipproto, tos);
+ rt = __find_route(vport->net, mutable,
+ tnl_vport->tnl_ops->ipproto, tos);
if (IS_ERR(rt))
return NULL;
@@ -1360,11 +1379,13 @@ static const struct nla_policy tnl_policy[OVS_TUNNEL_ATTR_MAX + 1] = {
/* Sets OVS_TUNNEL_ATTR_* fields in 'mutable', which must initially be
* zeroed. */
-static int tnl_set_config(struct nlattr *options, const struct tnl_ops *tnl_ops,
+static int tnl_set_config(struct net *net, struct nlattr *options,
+ const struct tnl_ops *tnl_ops,
const struct vport *cur_vport,
struct tnl_mutable_config *mutable)
{
const struct vport *old_vport;
+ struct tnl_net *tnl_net;
const struct tnl_mutable_config *old_mutable;
struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1];
int err;
@@ -1417,7 +1438,8 @@ static int tnl_set_config(struct nlattr *options, const struct tnl_ops *tnl_ops,
mutable->tunnel_hlen += sizeof(struct iphdr);
- old_vport = port_table_lookup(&mutable->key, &old_mutable);
+ tnl_net = get_tnl_net(net);
+ old_vport = port_table_lookup(tnl_net, &mutable->key, &old_mutable);
if (old_vport && old_vport != cur_vport)
return -EEXIST;
@@ -1426,7 +1448,7 @@ static int tnl_set_config(struct nlattr *options, const struct tnl_ops *tnl_ops,
struct net_device *dev;
struct rtable *rt;
- rt = __find_route(mutable, tnl_ops->ipproto, mutable->tos);
+ rt = __find_route(net, mutable, tnl_ops->ipproto, mutable->tos);
if (IS_ERR(rt))
return -EADDRNOTAVAIL;
dev = rt_dst(rt).dev;
@@ -1472,7 +1494,8 @@ struct vport *ovs_tnl_create(const struct vport_parms *parms,
get_random_bytes(&initial_frag_id, sizeof(int));
atomic_set(&tnl_vport->frag_id, initial_frag_id);
- err = tnl_set_config(parms->options, tnl_ops, NULL, mutable);
+ err = tnl_set_config(parms->dp->net, parms->options, tnl_ops,
+ NULL, mutable);
if (err)
goto error_free_mutable;
@@ -1489,7 +1512,7 @@ struct vport *ovs_tnl_create(const struct vport_parms *parms,
return vport;
error_free_mutable:
- free_mutable_rtnl(mutable);
+ free_mutable_rtnl(vport->net, mutable);
kfree(mutable);
error_free_vport:
ovs_vport_free(vport);
@@ -1516,7 +1539,8 @@ int ovs_tnl_set_options(struct vport *vport, struct nlattr *options)
memcpy(mutable->eth_addr, old_mutable->eth_addr, ETH_ALEN);
/* Parse the others configured by userspace. */
- err = tnl_set_config(options, tnl_vport->tnl_ops, vport, mutable);
+ err = tnl_set_config(vport->net, options, tnl_vport->tnl_ops,
+ vport, mutable);
if (err)
goto error_free;
@@ -1528,7 +1552,7 @@ int ovs_tnl_set_options(struct vport *vport, struct nlattr *options)
return 0;
error_free:
- free_mutable_rtnl(mutable);
+ free_mutable_rtnl(vport->net, mutable);
kfree(mutable);
error:
return err;
@@ -1576,7 +1600,7 @@ void ovs_tnl_destroy(struct vport *vport)
mutable = rtnl_dereference(tnl_vport->mutable);
port_table_remove_port(vport);
- free_mutable_rtnl(mutable);
+ free_mutable_rtnl(vport->net, mutable);
call_rcu(&tnl_vport->rcu, free_port_rcu);
}
@@ -1619,36 +1643,24 @@ void ovs_tnl_free_linked_skbs(struct sk_buff *skb)
}
}
-int ovs_tnl_init(void)
+int ovs_tnl_init_net(struct net *net)
{
+ struct tnl_net *tnl_net = get_tnl_net(net);
int i;
- port_table = kmalloc(PORT_TABLE_SIZE * sizeof(struct hlist_head *),
- GFP_KERNEL);
- if (!port_table)
+	tnl_net->port_table = kmalloc(PORT_TABLE_SIZE * sizeof(struct hlist_head),
+				      GFP_KERNEL);
+ if (!tnl_net->port_table)
return -ENOMEM;
for (i = 0; i < PORT_TABLE_SIZE; i++)
- INIT_HLIST_HEAD(&port_table[i]);
+ INIT_HLIST_HEAD(&tnl_net->port_table[i]);
return 0;
}
-void ovs_tnl_exit(void)
+void ovs_tnl_exit_net(struct net *net)
{
- int i;
-
- for (i = 0; i < PORT_TABLE_SIZE; i++) {
- struct tnl_vport *tnl_vport;
- struct hlist_head *hash_head;
- struct hlist_node *n;
-
- hash_head = &port_table[i];
- hlist_for_each_entry(tnl_vport, n, hash_head, hash_node) {
- BUG();
- goto out;
- }
- }
-out:
- kfree(port_table);
+ struct tnl_net *tnl_net = get_tnl_net(net);
+ kfree(tnl_net->port_table);
}
diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index 6865ae6..f470702 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -20,6 +20,9 @@
#define TUNNEL_H 1
#include <linux/version.h>
+#include <net/ip_vs.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include "flow.h"
#include "openvswitch/tunnel.h"
@@ -242,6 +245,23 @@ struct tnl_vport {
#endif
};
+struct tnl_net {
+ struct hlist_head *port_table;
+
+ /*
+ * These are just used as an optimization: they don't require any kind
+ * of synchronization because we could have just as easily read the
+ * value before the port change happened.
+ */
+
+ unsigned int key_local_remote_ports;
+ unsigned int key_remote_ports;
+ unsigned int key_multicast_ports;
+ unsigned int local_remote_ports;
+ unsigned int remote_ports;
+ unsigned int multicast_ports;
+};
+
struct vport *ovs_tnl_create(const struct vport_parms *, const struct vport_ops *,
const struct tnl_ops *);
void ovs_tnl_destroy(struct vport *);
@@ -255,16 +275,16 @@ const unsigned char *ovs_tnl_get_addr(const struct vport *vport);
int ovs_tnl_send(struct vport *vport, struct sk_buff *skb);
void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos);
-struct vport *ovs_tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
- int tunnel_type,
+struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
+ __be64 key, int tunnel_type,
const struct tnl_mutable_config **mutable);
bool ovs_tnl_frag_needed(struct vport *vport,
const struct tnl_mutable_config *mutable,
struct sk_buff *skb, unsigned int mtu, __be64 flow_key);
void ovs_tnl_free_linked_skbs(struct sk_buff *skb);
-int ovs_tnl_init(void);
-void ovs_tnl_exit(void);
+int ovs_tnl_init_net(struct net *);
+void ovs_tnl_exit_net(struct net *);
static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
{
return vport_priv(vport);
diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c
index 6c1b0da..493405b 100644
--- a/datapath/vport-capwap.c
+++ b/datapath/vport-capwap.c
@@ -16,6 +16,7 @@
#include <linux/ip.h>
#include <linux/list.h>
#include <linux/net.h>
+#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_frag.h>
@@ -23,6 +24,7 @@
#include <net/protocol.h>
#include <net/udp.h>
+#include "datapath.h"
#include "tunnel.h"
#include "vport.h"
#include "vport-generic.h"
@@ -97,6 +99,12 @@ struct capwaphdr_wsi_key {
__be64 key;
};
+static inline struct capwap_net *ovs_get_capwap_net(struct net *net)
+{
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+ return &ovs_net->vport_net.capwap;
+}
+
/* Flag indicating a 64bit key is stored in WSI data field */
#define CAPWAP_WSI_F_KEY64 0x80
@@ -137,8 +145,6 @@ struct frag_skb_cb {
static struct sk_buff *fragment(struct sk_buff *, const struct vport *,
struct dst_entry *dst, unsigned int hlen);
-static void defrag_init(void);
-static void defrag_exit(void);
static struct sk_buff *defrag(struct sk_buff *, bool frag_last);
static void capwap_frag_init(struct inet_frag_queue *, void *match);
@@ -154,13 +160,6 @@ static struct inet_frags frag_state = {
.frag_expire = capwap_frag_expire,
.secret_interval = CAPWAP_FRAG_SECRET_INTERVAL,
};
-static struct netns_frags frag_netns_state = {
- .timeout = CAPWAP_FRAG_TIMEOUT,
- .high_thresh = CAPWAP_FRAG_MAX_MEM,
- .low_thresh = CAPWAP_FRAG_PRUNE_MEM,
-};
-
-static struct socket *capwap_rcv_socket;
static int capwap_hdr_len(const struct tnl_mutable_config *mutable)
{
@@ -333,8 +332,8 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb)
goto out;
iph = ip_hdr(skb);
- vport = ovs_tnl_find_port(iph->daddr, iph->saddr, key, TNL_T_PROTO_CAPWAP,
- &mutable);
+ vport = ovs_tnl_find_port(sock_net(sk), iph->daddr, iph->saddr, key,
+ TNL_T_PROTO_CAPWAP, &mutable);
if (unlikely(!vport)) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
goto error;
@@ -369,42 +368,62 @@ static struct vport *capwap_create(const struct vport_parms *parms)
/* Random value. Irrelevant as long as it's not 0 since we set the handler. */
#define UDP_ENCAP_CAPWAP 10
-static int capwap_init(void)
+static int capwap_init_net(struct net *net)
{
int err;
+ struct capwap_net *capwap_net = ovs_get_capwap_net(net);
struct sockaddr_in sin;
- err = sock_create(AF_INET, SOCK_DGRAM, 0, &capwap_rcv_socket);
+ err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
+ &capwap_net->capwap_rcv_socket);
if (err)
goto error;
+ sk_change_net(capwap_net->capwap_rcv_socket->sk, net);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl(INADDR_ANY);
sin.sin_port = htons(CAPWAP_DST_PORT);
- err = kernel_bind(capwap_rcv_socket, (struct sockaddr *)&sin,
+ err = kernel_bind(capwap_net->capwap_rcv_socket, (struct sockaddr *)&sin,
sizeof(struct sockaddr_in));
if (err)
goto error_sock;
- udp_sk(capwap_rcv_socket->sk)->encap_type = UDP_ENCAP_CAPWAP;
- udp_sk(capwap_rcv_socket->sk)->encap_rcv = capwap_rcv;
+ udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_type = UDP_ENCAP_CAPWAP;
+ udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_rcv = capwap_rcv;
+
+ capwap_net->frag_state.timeout = CAPWAP_FRAG_TIMEOUT;
+ capwap_net->frag_state.high_thresh = CAPWAP_FRAG_MAX_MEM;
+ capwap_net->frag_state.low_thresh = CAPWAP_FRAG_PRUNE_MEM;
- defrag_init();
+ inet_frags_init_net(&capwap_net->frag_state);
return 0;
error_sock:
- sock_release(capwap_rcv_socket);
+ sk_release_kernel(capwap_net->capwap_rcv_socket->sk);
error:
- pr_warn("cannot register capwap protocol handler\n");
+	pr_warn("cannot register capwap protocol handler: %d\n", err);
return err;
}
+static void capwap_exit_net(struct net *net)
+{
+ struct capwap_net *capwap_net = ovs_get_capwap_net(net);
+
+ inet_frags_exit_net(&capwap_net->frag_state, &frag_state);
+ sk_release_kernel(capwap_net->capwap_rcv_socket->sk);
+}
+
+static int capwap_init(void)
+{
+ inet_frags_init(&frag_state);
+ return 0;
+}
+
static void capwap_exit(void)
{
- defrag_exit();
- sock_release(capwap_rcv_socket);
+ inet_frags_fini(&frag_state);
}
static void copy_skb_metadata(struct sk_buff *from, struct sk_buff *to)
@@ -529,13 +548,14 @@ static u32 frag_hash(struct frag_match *match)
frag_state.rnd) & (INETFRAGS_HASHSZ - 1);
}
-static struct frag_queue *queue_find(struct frag_match *match)
+static struct frag_queue *queue_find(struct netns_frags *ns_frag_state,
+ struct frag_match *match)
{
struct inet_frag_queue *ifq;
read_lock(&frag_state.lock);
- ifq = inet_frag_find(&frag_netns_state, &frag_state, match, frag_hash(match));
+ ifq = inet_frag_find(ns_frag_state, &frag_state, match, frag_hash(match));
if (!ifq)
return NULL;
@@ -710,19 +730,21 @@ static struct sk_buff *defrag(struct sk_buff *skb, bool frag_last)
{
struct iphdr *iph = ip_hdr(skb);
struct capwaphdr *cwh = capwap_hdr(skb);
+ struct capwap_net *capwap_net = ovs_get_capwap_net(dev_net(skb->dev));
+ struct netns_frags *ns_frag_state = &capwap_net->frag_state;
struct frag_match match;
u16 frag_off;
struct frag_queue *fq;
- if (atomic_read(&frag_netns_state.mem) > frag_netns_state.high_thresh)
- inet_frag_evictor(&frag_netns_state, &frag_state);
+ if (atomic_read(&ns_frag_state->mem) > ns_frag_state->high_thresh)
+ inet_frag_evictor(ns_frag_state, &frag_state);
match.daddr = iph->daddr;
match.saddr = iph->saddr;
match.id = cwh->frag_id;
frag_off = ntohs(cwh->frag_off) & FRAG_OFF_MASK;
- fq = queue_find(&match);
+ fq = queue_find(ns_frag_state, &match);
if (fq) {
spin_lock(&fq->ifq.lock);
skb = frag_queue(fq, skb, frag_off, frag_last);
@@ -737,18 +759,6 @@ static struct sk_buff *defrag(struct sk_buff *skb, bool frag_last)
return NULL;
}
-static void defrag_init(void)
-{
- inet_frags_init(&frag_state);
- inet_frags_init_net(&frag_netns_state);
-}
-
-static void defrag_exit(void)
-{
- inet_frags_exit_net(&frag_netns_state, &frag_state);
- inet_frags_fini(&frag_state);
-}
-
static void capwap_frag_init(struct inet_frag_queue *ifq, void *match_)
{
struct frag_match *match = match_;
@@ -790,6 +800,8 @@ const struct vport_ops ovs_capwap_vport_ops = {
.flags = VPORT_F_TUN_ID,
.init = capwap_init,
.exit = capwap_exit,
+ .init_net = capwap_init_net,
+ .exit_net = capwap_exit_net,
.create = capwap_create,
.destroy = ovs_tnl_destroy,
.set_addr = ovs_tnl_set_addr,
diff --git a/datapath/vport-capwap.h b/datapath/vport-capwap.h
new file mode 100644
index 0000000..5bd7850
--- /dev/null
+++ b/datapath/vport-capwap.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_CAPWAP_H
+#define VPORT_CAPWAP_H 1
+
+#include <linux/net.h>
+
+struct capwap_net {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
+ struct socket *capwap_rcv_socket;
+ struct netns_frags frag_state;
+#endif
+};
+
+#endif /* vport-capwap.h */
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 4411cac..aec4849 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -29,6 +29,7 @@
#include <net/ip.h>
#include <net/protocol.h>
+#include "datapath.h"
#include "tunnel.h"
#include "vport.h"
#include "vport-generic.h"
@@ -205,8 +206,8 @@ static void gre_err(struct sk_buff *skb, u32 info)
if (tunnel_hdr_len < 0)
return;
- vport = ovs_tnl_find_port(iph->saddr, iph->daddr, key, TNL_T_PROTO_GRE,
- &mutable);
+ vport = ovs_tnl_find_port(dev_net(skb->dev), iph->saddr, iph->daddr, key,
+ TNL_T_PROTO_GRE, &mutable);
if (!vport)
return;
@@ -342,8 +343,8 @@ static int gre_rcv(struct sk_buff *skb)
goto error;
iph = ip_hdr(skb);
- vport = ovs_tnl_find_port(iph->daddr, iph->saddr, key, TNL_T_PROTO_GRE,
- &mutable);
+ vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr, key,
+ TNL_T_PROTO_GRE, &mutable);
if (unlikely(!vport)) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
goto error;
@@ -381,6 +382,9 @@ static struct vport *gre_create(const struct vport_parms *parms)
static const struct net_protocol gre_protocol_handlers = {
.handler = gre_rcv,
.err_handler = gre_err,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
+ .netns_ok = 1,
+#endif
};
static int gre_init(void)
diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c
index c56f3b2..d04f433 100644
--- a/datapath/vport-internal_dev.c
+++ b/datapath/vport-internal_dev.c
@@ -203,7 +203,8 @@ static void do_setup(struct net_device *netdev)
netdev->tx_queue_len = 0;
netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
- NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
+ NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
netdev->vlan_features = netdev->features;
@@ -239,9 +240,14 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
goto error_free_vport;
}
+ dev_net_set(netdev_vport->dev, vport->net);
internal_dev = internal_dev_priv(netdev_vport->dev);
internal_dev->vport = vport;
+ /* Restrict bridge port to current netns. */
+ if (vport->port_no == OVSP_LOCAL)
+ netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
+
err = register_netdevice(netdev_vport->dev);
if (err)
goto error_free_netdev;
diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
index 2db5592..eae2fb8 100644
--- a/datapath/vport-netdev.c
+++ b/datapath/vport-netdev.c
@@ -139,7 +139,7 @@ static struct vport *netdev_create(const struct vport_parms *parms)
netdev_vport = netdev_vport_priv(vport);
- netdev_vport->dev = dev_get_by_name(&init_net, parms->name);
+ netdev_vport->dev = dev_get_by_name(vport->net, parms->name);
if (!netdev_vport->dev) {
err = -ENODEV;
goto error_free_vport;
diff --git a/datapath/vport-patch.c b/datapath/vport-patch.c
index 53b24b0..a7be489 100644
--- a/datapath/vport-patch.c
+++ b/datapath/vport-patch.c
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rtnetlink.h>
+#include <net/net_namespace.h>
#include "compat.h"
#include "datapath.h"
@@ -49,7 +50,7 @@ struct patch_vport {
static struct hlist_head *peer_table;
#define PEER_HASH_BUCKETS 256
-static void update_peers(const char *name, struct vport *);
+static void update_peers(const char *name, struct net *, struct vport *);
static struct patch_vport *patch_vport_priv(const struct vport *vport)
{
@@ -164,8 +165,9 @@ static struct vport *patch_create(const struct vport_parms *parms)
peer_name = patchconf->peer_name;
hlist_add_head(&patch_vport->hash_node, hash_bucket(peer_name));
- rcu_assign_pointer(patch_vport->peer, ovs_vport_locate(peer_name));
- update_peers(patch_vport->name, vport);
+ rcu_assign_pointer(patch_vport->peer,
+ ovs_vport_locate(vport->net, peer_name));
+ update_peers(patch_vport->name, vport->net, vport);
return vport;
@@ -190,7 +192,7 @@ static void patch_destroy(struct vport *vport)
{
struct patch_vport *patch_vport = patch_vport_priv(vport);
- update_peers(patch_vport->name, NULL);
+ update_peers(patch_vport->name, vport->net, NULL);
hlist_del(&patch_vport->hash_node);
call_rcu(&patch_vport->rcu, free_port_rcu);
}
@@ -216,28 +218,31 @@ static int patch_set_options(struct vport *vport, struct nlattr *options)
hlist_del(&patch_vport->hash_node);
- rcu_assign_pointer(patch_vport->peer, ovs_vport_locate(patchconf->peer_name));
- hlist_add_head(&patch_vport->hash_node, hash_bucket(patchconf->peer_name));
+ rcu_assign_pointer(patch_vport->peer,
+ ovs_vport_locate(vport->net, patchconf->peer_name));
- return 0;
+ hlist_add_head(&patch_vport->hash_node,
+ hash_bucket(patchconf->peer_name));
+ return 0;
error_free:
kfree(patchconf);
error:
return err;
}
-static void update_peers(const char *name, struct vport *vport)
+static void update_peers(const char *name, struct net *net, struct vport *vport)
{
struct hlist_head *bucket = hash_bucket(name);
struct patch_vport *peer_vport;
struct hlist_node *node;
hlist_for_each_entry(peer_vport, node, bucket, hash_node) {
+ struct vport *curr_vport = vport_from_priv(peer_vport);
const char *peer_name;
peer_name = rtnl_dereference(peer_vport->patchconf)->peer_name;
- if (!strcmp(peer_name, name))
+ if (!strcmp(peer_name, name) && curr_vport->net == net)
rcu_assign_pointer(peer_vport->peer, vport);
}
}
diff --git a/datapath/vport.c b/datapath/vport.c
index e9ccdbd..83043ec 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -28,7 +28,9 @@
#include <linux/rtnetlink.h>
#include <linux/compat.h>
#include <linux/version.h>
+#include <net/net_namespace.h>
+#include "datapath.h"
#include "vport.h"
#include "vport-internal_dev.h"
@@ -51,6 +53,45 @@ static int n_vport_types;
static struct hlist_head *dev_table;
#define VPORT_HASH_BUCKETS 1024
+static void vport_exit_net(struct net *net, int max)
+{
+ int i;
+
+ for (i = 0; i < max; i++) {
+ const struct vport_ops *ops = vport_ops_list[i];
+
+ if (ops->exit_net)
+ ops->exit_net(net);
+ }
+}
+
+int ovs_vport_init_net(struct net *net)
+{
+ int i;
+
+ for (i = 0; i < n_vport_types; i++) {
+ const struct vport_ops *ops = vport_ops_list[i];
+ int err;
+
+ err = 0;
+
+ if (ops->init_net)
+ err = ops->init_net(net);
+
+ if (err) {
+ vport_exit_net(net, i);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+void ovs_vport_exit_net(struct net *net)
+{
+ vport_exit_net(net, n_vport_types);
+}
+
/**
* ovs_vport_init - initialize vport subsystem
*
@@ -132,14 +173,15 @@ static struct hlist_head *hash_bucket(const char *name)
*
* Must be called with RTNL or RCU read lock.
*/
-struct vport *ovs_vport_locate(const char *name)
+struct vport *ovs_vport_locate(struct net *net, const char *name)
{
struct hlist_head *bucket = hash_bucket(name);
struct vport *vport;
struct hlist_node *node;
hlist_for_each_entry_rcu(vport, node, bucket, hash_node)
- if (!strcmp(name, vport->ops->get_name(vport)))
+ if (!strcmp(name, vport->ops->get_name(vport)) &&
+ vport->net == net)
return vport;
return NULL;
@@ -187,6 +229,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
vport->dp = parms->dp;
vport->port_no = parms->port_no;
+ vport->net = parms->dp->net;
vport->upcall_pid = parms->upcall_pid;
vport->ops = ops;
@@ -241,14 +284,16 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
for (i = 0; i < n_vport_types; i++) {
if (vport_ops_list[i]->type == parms->type) {
+ struct hlist_head *bucket;
+
vport = vport_ops_list[i]->create(parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
goto out;
}
- hlist_add_head_rcu(&vport->hash_node,
- hash_bucket(vport->ops->get_name(vport)));
+ bucket = hash_bucket(vport->ops->get_name(vport));
+ hlist_add_head_rcu(&vport->hash_node, bucket);
return vport;
}
}
diff --git a/datapath/vport.h b/datapath/vport.h
index 44cf603..44f3bf0 100644
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@ -20,25 +20,32 @@
#define VPORT_H 1
#include <linux/list.h>
+#include <linux/netlink.h>
#include <linux/openvswitch.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/u64_stats_sync.h>
-#include "datapath.h"
+#include "vport-capwap.h"
struct vport;
struct vport_parms;
+struct vport_net {
+ struct capwap_net capwap;
+};
+
/* The following definitions are for users of the vport subsytem: */
int ovs_vport_init(void);
void ovs_vport_exit(void);
+int ovs_vport_init_net(struct net *net);
+void ovs_vport_exit_net(struct net *net);
struct vport *ovs_vport_add(const struct vport_parms *);
void ovs_vport_del(struct vport *);
-struct vport *ovs_vport_locate(const char *name);
+struct vport *ovs_vport_locate(struct net *net, const char *name);
int ovs_vport_set_addr(struct vport *, const unsigned char *);
void ovs_vport_set_stats(struct vport *, struct ovs_vport_stats *);
@@ -89,6 +96,7 @@ struct vport_err_stats {
struct vport {
struct rcu_head rcu;
u16 port_no;
+ struct net *net;
struct datapath *dp;
struct kobject kobj;
char linkname[IFNAMSIZ];
@@ -172,6 +180,10 @@ struct vport_ops {
int (*init)(void);
void (*exit)(void);
+ /* Called at net init and exit respectively. */
+ int (*init_net)(struct net *);
+ void (*exit_net)(struct net *);
+
/* Called with RTNL lock. */
struct vport *(*create)(const struct vport_parms *);
void (*destroy)(struct vport *);
--
1.7.1
More information about the dev mailing list