[ovs-discuss] [NIC-19 7/8] brcompat: Move BRCTL_GET_BRIDGES, BRCTL_GET_PORT_LIST into userspace.

Ben Pfaff blp at nicira.com
Thu Jul 30 22:27:48 UTC 2009


The Citrix QA scripts assume that "brctl show" will show a topology as if
the Linux bridge were still in use; that is, as if there were one bridge
per VLAN and as if bonds were network devices of their own instead of
separate devices.  However, we were showing the datapath topology, i.e.
all VLANs and bond devices lumped together into a single datapath.  This
commit fixes the VLAN end of the problem, by moving the implementation of
the ioctls that brctl uses into userspace in ovs-brcompatd and putting the
necessary translation logic into ovs-brcompatd.

By itself, this commit does not fix the problem for bonds: the port name
for a bond does not (normally) under Open vSwitch exist as an actual
Linux network device, and thus it has no ifindex, and so ovs-brcompatd
can't pass it back to the kernel to report to brctl.  This fix will have
to wait for another commit.

Bug NIC-19.
---
 datapath/brcompat.c                    |   98 +++++++++-----------
 include/openvswitch/brcompat-netlink.h |    7 +-
 vswitchd/ovs-brcompatd.c               |  162 +++++++++++++++++++++++++++++---
 3 files changed, 199 insertions(+), 68 deletions(-)

diff --git a/datapath/brcompat.c b/datapath/brcompat.c
index db0a70f..ab2f70a 100644
--- a/datapath/brcompat.c
+++ b/datapath/brcompat.c
@@ -46,36 +46,6 @@ static u32 brc_seq;		     /* Sequence number for current op. */
 static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
 static int brc_send_simple_command(struct sk_buff *);
 
-static int
-get_dp_ifindices(int *indices, int num)
-{
-	int i, index = 0;
-
-	rcu_read_lock();
-	for (i=0; i < ODP_MAX && index < num; i++) {
-		struct datapath *dp = get_dp(i);
-		if (!dp)
-			continue;
-		indices[index++] = dp->ports[ODPP_LOCAL]->dev->ifindex;
-	}
-	rcu_read_unlock();
-
-	return index;
-}
-
-static void
-get_port_ifindices(struct datapath *dp, int *ifindices, int num)
-{
-	struct net_bridge_port *p;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu (p, &dp->port_list, node) {
-		if (p->port_no < num)
-			ifindices[p->port_no] = p->dev->ifindex;
-	}
-	rcu_read_unlock();
-}
-
 static struct sk_buff *
 brc_make_request(int op, const char *bridge, const char *port)
 {
@@ -84,7 +54,8 @@ brc_make_request(int op, const char *bridge, const char *port)
 		goto error;
 
 	genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
-	NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
+	if (bridge)
+		NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
 	if (port)
 		NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
 	return skb;
@@ -127,26 +98,57 @@ static int brc_add_del_bridge(char __user *uname, int add)
 	return brc_send_simple_command(request);
 }
 
-static int brc_get_bridges(int __user *uindices, int n)
+static int brc_get_indices(int op, const char *br_name,
+			   int __user *uindices, int n)
 {
+	struct nlattr *attrs[BRC_GENL_A_MAX + 1];
+	struct sk_buff *request, *reply;
 	int *indices;
 	int ret;
+	int len;
 
+	if (n < 0)
+		return -EINVAL;
 	if (n >= 2048)
 		return -ENOMEM;
 
-	indices = kcalloc(n, sizeof(int), GFP_KERNEL);
-	if (indices == NULL)
+	request = brc_make_request(op, br_name, NULL);
+	if (!request)
 		return -ENOMEM;
 
-	n = get_dp_ifindices(indices, n);
+	reply = brc_send_command(request, attrs);
+	ret = PTR_ERR(reply);
+	if (IS_ERR(reply))
+		goto exit;
+
+	ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
+	if (ret < 0)
+		goto exit_free_skb;
+
+	ret = -EINVAL;
+	if (!attrs[BRC_GENL_A_IFINDEXES])
+		goto exit_free_skb;
+
+	len = nla_len(attrs[BRC_GENL_A_IFINDEXES]);
+	indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]);
+	if (len % sizeof(int))
+		goto exit_free_skb;
 
+	n = min_t(int, n, len / sizeof(int));
 	ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
 
-	kfree(indices);
+exit_free_skb:
+	kfree_skb(reply);
+exit:
 	return ret;
 }
 
+/* Called with br_ioctl_mutex. */
+static int brc_get_bridges(int __user *uindices, int n)
+{
+	return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
+}
+
 /* Legacy deviceless bridge ioctl's.  Called with br_ioctl_mutex. */
 static int
 old_deviceless(void __user *uarg)
@@ -239,26 +241,14 @@ brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub)
 static int
 brc_get_port_list(struct net_device *dev, int __user *uindices, int num)
 {
-	struct dp_dev *dp_dev = netdev_priv(dev);
-	struct datapath *dp = dp_dev->dp;
-	int *indices;
-
-	if (num < 0)
-		return -EINVAL;
-	if (num == 0)
-		num = 256;
-	if (num > DP_MAX_PORTS)
-		num = DP_MAX_PORTS;
+	int retval;
 
-	indices = kcalloc(num, sizeof(int), GFP_KERNEL);
-	if (indices == NULL)
-		return -ENOMEM;
+	rtnl_unlock();
+	retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name,
+				 uindices, num);
+	rtnl_lock();
 
-	get_port_ifindices(dp, indices, num);
-	if (copy_to_user(uindices, indices, num * sizeof(int)))
-		num = -EFAULT;
-	kfree(indices);
-	return num;
+	return retval;
 }
 
 /*
diff --git a/include/openvswitch/brcompat-netlink.h b/include/openvswitch/brcompat-netlink.h
index 694bdcc..7d5ac4c 100644
--- a/include/openvswitch/brcompat-netlink.h
+++ b/include/openvswitch/brcompat-netlink.h
@@ -47,8 +47,8 @@ enum {
 	BRC_GENL_A_UNSPEC,
 
 	/*
-	 * "K:" messages appear in messages from the kernel to userspace.
-	 * "U:" messages appear in messages from userspace to the kernel.
+	 * "K:" attributes appear in messages from the kernel to userspace.
+	 * "U:" attributes appear in messages from userspace to the kernel.
 	 */
 
 	/* BRC_GENL_C_DP_ADD, BRC_GENL_C_DP_DEL. */
@@ -75,6 +75,7 @@ enum {
 
 	/* BRC_GENL_C_DP_RESULT. */
 	BRC_GENL_A_FDB_DATA,    /* U: FDB records. */
+	BRC_GENL_A_IFINDEXES,   /* U: "int" ifindexes of bridges or ports. */
 
 	__BRC_GENL_A_MAX,
 	BRC_GENL_A_MAX = __BRC_GENL_A_MAX - 1
@@ -96,6 +97,8 @@ enum brc_genl_command {
 	BRC_GENL_C_QUERY_MC,	/* U: Get multicast group for brcompat. */
 	BRC_GENL_C_SET_PROC,	/* U: Set contents of file in /proc. */
 	BRC_GENL_C_FDB_QUERY,	/* K: Read records from forwarding database. */
+	BRC_GENL_C_GET_BRIDGES, /* K: Get ifindexes of all bridges. */
+	BRC_GENL_C_GET_PORTS,   /* K: Get ifindexes of all ports on a bridge. */
 
 	__BRC_GENL_C_MAX,
 	BRC_GENL_C_MAX = __BRC_GENL_C_MAX - 1
diff --git a/vswitchd/ovs-brcompatd.c b/vswitchd/ovs-brcompatd.c
index 83701a7..7a244a0 100644
--- a/vswitchd/ovs-brcompatd.c
+++ b/vswitchd/ovs-brcompatd.c
@@ -240,21 +240,15 @@ rewrite_and_reload_config(void)
     return 0;
 }
 
-/* Get all the interfaces for 'bridge' as 'ifaces', breaking bonded interfaces
- * down into their constituent parts.
- *
- * If 'vlan' < 0, all interfaces on 'bridge' are reported.  If 'vlan' == 0,
- * then only interfaces for trunk ports or ports with implicit VLAN 0 are
- * reported.  If 'vlan' > 0, only interfaces with implict VLAN 'vlan' are
- * reported.  */
 static void
-get_bridge_ifaces(const char *bridge, struct svec *ifaces, int vlan)
+do_get_bridge_parts(const char *bridge, struct svec *parts, int vlan,
+                    bool break_down_bonds)
 {
     struct svec ports;
     int i;
 
     svec_init(&ports);
-    svec_init(ifaces);
+    svec_init(parts);
     cfg_get_all_keys(&ports, "bridge.%s.port", bridge);
     for (i = 0; i < ports.n; i++) {
         const char *port_name = ports.names[i];
@@ -267,19 +261,44 @@ get_bridge_ifaces(const char *bridge, struct svec *ifaces, int vlan)
                 continue;
             }
         }
-        if (cfg_has_section("bonding.%s", port_name)) {
+        if (break_down_bonds && cfg_has_section("bonding.%s", port_name)) {
             struct svec slaves;
             svec_init(&slaves);
             cfg_get_all_keys(&slaves, "bonding.%s.slave", port_name);
-            svec_append(ifaces, &slaves);
+            svec_append(parts, &slaves);
             svec_destroy(&slaves);
         } else {
-            svec_add(ifaces, port_name);
+            svec_add(parts, port_name);
         }
     }
     svec_destroy(&ports);
 }
 
+/* Get all the interfaces for 'bridge' as 'ifaces', breaking bonded interfaces
+ * down into their constituent parts.
+ *
+ * If 'vlan' < 0, all interfaces on 'bridge' are reported.  If 'vlan' == 0,
+ * then only interfaces for trunk ports or ports with implicit VLAN 0 are
+ * reported.  If 'vlan' > 0, only interfaces with implicit VLAN 'vlan' are
+ * reported.  */
+static void
+get_bridge_ifaces(const char *bridge, struct svec *ifaces, int vlan)
+{
+    do_get_bridge_parts(bridge, ifaces, vlan, true);
+}
+
+/* Get all the ports for 'bridge' as 'ports'.  Bonded ports are reported under
+ * the bond name, not broken down into their constituent interfaces.
+ *
+ * If 'vlan' < 0, all ports on 'bridge' are reported.  If 'vlan' == 0, then
+ * only trunk ports or ports with implicit VLAN 0 are reported.  If 'vlan' > 0,
+ * only port with implicit VLAN 'vlan' are reported.  */
+static void
+get_bridge_ports(const char *bridge, struct svec *ports, int vlan)
+{
+    do_get_bridge_parts(bridge, ports, vlan, false);
+}
+
 /* Go through the configuration file and remove any ports that no longer
  * exist associated with a bridge. */
 static void
@@ -740,6 +759,117 @@ handle_fdb_query_cmd(struct ofpbuf *buffer)
     return 0;
 }
 
+static void
+send_ifindex_reply(uint32_t seq, struct svec *ifaces)
+{
+    struct ofpbuf *reply;
+    const char *iface;
+    size_t n_indices;
+    int *indices;
+    size_t i;
+
+    /* Make sure that any given interface only occurs once.  This shouldn't
+     * happen, but who knows what people put into their configuration files. */
+    svec_sort_unique(ifaces);
+
+    /* Convert 'ifaces' into ifindexes. */
+    n_indices = 0;
+    indices = xmalloc(ifaces->n * sizeof *indices);
+    SVEC_FOR_EACH (i, iface, ifaces) {
+        int ifindex = if_nametoindex(iface);
+        if (ifindex) {
+            indices[n_indices++] = ifindex;
+        }
+    }
+
+    /* Compose and send reply. */
+    reply = compose_reply(seq, 0);
+    nl_msg_put_unspec(reply, BRC_GENL_A_IFINDEXES,
+                      indices, n_indices * sizeof *indices);
+    send_reply(reply);
+
+    /* Free memory. */
+    free(indices);
+}
+
+static int
+handle_get_bridges_cmd(struct ofpbuf *buffer)
+{
+    struct svec bridges;
+    const char *br_name;
+    size_t i;
+
+    uint32_t seq;
+
+    int error;
+
+    /* Parse Netlink command.
+     *
+     * The command doesn't actually have any arguments, but we need the
+     * sequence number to send the reply. */
+    error = parse_command(buffer, &seq, NULL, NULL, NULL, NULL);
+    if (error) {
+        return error;
+    }
+
+    /* Get all the real bridges and all the fake ones. */
+    cfg_read();
+    cfg_get_subsections(&bridges, "bridge");
+    SVEC_FOR_EACH (i, br_name, &bridges) {
+        const char *iface_name;
+        struct svec ifaces;
+        size_t j;
+
+        get_bridge_ifaces(br_name, &ifaces, -1);
+        SVEC_FOR_EACH (j, iface_name, &ifaces) {
+            if (cfg_get_bool(0, "iface.%s.fake-bridge", iface_name)) {
+                svec_add(&bridges, iface_name);
+            }
+        }
+        svec_destroy(&ifaces);
+    }
+
+    send_ifindex_reply(seq, &bridges);
+    svec_destroy(&bridges);
+
+    return 0;
+}
+
+static int
+handle_get_ports_cmd(struct ofpbuf *buffer)
+{
+    uint32_t seq;
+
+    const char *linux_bridge;
+    char *ovs_bridge;
+    int br_vlan;
+
+    struct svec ports;
+
+    int error;
+
+    /* Parse Netlink command. */
+    error = parse_command(buffer, &seq, &linux_bridge, NULL, NULL, NULL);
+    if (error) {
+        return error;
+    }
+
+    cfg_read();
+    error = linux_bridge_to_ovs_bridge(linux_bridge, &ovs_bridge, &br_vlan);
+    if (error) {
+        send_simple_reply(seq, error);
+        return error;
+    }
+    get_bridge_ports(ovs_bridge, &ports, br_vlan);
+
+    send_ifindex_reply(seq, &ports); /* XXX bonds won't show up */
+
+    svec_destroy(&ports);
+    free(ovs_bridge);
+
+    return 0;
+}
+
 static int
 brc_recv_update(void)
 {
@@ -802,6 +932,14 @@ brc_recv_update(void)
         retval = handle_fdb_query_cmd(buffer);
         break;
 
+    case BRC_GENL_C_GET_BRIDGES:
+        retval = handle_get_bridges_cmd(buffer);
+        break;
+
+    case BRC_GENL_C_GET_PORTS:
+        retval = handle_get_ports_cmd(buffer);
+        break;
+
     default:
         retval = EPROTO;
     }
-- 
1.6.3.3





More information about the discuss mailing list