[ovs-dev] [PATCH] Update fake bond devices' statistics with the sum of bond slaves' stats.

Ben Pfaff blp at nicira.com
Tue Apr 13 22:16:47 UTC 2010


Needed by XAPI to accurately report bond statistics.

Ugh.

Bug NIC-63.
---
 datapath/dp_dev.c            |   30 ++++++++++----
 datapath/dp_dev.h            |   14 ++++++-
 include/openvswitch/dp_dev.h |   66 +++++++++++++++++++++++++++++
 lib/netdev-linux.c           |   93 +++++++++++++++++++++++++++++++----------
 lib/netdev-provider.h        |    8 ++++
 lib/netdev.c                 |   13 ++++++
 lib/netdev.h                 |    1 +
 vswitchd/bridge.c            |   42 +++++++++++++++++++
 8 files changed, 235 insertions(+), 32 deletions(-)
 create mode 100644 include/openvswitch/dp_dev.h

diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c
index 2bbd6fe..0d307e1 100644
--- a/datapath/dp_dev.c
+++ b/datapath/dp_dev.c
@@ -17,13 +17,7 @@
 
 #include "datapath.h"
 #include "dp_dev.h"
-
-struct pcpu_lstats {
-	unsigned long rx_packets;
-	unsigned long rx_bytes;
-	unsigned long tx_packets;
-	unsigned long tx_bytes;
-};
+#include "openvswitch/dp_dev.h"
 
 struct datapath *dp_dev_get_dp(struct net_device *netdev)
 {
@@ -37,7 +31,10 @@ static struct net_device_stats *dp_dev_get_stats(struct net_device *netdev)
 	int i;
 
 	stats = &dp_dev->stats;
-	memset(stats, 0, sizeof *stats);
+	stats->rx_bytes = dp_dev->extra_stats.rx_bytes;
+	stats->rx_packets = dp_dev->extra_stats.rx_packets;
+	stats->tx_bytes = dp_dev->extra_stats.tx_bytes;
+	stats->tx_packets = dp_dev->extra_stats.tx_packets;
 	for_each_possible_cpu(i) {
 		const struct pcpu_lstats *lb_stats;
 
@@ -47,6 +44,7 @@ static struct net_device_stats *dp_dev_get_stats(struct net_device *netdev)
 		stats->tx_bytes   += lb_stats->tx_bytes;
 		stats->tx_packets += lb_stats->tx_packets;
 	}
+
 	return stats;
 }
 
@@ -164,6 +162,22 @@ static void dp_dev_free(struct net_device *netdev)
 
 static int dp_dev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
+	struct dp_dev *dp_dev = dp_dev_priv(dev);
+
+	if (cmd == DP_DEV_SET_STATS) {
+		struct dp_dev_stats stats;
+
+		if (copy_from_user(&stats, ifr->ifr_data, sizeof(stats)))
+			return -EFAULT;
+
+		dp_dev->extra_stats.rx_bytes = stats.rx_bytes;
+		dp_dev->extra_stats.rx_packets = stats.rx_packets;
+		dp_dev->extra_stats.tx_bytes = stats.tx_bytes;
+		dp_dev->extra_stats.tx_packets = stats.tx_packets;
+
+		return 0;
+	}
+
 	if (dp_ioctl_hook)
 		return dp_ioctl_hook(dev, ifr, cmd);
 	return -EOPNOTSUPP;
diff --git a/datapath/dp_dev.h b/datapath/dp_dev.h
index 1fb4394..5ae1fdc 100644
--- a/datapath/dp_dev.h
+++ b/datapath/dp_dev.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Nicira Networks.
+ * Copyright (c) 2009, 2010 Nicira Networks.
  * Distributed under the terms of the GNU GPL version 2.
  *
  * Significant portions of this file may be copied from parts of the Linux
@@ -11,6 +11,12 @@
 
 #include <linux/percpu.h>
 
+struct pcpu_lstats {
+	unsigned long rx_packets;
+	unsigned long rx_bytes;
+	unsigned long tx_packets;
+	unsigned long tx_bytes;
+};
 struct dp_dev {
 	struct datapath *dp;
 	int port_no;
@@ -18,6 +24,12 @@ struct dp_dev {
 	struct net_device *dev;
 	struct net_device_stats stats;
 	struct pcpu_lstats *lstats;
+
+	/* This is warty support for XAPI, which does not support summing bond
+	 * device statistics itself.  The array lists ifindexes of network
+	 * devices whose statistics should be added to our own, so that
+	 * e.g. bond0's stats can be the sum of eth0 and eth1.  */
+	struct pcpu_lstats extra_stats;
 };
 
 static inline struct dp_dev *dp_dev_priv(struct net_device *netdev)
diff --git a/include/openvswitch/dp_dev.h b/include/openvswitch/dp_dev.h
new file mode 100644
index 0000000..a35dcdb
--- /dev/null
+++ b/include/openvswitch/dp_dev.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ *
+ * This file is offered under your choice of two licenses: Apache 2.0 or GNU
+ * GPL 2.0 or later.  The permission statements for each of these licenses is
+ * given below.  You may license your modifications to this file under either
+ * of these licenses or both.  If you wish to license your modifications under
+ * only one of these licenses, delete the permission text for the other
+ * license.
+ *
+ * ----------------------------------------------------------------------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * ----------------------------------------------------------------------
+ */
+
+/* Ioctl for Open vSwitch "internal ports" to support XAPI, which does not
+ * support summing statistics from bond slaves, but still needs to get bond
+ * statistics.
+ *
+ * This is a nasty wart that needs removing. */
+
+#ifndef OPENVSWITCH_DP_DEV_H
+#define OPENVSWITCH_DP_DEV_H 1
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <sys/types.h>
+#endif
+
+#include <linux/sockios.h>
+
+struct dp_dev_stats {
+	__u64 rx_packets;
+	__u64 rx_bytes;
+	__u64 tx_packets;
+	__u64 tx_bytes;
+};
+
+#define DP_DEV_SET_STATS        SIOCDEVPRIVATE
+
+#endif /* openvswitch/dp_dev.h */
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 736b588..edd74f5 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -50,6 +50,7 @@
 #include "netlink.h"
 #include "ofpbuf.h"
 #include "openflow/openflow.h"
+#include "openvswitch/dp_dev.h"
 #include "openvswitch/gre.h"
 #include "packets.h"
 #include "poll-loop.h"
@@ -84,7 +85,7 @@ enum {
     VALID_IN6 = 1 << 3,
     VALID_MTU = 1 << 4,
     VALID_CARRIER = 1 << 5,
-    VALID_IS_INTERNAL = 1 << 6
+    VALID_IS_XXX = 1 << 6       /* Represents is_internal and is_tap. */
 };
 
 struct tap_state {
@@ -97,13 +98,16 @@ struct netdev_dev_linux {
     struct shash_node *shash_node;
     unsigned int cache_valid;
 
+    /* The following are figured out "on demand" only.  They are only valid
+     * when the corresponding VALID_* bit in 'cache_valid' is set. */
     int ifindex;
     uint8_t etheraddr[ETH_ADDR_LEN];
     struct in_addr address, netmask;
     struct in6_addr in6;
     int mtu;
     int carrier;
-    bool is_internal;
+    bool is_internal;           /* Is this an openvswitch internal device? */
+    bool is_tap;                /* Is this a tuntap device? */
 
     union {
         struct tap_state tap;
@@ -1219,6 +1223,33 @@ check_for_working_netlink_stats(void)
     }
 }
 
+/* Brings the 'is_internal' and 'is_tap' members of 'netdev_dev' up-to-date. */
+static void
+netdev_linux_update_is_xxx(struct netdev *netdev,
+                           struct netdev_dev_linux *netdev_dev)
+{
+    if (!(netdev_dev->cache_valid & VALID_IS_XXX)) {
+        netdev_dev->is_internal = false;
+        netdev_dev->is_tap = !strcmp(netdev_get_type(netdev), "tap");
+        if (!netdev_dev->is_tap) {
+            struct ethtool_drvinfo drvinfo;
+            int error;
+
+            memset(&drvinfo, 0, sizeof drvinfo);
+            error = netdev_linux_do_ethtool(netdev_get_name(netdev),
+                                            (struct ethtool_cmd *)&drvinfo,
+                                            ETHTOOL_GDRVINFO,
+                                            "ETHTOOL_GDRVINFO");
+
+            if (!error && !strcmp(drvinfo.driver, "openvswitch")) {
+                netdev_dev->is_internal = true;
+            }
+        }
+
+        netdev_dev->cache_valid |= VALID_IS_XXX;
+    }
+}
+
 /* Retrieves current device stats for 'netdev'.
  *
  * XXX All of the members of struct netdev_stats are 64 bits wide, but on
@@ -1229,6 +1260,7 @@ netdev_linux_get_stats(const struct netdev *netdev_,
 {
     struct netdev_dev_linux *netdev_dev =
                                 netdev_dev_linux_cast(netdev_get_dev(netdev_));
+    struct netdev *netdev = (struct netdev *) netdev_;
     static int use_netlink_stats = -1;
     int error;
     struct netdev_stats raw_stats;
@@ -1236,26 +1268,7 @@ netdev_linux_get_stats(const struct netdev *netdev_,
 
     COVERAGE_INC(netdev_get_stats);
 
-    if (!(netdev_dev->cache_valid & VALID_IS_INTERNAL)) {
-        netdev_dev->is_internal = !strcmp(netdev_get_type(netdev_), "tap");
-        if (!netdev_dev->is_internal) {
-            struct ethtool_drvinfo drvinfo;
-
-            memset(&drvinfo, 0, sizeof drvinfo);
-            error = netdev_linux_do_ethtool(netdev_get_name(netdev_),
-                                            (struct ethtool_cmd *)&drvinfo,
-                                            ETHTOOL_GDRVINFO,
-                                            "ETHTOOL_GDRVINFO");
-
-            if (!error) {
-                netdev_dev->is_internal = !strcmp(drvinfo.driver,
-                                                        "openvswitch");
-            }
-        }
-
-        netdev_dev->cache_valid |= VALID_IS_INTERNAL;
-    }
-
+    netdev_linux_update_is_xxx(netdev, netdev_dev);
     if (netdev_dev->is_internal) {
         collect_stats = &raw_stats;
     }
@@ -1278,7 +1291,7 @@ netdev_linux_get_stats(const struct netdev *netdev_,
      * will appear to be swapped relative to the other ports since we are the
      * one sending the data, not a remote computer.  For consistency, we swap
      * them back here. */
-    if (!error && netdev_dev->is_internal) {
+    if (!error && (netdev_dev->is_internal || netdev_dev->is_tap)) {
         stats->rx_packets = raw_stats.tx_packets;
         stats->tx_packets = raw_stats.rx_packets;
         stats->rx_bytes = raw_stats.tx_bytes;
@@ -1305,6 +1318,37 @@ netdev_linux_get_stats(const struct netdev *netdev_,
     return error;
 }
 
+static int
+netdev_linux_set_stats(struct netdev *netdev,
+                       const struct netdev_stats *stats)
+{
+    struct netdev_dev_linux *netdev_dev =
+        netdev_dev_linux_cast(netdev_get_dev(netdev));
+    struct dp_dev_stats dp_dev_stats;
+    struct ifreq ifr;
+
+    /* We must reject this call if 'netdev' is not an Open vSwitch internal
+     * port, because the ioctl that we are about to execute is in the "device
+     * private ioctls" range, which means that executing it on a device that
+     * is not the type we expect could do any random thing.
+     *
+     * (Amusingly, these ioctl numbers are commented "THESE IOCTLS ARE
+     * _DEPRECATED_ AND WILL DISAPPEAR IN 2.5.X" in linux/sockios.h.  I guess
+     * DaveM is a little behind on that.) */
+    netdev_linux_update_is_xxx(netdev, netdev_dev);
+    if (!netdev_dev->is_internal) {
+        return EOPNOTSUPP;
+    }
+
+    dp_dev_stats.rx_packets = stats->rx_packets;
+    dp_dev_stats.rx_bytes = stats->rx_bytes;
+    dp_dev_stats.tx_packets = stats->tx_packets;
+    dp_dev_stats.tx_bytes = stats->tx_bytes;
+    ifr.ifr_data = (void *) &dp_dev_stats;
+    return netdev_linux_do_ioctl(netdev_get_name(netdev), &ifr,
+                                 DP_DEV_SET_STATS, "DP_DEV_SET_STATS");
+}
+
 /* Stores the features supported by 'netdev' into each of '*current',
  * '*advertised', '*supported', and '*peer' that are non-null.  Each value is a
  * bitmap of "enum ofp_port_features" bits, in host byte order.  Returns 0 if
@@ -1988,6 +2032,7 @@ const struct netdev_class netdev_linux_class = {
     netdev_linux_get_ifindex,
     netdev_linux_get_carrier,
     netdev_linux_get_stats,
+    netdev_linux_set_stats,
 
     netdev_linux_get_features,
     netdev_linux_set_advertisements,
@@ -2036,6 +2081,7 @@ const struct netdev_class netdev_tap_class = {
     netdev_linux_get_ifindex,
     netdev_linux_get_carrier,
     netdev_linux_get_stats,
+    netdev_linux_set_stats,
 
     netdev_linux_get_features,
     netdev_linux_set_advertisements,
@@ -2084,6 +2130,7 @@ const struct netdev_class netdev_gre_class = {
     netdev_linux_get_ifindex,
     netdev_linux_get_carrier,
     netdev_linux_get_stats,
+    netdev_linux_set_stats,
 
     netdev_linux_get_features,
     netdev_linux_set_advertisements,
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index e1c18cc..f2a3eaa 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -265,6 +265,14 @@ struct netdev_class {
      * (UINT64_MAX). */
     int (*get_stats)(const struct netdev *netdev, struct netdev_stats *);
 
+    /* Sets the device stats for 'netdev' to 'stats'.
+     *
+     * Most network devices won't support this feature and will set this
+     * function pointer to 0, which is equivalent to returning EOPNOTSUPP.
+     * Additional, some network devices might only allow setting their stats to
+     * 0. */
+    int (*set_stats)(struct netdev *netdev, const struct netdev_stats *);
+
     /* Stores the features supported by 'netdev' into each of '*current',
      * '*advertised', '*supported', and '*peer'.  Each value is a bitmap of
      * "enum ofp_port_features" bits, in host byte order. */
diff --git a/lib/netdev.c b/lib/netdev.c
index 99b5d24..6365807 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -933,6 +933,19 @@ netdev_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
     return error;
 }
 
+/* Attempts to change the stats for 'netdev' to those provided in 'stats'.
+ * Returns 0 if successful, otherwise a positive errno value.
+ *
+ * This will probably fail for most network devices.  Some devices might only
+ * allow setting their stats to 0. */
+int
+netdev_set_stats(struct netdev *netdev, const struct netdev_stats *stats)
+{
+    return (netdev_get_dev(netdev)->netdev_class->set_stats
+             ? netdev_get_dev(netdev)->netdev_class->set_stats(netdev, stats)
+             : EOPNOTSUPP);
+}
+
 /* Attempts to set input rate limiting (policing) policy, such that up to
  * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative burst
  * size of 'kbits' kb. */
diff --git a/lib/netdev.h b/lib/netdev.h
index 27eb82e..d027465 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -146,6 +146,7 @@ int netdev_turn_flags_on(struct netdev *, enum netdev_flags, bool permanent);
 int netdev_turn_flags_off(struct netdev *, enum netdev_flags, bool permanent);
 
 int netdev_get_stats(const struct netdev *, struct netdev_stats *);
+int netdev_set_stats(struct netdev *, const struct netdev_stats *);
 int netdev_set_policing(struct netdev *, uint32_t kbits_rate, 
                         uint32_t kbits_burst);
 
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 620689f..1d945de 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -137,6 +137,7 @@ struct port {
     int updelay, downdelay;     /* Delay before iface goes up/down, in ms. */
     bool bond_compat_is_stale;  /* Need to call port_update_bond_compat()? */
     bool bond_fake_iface;       /* Fake a bond interface for legacy compat? */
+    long bond_next_fake_iface_update; /* Next update to fake bond stats. */
     int bond_rebalance_interval; /* Interval between rebalances, in ms. */
     long long int bond_next_rebalance; /* Next rebalancing time. */
 
@@ -1848,6 +1849,34 @@ bond_enable_slave(struct iface *iface, bool enable)
     port->bond_compat_is_stale = true;
 }
 
+/* Attempts to make the sum of the bond slaves' statistics appear on the fake
+ * bond interface. */
+static void
+bond_update_fake_iface_stats(struct port *port)
+{
+    struct netdev_stats bond_stats;
+    struct netdev *bond_dev;
+    size_t i;
+
+    memset(&bond_stats, 0, sizeof bond_stats);
+
+    for (i = 0; i < port->n_ifaces; i++) {
+        struct netdev_stats slave_stats;
+
+        if (!netdev_get_stats(port->ifaces[i]->netdev, &slave_stats)) {
+            bond_stats.rx_packets += slave_stats.rx_packets;
+            bond_stats.rx_bytes += slave_stats.rx_bytes;
+            bond_stats.tx_packets += slave_stats.tx_packets;
+            bond_stats.tx_bytes += slave_stats.tx_bytes;
+        }
+    }
+
+    if (!netdev_open_default(port->name, &bond_dev)) {
+        netdev_set_stats(bond_dev, &bond_stats);
+        netdev_close(bond_dev);
+    }
+}
+
 static void
 bond_run(struct bridge *br)
 {
@@ -1863,6 +1892,12 @@ bond_run(struct bridge *br)
                     bond_enable_slave(iface, !iface->enabled);
                 }
             }
+
+            if (port->bond_fake_iface
+                && time_msec() >= port->bond_next_fake_iface_update) {
+                bond_update_fake_iface_stats(port);
+                port->bond_next_fake_iface_update = time_msec() + 1000;
+            }
         }
 
         if (port->bond_compat_is_stale) {
@@ -1888,6 +1923,9 @@ bond_wait(struct bridge *br)
                 poll_timer_wait(iface->delay_expires - time_msec());
             }
         }
+        if (port->bond_fake_iface) {
+            poll_timer_wait(port->bond_next_fake_iface_update - time_msec());
+        }
     }
 }
 
@@ -3328,6 +3366,10 @@ port_update_bonding(struct port *port)
             bond_choose_active_iface(port);
             port->bond_next_rebalance
                 = time_msec() + port->bond_rebalance_interval;
+
+            if (port->cfg->bond_fake_iface) {
+                port->bond_next_fake_iface_update = time_msec();
+            }
         }
         port->bond_compat_is_stale = true;
         port->bond_fake_iface = port->cfg->bond_fake_iface;
-- 
1.6.6.1





More information about the dev mailing list