[ovs-dev] [PATCH 2/2] lib: Show tunnel egress interface in ovsdb
Ethan Jackson
ethan at nicira.com
Wed Dec 29 01:49:48 UTC 2010
This commit parses rtnetlink address notifications from the
kernel in order to display the egress interface of tunnels in the
database.
Bug #4103.
---
lib/automake.mk | 4 +-
lib/netdev-dummy.c | 1 +
lib/netdev-linux.c | 1 +
lib/netdev-provider.h | 11 +++
lib/netdev-vport.c | 190 +++++++++++++++++++++++++++++++++++++++++++-
lib/netdev.c | 10 +++
lib/netdev.h | 1 +
lib/rtnetlink-addr.c | 114 ++++++++++++++++++++++++++
lib/rtnetlink-addr.h | 57 +++++++++++++
vswitchd/bridge.c | 9 ++
vswitchd/vswitch.ovsschema | 4 +
vswitchd/vswitch.xml | 8 ++
12 files changed, 407 insertions(+), 3 deletions(-)
create mode 100644 lib/rtnetlink-addr.c
create mode 100644 lib/rtnetlink-addr.h
diff --git a/lib/automake.mk b/lib/automake.mk
index 4c6a877..2cdb6f2 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -191,7 +191,9 @@ lib_libopenvswitch_a_SOURCES += \
lib/rtnetlink.c \
lib/rtnetlink.h \
lib/rtnetlink-link.c \
- lib/rtnetlink-link.h
+ lib/rtnetlink-link.h \
+ lib/rtnetlink-addr.c \
+ lib/rtnetlink-addr.h
endif
if HAVE_OPENSSL
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index ddcbe36..218a022 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -320,6 +320,7 @@ static const struct netdev_class dummy_class = {
NULL, /* get_in6 */
NULL, /* add_router */
NULL, /* get_next_hop */
+ NULL, /* get_tnl_iface */
NULL, /* arp_lookup */
netdev_dummy_update_flags,
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 7f9a8e3..5654dd4 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2176,6 +2176,7 @@ netdev_linux_poll_remove(struct netdev_notifier *notifier_)
netdev_linux_get_in6, \
netdev_linux_add_router, \
netdev_linux_get_next_hop, \
+ NULL, /* get_tnl_iface */ \
netdev_linux_arp_lookup, \
\
netdev_linux_update_flags, \
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index d955bb1..038f277 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -518,6 +518,17 @@ struct netdev_class {
int (*get_next_hop)(const struct in_addr *host, struct in_addr *next_hop,
char **netdev_name);
+ /* Looks up the name of the interface out of which traffic will egress if
+ * 'netdev' is a tunnel. Returns null if the lookup is unsuccessful or
+ * 'netdev' is not a tunnel. This function does not necessarily return
+ * the physical interface out of which traffic will egress. Instead it
+ * returns the interface that has an IP address whose subnet contains
+ * 'netdev''s remote_ip. This may be an internal interface such as a
+ * bridge port.
+ *
+ * This function may be set to null if 'netdev' is not a tunnel or it is
+ * not supported. */
+ const char *(*get_tnl_iface)(const struct netdev *netdev);
+
/* Looks up the ARP table entry for 'ip' on 'netdev' and stores the
* corresponding MAC address in 'mac'. A return value of ENXIO, in
* particular, indicates that there is no ARP table entry for 'ip' on
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 11db099..81f27d3 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -20,21 +20,41 @@
#include <errno.h>
#include <fcntl.h>
+#include <sys/socket.h>
+#include <linux/rtnetlink.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include "byte-order.h"
+#include "hash.h"
+#include "hmap.h"
#include "list.h"
#include "netdev-provider.h"
+#include "netlink.h"
+#include "netlink-socket.h"
+#include "ofpbuf.h"
#include "openvswitch/datapath-protocol.h"
#include "openvswitch/tunnel.h"
#include "packets.h"
+#include "rtnetlink.h"
+#include "rtnetlink-addr.h"
#include "shash.h"
#include "socket-util.h"
#include "vlog.h"
VLOG_DEFINE_THIS_MODULE(netdev_vport);
+static struct hmap addr_map;
+static struct rtnetlink_notifier netdev_vport_addr_notifier;
+
+/* One IPv4 address reported by rtnetlink, kept in 'addr_map'. Nodes are
+ * hashed on the (ifa_addr, ifa_prefixlen, ifa_index) triple. */
+struct addr_node {
+ struct hmap_node node; /* In 'addr_map'. */
+ uint32_t ifa_addr; /* Address, as copied from the change record. */
+ uint32_t ifa_prefixlen; /* Number of bits in the subnet mask. */
+ uint32_t ifa_index; /* Index of the network device. */
+ char ifa_label[IFNAMSIZ]; /* Null-terminated copy of the address label. */
+};
+
struct netdev_vport_notifier {
struct netdev_notifier notifier;
struct list list_node;
@@ -66,6 +86,11 @@ static int netdev_vport_create(const struct netdev_class *, const char *,
const struct shash *, struct netdev_dev **);
static void netdev_vport_poll_notify(const struct netdev *);
+static void netdev_vport_addr_init(void);
+static void netdev_vport_addr_destroy(void);
+static void netdev_vport_addr_change(const struct rtnetlink_addr_change *,
+ void *);
+
static bool
is_vport_class(const struct netdev_class *class)
{
@@ -116,6 +141,8 @@ netdev_vport_create(const struct netdev_class *netdev_class, const char *name,
struct netdev_dev_vport *dev;
int error;
+ netdev_vport_addr_init();
+
dev = xmalloc(sizeof *dev);
*netdev_devp = &dev->netdev_dev;
netdev_dev_init(&dev->netdev_dev, name, netdev_class);
@@ -134,6 +161,7 @@ netdev_vport_destroy(struct netdev_dev *netdev_dev_)
{
struct netdev_dev_vport *netdev_dev = netdev_dev_vport_cast(netdev_dev_);
+ netdev_vport_addr_destroy();
free(netdev_dev);
}
@@ -387,6 +415,163 @@ netdev_vport_poll_remove(struct netdev_notifier *notifier_)
free(notifier);
}
+
+/* 'run' callback for the vport netdev classes: hands control to
+ * rtnetlink_addr_notifier_run() so that queued address notifications reach
+ * netdev_vport_addr_change(). */
+static void
+netdev_vport_run(void)
+{
+ rtnetlink_addr_notifier_run();
+}
+
+/* 'wait' callback for the vport netdev classes: delegates to
+ * rtnetlink_addr_notifier_wait() so that the poll loop wakes up when address
+ * notifications are ready. */
+static void
+netdev_vport_wait(void)
+{
+ rtnetlink_addr_notifier_wait();
+}
+
+/* Rtnetlink-addr code used for get_tnl_iface. */
+
+/* Discards every entry in 'addr_map' and repopulates it from a fresh
+ * RTM_GETADDR dump of the kernel's AF_INET addresses.
+ *
+ * Returns 0 if successful, otherwise the error reported by nl_sock_create()
+ * or nl_dump_done(). On failure 'addr_map' may be empty or only partially
+ * populated. */
+static int
+netdev_vport_addr_reset(void)
+{
+    struct addr_node *rn, *rn_next;
+    struct ofpbuf request, reply;
+    struct nl_sock *rtnl_sock;
+    struct rtgenmsg *rtmsg;
+    struct nl_dump dump;
+    int error;
+
+    HMAP_FOR_EACH_SAFE (rn, rn_next, node, &addr_map) {
+        hmap_remove(&addr_map, &rn->node);
+        free(rn);
+    }
+
+    error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &rtnl_sock);
+    if (error) {
+        VLOG_WARN_RL(&rl, "Failed to create NETLINK_ROUTE socket");
+        return error;
+    }
+
+    ofpbuf_init(&request, 0);
+    nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETADDR, NLM_F_REQUEST);
+    rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg);
+    rtmsg->rtgen_family = AF_INET;
+
+    nl_dump_start(&dump, rtnl_sock, &request);
+
+    while (nl_dump_next(&dump, &reply)) {
+        struct rtnetlink_addr_change change;
+
+        /* rtnetlink_addr_parse() leaves 'change' untouched when it fails, so
+         * only act on replies that it accepted. */
+        if (rtnetlink_addr_parse(&reply, &change)) {
+            netdev_vport_addr_change(&change, NULL);
+        }
+    }
+
+    /* Finish the dump while its socket is still alive, then release the
+     * socket and the request buffer. */
+    error = nl_dump_done(&dump);
+    nl_sock_destroy(rtnl_sock);
+    ofpbuf_uninit(&request);
+
+    return error;
+}
+
+/* Returns the node in 'addr_map' that matches the address, prefix length and
+ * interface index in 'change', or null if there is none. 'hash' must be the
+ * hash_3words() value of those three fields. */
+static struct addr_node *
+netdev_vport_addr_lookup(const struct rtnetlink_addr_change *change,
+                         uint32_t hash)
+{
+    struct addr_node *rn;
+
+    HMAP_FOR_EACH_WITH_HASH (rn, node, hash, &addr_map) {
+        if (rn->ifa_addr == change->ifa_addr
+            && rn->ifa_prefixlen == change->ifa_prefixlen
+            && rn->ifa_index == change->ifa_index) {
+            return rn;
+        }
+    }
+
+    return NULL;
+}
+
+/* rtnetlink address notification callback that keeps 'addr_map' up to date.
+ *
+ * A null 'change' means that everything may have changed, so the map is
+ * rebuilt from scratch. RTM_NEWADDR adds the address, or refreshes the label
+ * of an entry that is already present instead of inserting a duplicate that
+ * a single RTM_DELADDR could not fully remove. RTM_DELADDR removes the
+ * matching entry, if any. Other message types are ignored. */
+static void
+netdev_vport_addr_change(const struct rtnetlink_addr_change *change,
+                         void *aux OVS_UNUSED)
+{
+    struct addr_node *rn;
+    uint32_t hash;
+
+    if (!change) {
+        netdev_vport_addr_reset();
+        return;
+    }
+
+    if (change->nlmsg_type != RTM_NEWADDR
+        && change->nlmsg_type != RTM_DELADDR) {
+        return;
+    }
+
+    hash = hash_3words(change->ifa_addr, change->ifa_prefixlen,
+                       change->ifa_index);
+    rn = netdev_vport_addr_lookup(change, hash);
+
+    if (change->nlmsg_type == RTM_NEWADDR) {
+        if (!rn) {
+            rn = xzalloc(sizeof *rn);
+            rn->ifa_addr = change->ifa_addr;
+            rn->ifa_index = change->ifa_index;
+            rn->ifa_prefixlen = change->ifa_prefixlen;
+            hmap_insert(&addr_map, &rn->node, hash);
+        }
+
+        /* 'change->ifa_label' is not owned by the map, so keep a copy. */
+        strncpy(rn->ifa_label, change->ifa_label, IFNAMSIZ);
+        rn->ifa_label[IFNAMSIZ - 1] = '\0';
+    } else if (rn) {
+        hmap_remove(&addr_map, &rn->node);
+        free(rn);
+    }
+}
+
+/* Number of vport devices currently relying on 'addr_map'. The map and the
+ * address notifier are shared by all of them, so they are set up by the first
+ * user and torn down by the last. */
+static unsigned int addr_map_users;
+
+/* True while 'netdev_vport_addr_notifier' is registered. */
+static bool addr_notifier_registered;
+
+/* Takes a reference to the shared address tracking state. The first caller
+ * initializes 'addr_map', registers for address change notifications and
+ * loads the current addresses from the kernel. Later callers only bump the
+ * use count, so that a populated map is never re-initialized and the notifier
+ * is never registered twice. */
+static void
+netdev_vport_addr_init(void)
+{
+    int error;
+
+    if (addr_map_users++) {
+        return;
+    }
+
+    hmap_init(&addr_map);
+    error = rtnetlink_addr_notifier_register(&netdev_vport_addr_notifier,
+                                             netdev_vport_addr_change, NULL);
+    addr_notifier_registered = !error;
+    netdev_vport_addr_reset();
+}
+
+/* Drops a reference taken by netdev_vport_addr_init(). When the last
+ * reference goes away, unregisters the notifier (if it was successfully
+ * registered) and frees every entry in 'addr_map'. */
+static void
+netdev_vport_addr_destroy(void)
+{
+    struct addr_node *rn, *rn_next;
+
+    if (!addr_map_users || --addr_map_users) {
+        return;
+    }
+
+    if (addr_notifier_registered) {
+        rtnetlink_addr_notifier_unregister(&netdev_vport_addr_notifier);
+        addr_notifier_registered = false;
+    }
+
+    HMAP_FOR_EACH_SAFE (rn, rn_next, node, &addr_map) {
+        hmap_remove(&addr_map, &rn->node);
+        free(rn);
+    }
+    hmap_destroy(&addr_map);
+}
+
+/* Returns the label of the entry in 'addr_map' whose subnet contains the
+ * destination address ('daddr') configured on tunnel 'netdev', preferring the
+ * entry with the longest prefix. Returns null if 'netdev' is neither a "gre"
+ * nor a "capwap" vport, or if no entry matches. The returned string belongs
+ * to 'addr_map'. */
+static const char *
+netdev_vport_get_tnl_iface(const struct netdev *netdev)
+{
+    struct netdev_dev_vport *vport_dev
+        = netdev_dev_vport_cast(netdev_get_dev(netdev));
+    const char *dev_type = netdev_dev_get_type(&vport_dev->netdev_dev);
+    struct tnl_port_config *tnl_config;
+    const char *best_name = NULL;
+    struct addr_node *entry;
+    uint32_t remote_ip;
+    int best_prefix = 0;
+
+    if (strcmp(dev_type, "gre") != 0 && strcmp(dev_type, "capwap") != 0) {
+        return NULL;
+    }
+
+    tnl_config = (struct tnl_port_config *) vport_dev->config;
+    remote_ip = ntohl(tnl_config->daddr);
+
+    HMAP_FOR_EACH (entry, node, &addr_map) {
+        uint32_t mask;
+
+        /* Only a strictly longer prefix can replace the current best. */
+        if (entry->ifa_prefixlen <= best_prefix) {
+            continue;
+        }
+
+        mask = 0xffffffff << (32 - entry->ifa_prefixlen);
+        if ((remote_ip & mask) != (entry->ifa_addr & mask)) {
+            continue;
+        }
+
+        best_name = entry->ifa_label;
+        best_prefix = entry->ifa_prefixlen;
+    }
+
+    return best_name;
+}
/* Helper functions. */
@@ -606,8 +791,8 @@ parse_patch_config(const struct netdev_dev *dev, const struct shash *args,
#define VPORT_FUNCTIONS \
NULL, /* init */ \
- NULL, /* run */ \
- NULL, /* wait */ \
+ netdev_vport_run, \
+ netdev_vport_wait, \
\
netdev_vport_create, \
netdev_vport_destroy, \
@@ -654,6 +839,7 @@ parse_patch_config(const struct netdev_dev *dev, const struct shash *args,
NULL, /* get_in6 */ \
NULL, /* add_router */ \
NULL, /* get_next_hop */ \
+ netdev_vport_get_tnl_iface, \
NULL, /* arp_lookup */ \
\
netdev_vport_update_flags, \
diff --git a/lib/netdev.c b/lib/netdev.c
index c7de906..4b2e59e 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -769,6 +769,16 @@ netdev_get_next_hop(const struct netdev *netdev,
return error;
}
+/* Returns whatever the 'get_tnl_iface' function of 'netdev''s class reports
+ * for 'netdev'. Returns null if the class does not provide that function. */
+const char *
+netdev_get_tnl_iface(const struct netdev *netdev)
+{
+    struct netdev_dev *owner = netdev_get_dev(netdev);
+
+    if (!owner->netdev_class->get_tnl_iface) {
+        return NULL;
+    }
+
+    return owner->netdev_class->get_tnl_iface(netdev);
+}
+
/* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address and
* returns 0. Otherwise, returns a positive errno value and sets '*in6' to
* all-zero-bits (in6addr_any).
diff --git a/lib/netdev.h b/lib/netdev.h
index 6635a55..d7d7097 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -141,6 +141,7 @@ int netdev_get_in6(const struct netdev *, struct in6_addr *);
int netdev_add_router(struct netdev *, struct in_addr router);
int netdev_get_next_hop(const struct netdev *, const struct in_addr *host,
struct in_addr *next_hop, char **);
+const char *netdev_get_tnl_iface(const struct netdev *);
int netdev_arp_lookup(const struct netdev *, uint32_t ip, uint8_t mac[6]);
int netdev_get_flags(const struct netdev *, enum netdev_flags *);
diff --git a/lib/rtnetlink-addr.c b/lib/rtnetlink-addr.c
new file mode 100644
index 0000000..c3ee0d0
--- /dev/null
+++ b/lib/rtnetlink-addr.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2009, 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "rtnetlink-addr.h"
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <linux/rtnetlink.h>
+#include <net/if.h>
+
+#include "netlink.h"
+#include "ofpbuf.h"
+#include "rtnetlink.h"
+
+static struct rtnetlink *rtn = NULL;
+static struct rtnetlink_addr_change rtn_change;
+
+/* Decodes the rtnetlink address message in 'buf'. On success, fills in
+ * 'change' from the message and returns true. If 'buf' cannot be parsed,
+ * returns false and does not modify 'change'. */
+bool
+rtnetlink_addr_parse(struct ofpbuf *buf, struct rtnetlink_addr_change *change)
+{
+    static const struct nl_policy policy[] = {
+        [IFA_ADDRESS] = { .type = NL_A_U32, .optional = false },
+        [IFA_LABEL] = { .type = NL_A_STRING, .optional = false }
+    };
+
+    static struct nlattr *attrs[ARRAY_SIZE(policy)];
+
+    const struct ifaddrmsg *ifa_msg;
+    const struct nlmsghdr *hdr;
+
+    if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifaddrmsg),
+                         policy, attrs, ARRAY_SIZE(policy))) {
+        return false;
+    }
+
+    /* The ifaddrmsg immediately follows the Netlink header. */
+    hdr = buf->data;
+    ifa_msg = ((const struct ifaddrmsg *)
+               ((const char *) buf->data + NLMSG_HDRLEN));
+
+    change->nlmsg_type = hdr->nlmsg_type;
+    change->ifa_index = ifa_msg->ifa_index;
+    change->ifa_addr = ntohl(nl_attr_get_u32(attrs[IFA_ADDRESS]));
+    change->ifa_label = nl_attr_get_string(attrs[IFA_LABEL]);
+    change->ifa_prefixlen = ifa_msg->ifa_prefixlen;
+
+    return true;
+}
+
+/* Arranges for 'cb' to be called, with 'aux' as auxiliary data, whenever an
+ * address change notification arrives. The registration is recorded in
+ * 'notifier', which the caller must not modify or free. The shared
+ * rtnetlink handle is created on first use.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. */
+int
+rtnetlink_addr_notifier_register(struct rtnetlink_notifier *notifier,
+                                 rtnetlink_addr_notify_func *cb, void *aux)
+{
+    if (!rtn) {
+        rtn = rtnetlink_create(RTNLGRP_IPV4_IFADDR,
+                               (rtnetlink_parse_func *) rtnetlink_addr_parse,
+                               &rtn_change);
+    }
+
+    return rtnetlink_notifier_register(rtn, notifier,
+                                       (rtnetlink_notify_func *) cb, aux);
+}
+
+/* Cancels notification on 'notifier', which must have previously been
+ * registered with rtnetlink_addr_notifier_register(). The shared 'rtn'
+ * handle is passed along without a null check, so this relies on that
+ * earlier registration having created it. */
+void
+rtnetlink_addr_notifier_unregister(struct rtnetlink_notifier *notifier)
+{
+ rtnetlink_notifier_unregister(rtn, notifier);
+}
+
+/* Delivers any as-yet-unreported address change events to all of the
+ * registered notifiers. Does nothing if no notifier has ever been
+ * registered. */
+void
+rtnetlink_addr_notifier_run(void)
+{
+    if (!rtn) {
+        return;
+    }
+
+    rtnetlink_notifier_run(rtn);
+}
+
+/* Arranges for poll_block() to wake up when address change notifications are
+ * ready. Does nothing if no notifier has ever been registered. */
+void
+rtnetlink_addr_notifier_wait(void)
+{
+    if (!rtn) {
+        return;
+    }
+
+    rtnetlink_notifier_wait(rtn);
+}
diff --git a/lib/rtnetlink-addr.h b/lib/rtnetlink-addr.h
new file mode 100644
index 0000000..f4e831f
--- /dev/null
+++ b/lib/rtnetlink-addr.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2009 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RTNETLINK_ADDR_H
+#define RTNETLINK_ADDR_H 1
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct ofpbuf;
+struct rtnetlink_notifier;
+
+/* A digested version of an address message sent down by the kernel to indicate
+ * that an address has been added to or removed from an interface. Filled in
+ * by rtnetlink_addr_parse(). */
+struct rtnetlink_addr_change {
+ /* Copied from struct nlmsghdr. */
+ int nlmsg_type; /* e.g. RTM_NEWADDR, RTM_DELADDR. */
+
+ /* Copied from struct ifaddrmsg. */
+ int ifa_index; /* Index of network device. */
+ unsigned char ifa_prefixlen; /* Number of bits in the subnet mask. */
+
+ /* Extracted from Netlink attributes. */
+ uint32_t ifa_addr; /* Address in host byte order. */
+ const char *ifa_label; /* Name of the interface. Presumably points
+ * into the parsed message rather than a
+ * private copy -- TODO confirm; copy it if
+ * it must outlive the message. */
+
+};
+
+/* Function called to report that an address has changed. 'change' describes
+ * the specific change. It may be null, in which case the function must assume
+ * everything has changed. 'aux' is as specified in the call to
+ * rtnetlink_addr_notifier_register(). */
+typedef
+void rtnetlink_addr_notify_func(const struct rtnetlink_addr_change *change,
+ void *aux);
+
+bool rtnetlink_addr_parse(struct ofpbuf *, struct rtnetlink_addr_change *);
+int rtnetlink_addr_notifier_register(struct rtnetlink_notifier *,
+ rtnetlink_addr_notify_func *, void *aux);
+void rtnetlink_addr_notifier_unregister(struct rtnetlink_notifier *);
+void rtnetlink_addr_notifier_run(void);
+void rtnetlink_addr_notifier_wait(void);
+
+#endif /* rtnetlink-addr.h */
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 96a24fd..d505b53 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -1102,6 +1102,14 @@ dpid_from_hash(const void *data, size_t n)
}
+/* Stores the name reported by netdev_get_tnl_iface() for 'iface''s netdev,
+ * which may be null, in the 'tunnel_egress_iface' column of 'iface''s
+ * database record. */
static void
+iface_refresh_tunnel_egress(struct iface *iface)
+{
+ const char *name = netdev_get_tnl_iface(iface->netdev);
+
+ ovsrec_interface_set_tunnel_egress_iface(iface->cfg, name);
+}
+
+static void
iface_refresh_cfm_stats(struct iface *iface)
{
size_t i;
@@ -1310,6 +1318,7 @@ bridge_run(void)
struct iface *iface = port->ifaces[j];
iface_refresh_stats(iface);
iface_refresh_cfm_stats(iface);
+ iface_refresh_tunnel_egress(iface);
}
}
}
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index a1917ee..45359dd 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -147,6 +147,10 @@
"ingress_policing_burst": {
"type": {"key": {"type": "integer",
"minInteger": 0}}},
+ "tunnel_egress_iface": {
+ "type": {"key": {"type": "string"},
+ "min": 0, "max": 1},
+ "ephemeral": true},
"mac": {
"type": {"key": {"type": "string"},
"min": 0, "max": 1}},
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 4aa4649..fd0aa29 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1102,6 +1102,14 @@
</dl>
</column>
+ <column name="tunnel_egress_iface">
+ Egress interface for tunnels. Currently only relevant for GRE and
+ CAPWAP tunnels. On Linux systems, this column will show the name of
+ the interface which is responsible for routing traffic destined for the
+ configured 'remote_ip'. This could be an internal interface such as a
+ bridge port.
+ </column>
+
<column name="other_config">
Key-value pairs for rarely used interface features. Currently,
there are none defined.
--
1.7.2.3
More information about the dev
mailing list