[ovs-dev] [PATCH 2/2] lib: Show tunnel egress interface in ovsdb

Ethan Jackson ethan at nicira.com
Wed Dec 29 20:35:27 UTC 2010


This commit parses rtnetlink address notifications from the
kernel in order to display the egress interface of tunnels in the
database.

Bug #4103.
---
 lib/automake.mk            |    4 +-
 lib/netdev-dummy.c         |    1 +
 lib/netdev-linux.c         |    1 +
 lib/netdev-provider.h      |   11 +++
 lib/netdev-vport.c         |  190 +++++++++++++++++++++++++++++++++++++++++++-
 lib/netdev.c               |   10 +++
 lib/netdev.h               |    1 +
 lib/rtnetlink-addr.c       |  114 ++++++++++++++++++++++++++
 lib/rtnetlink-addr.h       |   57 +++++++++++++
 vswitchd/bridge.c          |    9 ++
 vswitchd/vswitch.ovsschema |    8 ++-
 vswitchd/vswitch.xml       |    8 ++
 12 files changed, 408 insertions(+), 6 deletions(-)
 create mode 100644 lib/rtnetlink-addr.c
 create mode 100644 lib/rtnetlink-addr.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 4c6a877..2cdb6f2 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -191,7 +191,9 @@ lib_libopenvswitch_a_SOURCES += \
 	lib/rtnetlink.c \
 	lib/rtnetlink.h \
 	lib/rtnetlink-link.c \
-	lib/rtnetlink-link.h
+	lib/rtnetlink-link.h \
+	lib/rtnetlink-addr.c \
+	lib/rtnetlink-addr.h
 endif
 
 if HAVE_OPENSSL
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index ddcbe36..218a022 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -320,6 +320,7 @@ static const struct netdev_class dummy_class = {
     NULL,                       /* get_in6 */
     NULL,                       /* add_router */
     NULL,                       /* get_next_hop */
+    NULL,                       /* get_tnl_iface */
     NULL,                       /* arp_lookup */
 
     netdev_dummy_update_flags,
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 7f9a8e3..5654dd4 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2176,6 +2176,7 @@ netdev_linux_poll_remove(struct netdev_notifier *notifier_)
     netdev_linux_get_in6,                                       \
     netdev_linux_add_router,                                    \
     netdev_linux_get_next_hop,                                  \
+    NULL,                       /* get_tnl_iface */             \
     netdev_linux_arp_lookup,                                    \
                                                                 \
     netdev_linux_update_flags,                                  \
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index d955bb1..038f277 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -518,6 +518,17 @@ struct netdev_class {
     int (*get_next_hop)(const struct in_addr *host, struct in_addr *next_hop,
                         char **netdev_name);
 
+    /* Looks up the name of the interface out of which traffic will egress if
+     * 'netdev' is a tunnel.  If unsuccessful, or 'netdev' is not a tunnel,
+     * will return null.  This function does not necessarily return the
+     * physical interface out of which traffic will egress.  Instead it
+     * returns the interface with an address on the same subnet as 'netdev's
+     * remote_ip.  This may be an internal interface such as a bridge port.
+     *
+     * This function may be set to null if 'netdev' is not a tunnel or it is
+     * not supported. */
+    const char *(*get_tnl_iface)(const struct netdev *netdev);
+
     /* Looks up the ARP table entry for 'ip' on 'netdev' and stores the
      * corresponding MAC address in 'mac'.  A return value of ENXIO, in
      * particular, indicates that there is no ARP table entry for 'ip' on
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 11db099..2c43d7b 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -20,21 +20,41 @@
 
 #include <errno.h>
 #include <fcntl.h>
+#include <sys/socket.h>
+#include <linux/rtnetlink.h>
 #include <net/if.h>
 #include <sys/ioctl.h>
 
 #include "byte-order.h"
+#include "hash.h"
+#include "hmap.h"
 #include "list.h"
 #include "netdev-provider.h"
+#include "netlink.h"
+#include "netlink-socket.h"
+#include "ofpbuf.h"
 #include "openvswitch/datapath-protocol.h"
 #include "openvswitch/tunnel.h"
 #include "packets.h"
+#include "rtnetlink.h"
+#include "rtnetlink-addr.h"
 #include "shash.h"
 #include "socket-util.h"
 #include "vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(netdev_vport);
 
+static struct hmap addr_map;
+static struct rtnetlink_notifier netdev_vport_addr_notifier;
+
+struct addr_node {
+    struct hmap_node node;    /* Node in 'addr_map'. */
+    uint32_t ifa_addr;        /* IPv4 address in host byte order. */
+    uint32_t ifa_prefixlen;   /* Subnet prefix length, in bits. */
+    uint32_t ifa_index;       /* Interface kernel index. */
+    char ifa_label[IFNAMSIZ]; /* Name of the interface, null-terminated. */
+};
+
 struct netdev_vport_notifier {
     struct netdev_notifier notifier;
     struct list list_node;
@@ -66,6 +86,11 @@ static int netdev_vport_create(const struct netdev_class *, const char *,
                                const struct shash *, struct netdev_dev **);
 static void netdev_vport_poll_notify(const struct netdev *);
 
+static void netdev_vport_addr_init(void);
+static void netdev_vport_addr_destroy(void);
+static void netdev_vport_addr_change(const struct rtnetlink_addr_change *,
+                                     void *);
+
 static bool
 is_vport_class(const struct netdev_class *class)
 {
@@ -108,6 +133,13 @@ netdev_vport_get_config(const struct netdev *netdev, void *config)
 }
 
 static int
+netdev_vport_init(void)
+{
+    netdev_vport_addr_init();   /* Does its work only on the first call. */
+    return 0;
+}
+
+static int
 netdev_vport_create(const struct netdev_class *netdev_class, const char *name,
                     const struct shash *args,
                     struct netdev_dev **netdev_devp)
@@ -387,6 +419,157 @@ netdev_vport_poll_remove(struct netdev_notifier *notifier_)
 
     free(notifier);
 }
+
+static void
+netdev_vport_run(void)
+{
+    rtnetlink_addr_notifier_run();  /* Dispatch pending address changes. */
+}
+
+static void
+netdev_vport_wait(void)
+{
+    rtnetlink_addr_notifier_wait(); /* Wake up on new address changes. */
+}
+
+/* Rtnetlink-addr code used for get_tnl_iface. */
+
+/* Empties 'addr_map' and refills it from a kernel dump of IPv4 addresses.
+ * Returns 0 if successful, otherwise a nonzero error code. */
+static int
+netdev_vport_addr_reset(void)
+{
+    int error;
+    struct nl_dump dump;
+    struct rtgenmsg *rtmsg;
+    struct ofpbuf request, reply;
+    struct addr_node *rn, *rn_next;
+    struct nl_sock *rtnl_sock;
+
+    HMAP_FOR_EACH_SAFE (rn, rn_next, node, &addr_map) {
+        hmap_remove(&addr_map, &rn->node);
+        free(rn);
+    }
+
+    error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &rtnl_sock);
+    if (error) {
+        VLOG_WARN_RL(&rl, "Failed to create NETLINK_ROUTE socket");
+        return error;
+    }
+
+    ofpbuf_init(&request, 0);
+    nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETADDR, NLM_F_REQUEST);
+    rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg);
+    rtmsg->rtgen_family = AF_INET;
+    nl_dump_start(&dump, rtnl_sock, &request);
+    ofpbuf_uninit(&request);    /* Release the request once it is sent. */
+
+    while (nl_dump_next(&dump, &reply)) {
+        struct rtnetlink_addr_change change;
+
+        /* Skip unparseable replies, for which 'change' is not filled in. */
+        if (rtnetlink_addr_parse(&reply, &change)) {
+            netdev_vport_addr_change(&change, NULL);
+        }
+    }
+    error = nl_dump_done(&dump);
+    nl_sock_destroy(rtnl_sock);
+    return error;
+}
+
+/* rtnetlink callback that keeps 'addr_map' in sync with the kernel.  A null
+ * 'change' means anything may have changed, so the whole map is rebuilt. */
+static void
+netdev_vport_addr_change(const struct rtnetlink_addr_change *change,
+                         void *aux OVS_UNUSED)
+{
+    struct addr_node *rn, *found = NULL;
+    uint32_t hash;
+
+    if (!change) {
+        netdev_vport_addr_reset();
+        return;
+    }
+
+    hash = hash_3words(change->ifa_addr, change->ifa_prefixlen,
+                       change->ifa_index);
+    HMAP_FOR_EACH_WITH_HASH (rn, node, hash, &addr_map) {
+        if (rn->ifa_addr == change->ifa_addr
+            && rn->ifa_prefixlen == change->ifa_prefixlen
+            && rn->ifa_index == change->ifa_index) {
+            found = rn;
+            break;
+        }
+    }
+
+    if (change->nlmsg_type == RTM_DELADDR) {
+        if (found) {
+            hmap_remove(&addr_map, &found->node);
+            free(found);
+        }
+    } else if (change->nlmsg_type == RTM_NEWADDR) {
+        if (!found) {   /* Reuse an existing entry rather than duplicate. */
+            found = xzalloc(sizeof *found);
+            found->ifa_addr      = change->ifa_addr;
+            found->ifa_index     = change->ifa_index;
+            found->ifa_prefixlen = change->ifa_prefixlen;
+            hmap_insert(&addr_map, &found->node, hash);
+        }
+        strncpy(found->ifa_label, change->ifa_label, IFNAMSIZ);
+        found->ifa_label[IFNAMSIZ - 1] = '\0';
+    }
+}
+
+static void
+netdev_vport_addr_init(void)
+{
+    static bool done = false;   /* Set once the address map is populated. */
+    if (done) {
+        return;
+    }
+    hmap_init(&addr_map);
+    rtnetlink_addr_notifier_register(&netdev_vport_addr_notifier,
+                                     netdev_vport_addr_change, NULL);
+    netdev_vport_addr_reset();  /* Take the initial snapshot of addresses. */
+    done = true;
+}
+
+/* Returns the name of the interface whose address in 'addr_map' has the
+ * longest prefix covering the tunnel's destination IP, or null if there is
+ * none or 'netdev' is not a gre, capwap, or ipsec_gre vport. */
+static const char *
+netdev_vport_get_tnl_iface(const struct netdev *netdev)
+{
+    struct netdev_dev_vport *vport_dev;
+    struct tnl_port_config *tnl_cfg;
+    const char *dev_type, *best_name = NULL;
+    struct addr_node *entry;
+    uint32_t remote_ip, netmask;
+    int best_len = 0;
+
+    vport_dev = netdev_dev_vport_cast(netdev_get_dev(netdev));
+    dev_type  = netdev_dev_get_type(&vport_dev->netdev_dev);
+    if (strcmp(dev_type, "gre") != 0 && strcmp(dev_type, "capwap") != 0
+        && strcmp(dev_type, "ipsec_gre") != 0) {
+        return NULL;
+    }
+
+    tnl_cfg   = (struct tnl_port_config *) vport_dev->config;
+    remote_ip = ntohl(tnl_cfg->daddr);
+
+    HMAP_FOR_EACH(entry, node, &addr_map) {
+        if (entry->ifa_prefixlen <= best_len) {
+            continue;   /* Cannot beat the best match found so far. */
+        }
+        netmask = 0xffffffff << (32 - entry->ifa_prefixlen);
+        if ((remote_ip & netmask) == (entry->ifa_addr & netmask)) {
+            best_name = entry->ifa_label;
+            best_len  = entry->ifa_prefixlen;
+        }
+    }
+
+    return best_name;
+}
 
 /* Helper functions. */
 
@@ -605,9 +788,9 @@ parse_patch_config(const struct netdev_dev *dev, const struct shash *args,
 }
 
 #define VPORT_FUNCTIONS                                     \
-    NULL,                       /* init */                  \
-    NULL,                       /* run */                   \
-    NULL,                       /* wait */                  \
+    netdev_vport_init,                                      \
+    netdev_vport_run,                                       \
+    netdev_vport_wait,                                      \
                                                             \
     netdev_vport_create,                                    \
     netdev_vport_destroy,                                   \
@@ -654,6 +837,7 @@ parse_patch_config(const struct netdev_dev *dev, const struct shash *args,
     NULL,                       /* get_in6 */               \
     NULL,                       /* add_router */            \
     NULL,                       /* get_next_hop */          \
+    netdev_vport_get_tnl_iface,                             \
     NULL,                       /* arp_lookup */            \
                                                             \
     netdev_vport_update_flags,                              \
diff --git a/lib/netdev.c b/lib/netdev.c
index c7de906..4b2e59e 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -769,6 +769,16 @@ netdev_get_next_hop(const struct netdev *netdev,
     return error;
 }
 
+const char *
+netdev_get_tnl_iface(const struct netdev *netdev)
+{
+    struct netdev_dev *dev = netdev_get_dev(netdev);
+    if (!dev->netdev_class->get_tnl_iface) {
+        return NULL;    /* Provider does not implement the hook. */
+    }
+    return dev->netdev_class->get_tnl_iface(netdev);
+}
+
 /* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address and
  * returns 0.  Otherwise, returns a positive errno value and sets '*in6' to
  * all-zero-bits (in6addr_any).
diff --git a/lib/netdev.h b/lib/netdev.h
index 6635a55..d7d7097 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -141,6 +141,7 @@ int netdev_get_in6(const struct netdev *, struct in6_addr *);
 int netdev_add_router(struct netdev *, struct in_addr router);
 int netdev_get_next_hop(const struct netdev *, const struct in_addr *host,
                         struct in_addr *next_hop, char **);
+const char *netdev_get_tnl_iface(const struct netdev *);
 int netdev_arp_lookup(const struct netdev *, uint32_t ip, uint8_t mac[6]);
 
 int netdev_get_flags(const struct netdev *, enum netdev_flags *);
diff --git a/lib/rtnetlink-addr.c b/lib/rtnetlink-addr.c
new file mode 100644
index 0000000..afc212a
--- /dev/null
+++ b/lib/rtnetlink-addr.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2009, 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "rtnetlink-addr.h"
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <linux/rtnetlink.h>
+#include <net/if.h>
+
+#include "netlink.h"
+#include "ofpbuf.h"
+#include "rtnetlink.h"
+
+static struct rtnetlink *rtn = NULL;
+static struct rtnetlink_addr_change rtn_change;
+
+/* Attempts to decode the rtnetlink address message in 'buf' into 'change'.
+ * Returns true and fills in 'change' on success.  Returns false and leaves
+ * 'change' untouched if 'buf' cannot be parsed. */
+bool
+rtnetlink_addr_parse(struct ofpbuf *buf, struct rtnetlink_addr_change *change)
+{
+    /* Both attributes are required (.optional is false for each). */
+    static const struct nl_policy policy[] = {
+        [IFA_ADDRESS] = { .type = NL_A_U32,    .optional = false },
+        [IFA_LABEL]   = { .type = NL_A_STRING, .optional = false }
+    };
+
+    static struct nlattr *attrs[ARRAY_SIZE(policy)];
+    const struct ifaddrmsg *ifaddr;
+    const struct nlmsghdr *nlmsg;
+
+    if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifaddrmsg),
+                         policy, attrs, ARRAY_SIZE(policy))) {
+        return false;
+    }
+
+    /* The ifaddrmsg sits immediately after the Netlink message header. */
+    nlmsg  = buf->data;
+    ifaddr = (const struct ifaddrmsg *) ((const char *) buf->data
+                                         + NLMSG_HDRLEN);
+
+    change->nlmsg_type    = nlmsg->nlmsg_type;
+    change->ifa_index     = ifaddr->ifa_index;
+    change->ifa_prefixlen = ifaddr->ifa_prefixlen;
+    change->ifa_addr      = ntohl(nl_attr_get_be32(attrs[IFA_ADDRESS]));
+    /* The label pointer is stored as returned, without copying. */
+    change->ifa_label     = nl_attr_get_string(attrs[IFA_LABEL]);
+
+    return true;
+}
+
+/* Registers 'cb' to be called, with 'aux' as auxiliary data, whenever an
+ * address change is reported.  'notifier' stores the registration; callers
+ * must not modify or free it.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. */
+int
+rtnetlink_addr_notifier_register(struct rtnetlink_notifier *notifier,
+                                 rtnetlink_addr_notify_func *cb, void *aux)
+{
+    if (!rtn) {
+        /* First use: create the handle shared by all address notifiers. */
+        rtn = rtnetlink_create(RTNLGRP_IPV4_IFADDR,
+                               (rtnetlink_parse_func *) rtnetlink_addr_parse,
+                               &rtn_change);
+    }
+    return rtnetlink_notifier_register(rtn, notifier,
+                                       (rtnetlink_notify_func *) cb, aux);
+}
+
+/* Cancels notification on 'notifier', which must have previously been
+ * registered with rtnetlink_addr_notifier_register(). */
+void
+rtnetlink_addr_notifier_unregister(struct rtnetlink_notifier *notifier)
+{
+    rtnetlink_notifier_unregister(rtn, notifier); /* 'rtn' is not checked. */
+}
+
+/* Delivers pending address change events to the registered notifiers. */
+void
+rtnetlink_addr_notifier_run(void)
+{
+    if (!rtn) {
+        return;     /* Nothing has been registered yet. */
+    }
+    rtnetlink_notifier_run(rtn);
+}
+
+/* Causes poll_block() to wake up when address change events are ready. */
+void
+rtnetlink_addr_notifier_wait(void)
+{
+    if (!rtn) {
+        return;     /* Nothing has been registered yet. */
+    }
+    rtnetlink_notifier_wait(rtn);
+}
diff --git a/lib/rtnetlink-addr.h b/lib/rtnetlink-addr.h
new file mode 100644
index 0000000..f4e831f
--- /dev/null
+++ b/lib/rtnetlink-addr.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2009 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RTNETLINK_ADDR_H
+#define RTNETLINK_ADDR_H 1
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct ofpbuf;
+struct rtnetlink_notifier;
+
+/* A digested version of an address message sent down by the kernel to indicate
+ * that an address has been added or removed from an interface. */
+struct rtnetlink_addr_change {
+    /* Copied from struct nlmsghdr. */
+    int nlmsg_type;               /* e.g. RTM_NEWADDR, RTM_DELADDR. */
+
+    /* Copied from struct ifaddrmsg. */
+    int ifa_index;                /* Index of network device. */
+    unsigned char ifa_prefixlen;  /* Number of bits in the subnet mask. */
+
+    /* Extracted from Netlink attributes. */
+    uint32_t ifa_addr;            /* IFA_ADDRESS, in host byte order. */
+    const char *ifa_label;        /* Name of the interface.  Stored without
+                                   * copying by rtnetlink_addr_parse(). */
+};
+
+/* Function called to report that an address has changed.  'change' describes
+ * the specific change.  It may be null, in which case the function must assume
+ * everything has changed.  'aux' is as specified in the call to
+ * rtnetlink_addr_notifier_register(). */
+typedef
+void rtnetlink_addr_notify_func(const struct rtnetlink_addr_change *change,
+                                void *aux);
+
+bool rtnetlink_addr_parse(struct ofpbuf *, struct rtnetlink_addr_change *);
+int rtnetlink_addr_notifier_register(struct rtnetlink_notifier *,
+                                     rtnetlink_addr_notify_func *, void *aux);
+void rtnetlink_addr_notifier_unregister(struct rtnetlink_notifier *);
+void rtnetlink_addr_notifier_run(void);
+void rtnetlink_addr_notifier_wait(void);
+
+#endif /* rtnetlink-addr.h */
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 96a24fd..d505b53 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -1102,6 +1102,14 @@ dpid_from_hash(const void *data, size_t n)
 }
 
 static void
+iface_refresh_tunnel_egress(struct iface *iface)
+{
+    /* Record the tunnel egress interface name, which may be null. */
+    ovsrec_interface_set_tunnel_egress_iface(
+        iface->cfg, netdev_get_tnl_iface(iface->netdev));
+}
+
+static void
 iface_refresh_cfm_stats(struct iface *iface)
 {
     size_t i;
@@ -1310,6 +1318,7 @@ bridge_run(void)
                         struct iface *iface = port->ifaces[j];
                         iface_refresh_stats(iface);
                         iface_refresh_cfm_stats(iface);
+                        iface_refresh_tunnel_egress(iface);
                     }
                 }
             }
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index a1917ee..d21a85c 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
 {"name": "Open_vSwitch",
- "version": "1.0.0",
- "cksum": "514853437 13985",
+ "version": "1.0.1",
+ "cksum": "665434435 14130",
  "tables": {
    "Open_vSwitch": {
      "columns": {
@@ -147,6 +147,10 @@
        "ingress_policing_burst": {
          "type": {"key": {"type": "integer",
                           "minInteger": 0}}},
+       "tunnel_egress_iface": {
+         "type": {"key": {"type": "string"},
+                  "min": 0, "max": 1},
+         "ephemeral": true},
        "mac": {
          "type": {"key": {"type": "string"},
                   "min": 0, "max": 1}},
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 4aa4649..fd0aa29 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1102,6 +1102,14 @@
         </dl>
       </column>
 
+      <column name="tunnel_egress_iface">
+        Egress interface for tunnels.  Currently only relevant for GRE and
+        CAPWAP tunnels.  On Linux systems, this column will show the name of
+        the interface which is responsible for routing traffic destined for the
+        configured 'remote_ip'.  This could be an internal interface such as a
+        bridge port.
+      </column>
+
       <column name="other_config">
         Key-value pairs for rarely used interface features.  Currently,
         there are none defined.
-- 
1.7.2.3





More information about the dev mailing list