[ovs-dev] [patch_ports (rebased) 3/4] ofproto-dpif: Implement patch ports in userspace.

Ethan Jackson ethan at nicira.com
Wed Jan 23 20:51:12 UTC 2013


This commit moves responsibility for implementing patch ports from
the datapath to ofproto-dpif.  There are two main reasons to do
this.

The first is a matter of design:  ofproto-dpif has more information
than the datapath and is better suited to handle the complexity
required to implement patch ports.

The second is performance.  My setup is a virtual machine with two
basic learning bridges connected by patch ports.  I used
ovs-benchmark to ping the virtual router IP residing outside the
VM.  Over a 60-second run, "ovs-benchmark rate" improves from
14618.1 to 19311.9 transactions per second, a 32% improvement.
Similarly, "ovs-benchmark latency" improves from 6ms to 4ms.

Signed-off-by: Ethan Jackson <ethan at nicira.com>
---
 FAQ                    |   25 +++--
 NEWS                   |    1 +
 lib/netdev-provider.h  |    1 +
 lib/netdev-vport.c     |  280 +++++++++++++++++++++++++-----------------------
 lib/netdev-vport.h     |   10 +-
 lib/netdev.c           |    8 ++
 lib/netdev.h           |    1 +
 ofproto/ofproto-dpif.c |  173 ++++++++++++++++++++++++++----
 tests/ofproto-dpif.at  |   57 ++++++++++
 9 files changed, 390 insertions(+), 166 deletions(-)

diff --git a/FAQ b/FAQ
index ab1c1cc..a466ca4 100644
--- a/FAQ
+++ b/FAQ
@@ -172,17 +172,24 @@ A: The kernel module in upstream Linux 3.3 and later does not include
          vSwitch distribution instead of the upstream Linux kernel
          module.
 
-       - Patch virtual ports, that is, interfaces with type "patch".
-         You can use Linux "veth" devices as a substitute.
-
-         We don't have any plans to add patch ports upstream.
-
 Q: What features are not available when using the userspace datapath?
 
-A: Tunnel and patch virtual ports are not supported, as described in the
-   previous answer.  It is also not possible to use queue-related
-   actions.  On Linux kernels before 2.6.39, maximum-sized VLAN packets
-   may not be transmitted.
+A: The kernel module in upstream Linux 3.3 and later does not include
+   tunnel virtual ports, that is, interfaces with type "gre",
+   "ipsec_gre", "gre64", "ipsec_gre64", "vxlan", or "capwap".  It is
+   possible to create tunnels in Linux and attach them to Open vSwitch
+   as system devices.  However, they cannot be dynamically created
+   through the OVSDB protocol, nor can flow actions set their tunnel ids.
+
+   Work is in progress in adding tunnel virtual ports to the upstream
+   Linux version of the Open vSwitch kernel module.  For now, if you
+   need these features, use the kernel module from the Open vSwitch
+   distribution instead of the upstream Linux kernel module.
+
+   The upstream kernel module does not include patch ports, but this
+   only matters for Open vSwitch 1.9 and earlier, because Open vSwitch
+   1.10 and later implement patch ports without using this kernel
+   feature.
 
 
 Terminology
diff --git a/NEWS b/NEWS
index 6cf09ba..62488d0 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,7 @@ post-v1.9.0
         retire that meaning of ANY in favor of the OpenFlow 1.1 meaning.
     - Inheritance of the Don't Fragment bit in IP tunnels (df_inherit) is
       no longer supported.
+    - Patch ports are implemented in userspace.
 
 
 v1.9.0 - xx xxx xxxx
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 9db950c..c7de2c2 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -601,6 +601,7 @@ const struct netdev_class *netdev_lookup_provider(const char *type);
 extern const struct netdev_class netdev_linux_class;
 extern const struct netdev_class netdev_internal_class;
 extern const struct netdev_class netdev_tap_class;
+extern const struct netdev_class netdev_patch_class;
 #ifdef __FreeBSD__
 extern const struct netdev_class netdev_bsd_class;
 #endif
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 60437b9..4054ee0 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -29,6 +29,7 @@
 #include "byte-order.h"
 #include "daemon.h"
 #include "dirs.h"
+#include "dpif.h"
 #include "dpif-linux.h"
 #include "hash.h"
 #include "hmap.h"
@@ -56,21 +57,21 @@ VLOG_DEFINE_THIS_MODULE(netdev_vport);
 
 struct netdev_dev_vport {
     struct netdev_dev netdev_dev;
-    struct ofpbuf *options;
     unsigned int change_seq;
     uint8_t etheraddr[ETH_ADDR_LEN];
+
+    /* Tunnels. */
+    struct ofpbuf *options;
     struct netdev_tunnel_config tnl_cfg;
+
+    /* Patch Ports. */
+    struct netdev_stats stats;
+    char *peer;
 };
 
 struct vport_class {
     enum ovs_vport_type type;
     struct netdev_class netdev_class;
-    int (*parse_config)(const char *name, const char *type,
-                        const struct smap *args, struct ofpbuf *options,
-                        struct netdev_tunnel_config *tnl_cfg);
-    int (*unparse_config)(const char *name, const char *type,
-                          const struct nlattr *options, size_t options_len,
-                          struct smap *args);
 };
 
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@@ -140,6 +141,12 @@ netdev_vport_get_vport_type(const struct netdev *netdev)
             : OVS_VPORT_TYPE_UNSPEC);
 }
 
+bool
+netdev_vport_is_patch(const struct netdev *netdev)
+{
+    return netdev_vport_get_vport_type(netdev) == OVS_VPORT_TYPE_PATCH;
+}
+
 static uint32_t
 get_u32_or_zero(const struct nlattr *a)
 {
@@ -220,6 +227,7 @@ netdev_vport_destroy(struct netdev_dev *netdev_dev_)
 
     ofpbuf_delete(netdev_dev->options);
     route_table_unregister();
+    free(netdev_dev->peer);
     free(netdev_dev);
 }
 
@@ -238,83 +246,6 @@ netdev_vport_close(struct netdev *netdev)
 }
 
 static int
-netdev_vport_get_config(struct netdev_dev *dev_, struct smap *args)
-{
-    const struct netdev_class *netdev_class = netdev_dev_get_class(dev_);
-    const struct vport_class *vport_class = vport_class_cast(netdev_class);
-    struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_);
-    const char *name = netdev_dev_get_name(dev_);
-    int error;
-
-    if (!dev->options) {
-        struct dpif_linux_vport reply;
-        struct ofpbuf *buf;
-
-        error = dpif_linux_vport_get(name, &reply, &buf);
-        if (error) {
-            VLOG_ERR_RL(&rl, "%s: vport query failed (%s)",
-                        name, strerror(error));
-            return error;
-        }
-
-        dev->options = ofpbuf_clone_data(reply.options, reply.options_len);
-        ofpbuf_delete(buf);
-    }
-
-    error = vport_class->unparse_config(name, netdev_class->type,
-                                        dev->options->data,
-                                        dev->options->size,
-                                        args);
-    if (error) {
-        VLOG_ERR_RL(&rl, "%s: failed to parse kernel config (%s)",
-                    name, strerror(error));
-    }
-    return error;
-}
-
-static int
-netdev_vport_set_config(struct netdev_dev *dev_, const struct smap *args)
-{
-    const struct netdev_class *netdev_class = netdev_dev_get_class(dev_);
-    const struct vport_class *vport_class = vport_class_cast(netdev_class);
-    struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_);
-    const char *name = netdev_dev_get_name(dev_);
-    struct netdev_tunnel_config tnl_cfg;
-    struct ofpbuf *options;
-    int error;
-
-    options = ofpbuf_new(64);
-    error = vport_class->parse_config(name, netdev_dev_get_type(dev_),
-                                      args, options, &tnl_cfg);
-    if (!error
-        && (!dev->options
-            || options->size != dev->options->size
-            || memcmp(options->data, dev->options->data, options->size))) {
-        struct dpif_linux_vport vport;
-
-        dpif_linux_vport_init(&vport);
-        vport.cmd = OVS_VPORT_CMD_SET;
-        vport.name = name;
-        vport.options = options->data;
-        vport.options_len = options->size;
-        error = dpif_linux_vport_transact(&vport, NULL, NULL);
-        if (!error || error == ENODEV) {
-            /* Either reconfiguration succeeded or this vport is not installed
-             * in the kernel (e.g. it hasn't been added to a dpif yet with
-             * dpif_port_add()). */
-            ofpbuf_delete(dev->options);
-            dev->options = options;
-            dev->tnl_cfg = tnl_cfg;
-            options = NULL;
-            error = 0;
-        }
-    }
-    ofpbuf_delete(options);
-
-    return error;
-}
-
-static int
 netdev_vport_set_etheraddr(struct netdev *netdev,
                            const uint8_t mac[ETH_ADDR_LEN])
 {
@@ -449,7 +380,7 @@ netdev_vport_poll_notify(const struct netdev *netdev)
     }
 }
 
-/* Code specific to individual vport types. */
+/* Code specific to tunnel types. */
 
 static ovs_be64
 parse_key(const struct smap *args, const char *name,
@@ -479,13 +410,16 @@ parse_key(const struct smap *args, const char *name,
 }
 
 static int
-parse_tunnel_config(const char *name, const char *type,
-                    const struct smap *args, struct ofpbuf *options,
-                    struct netdev_tunnel_config *tnl_cfg_)
+set_tunnel_config(struct netdev_dev *dev_, const struct smap *args)
 {
+    struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_);
+    const char *name = netdev_dev_get_name(dev_);
+    const char *type = netdev_dev_get_type(dev_);
     bool ipsec_mech_set, needs_dst_port, has_csum;
     struct netdev_tunnel_config tnl_cfg;
     struct smap_node *node;
+    struct ofpbuf *options;
+    int error = EINVAL;
     uint8_t flags;
 
     flags = TNL_F_DF_DEFAULT;
@@ -493,6 +427,8 @@ parse_tunnel_config(const char *name, const char *type,
     ipsec_mech_set = false;
     memset(&tnl_cfg, 0, sizeof tnl_cfg);
 
+    options = ofpbuf_new(64);
+
     if (!strcmp(type, "capwap")) {
         VLOG_WARN_ONCE("CAPWAP tunnel support is deprecated.");
     }
@@ -572,7 +508,7 @@ parse_tunnel_config(const char *name, const char *type,
                 if (!use_ssl_cert || strcmp(use_ssl_cert, "true")) {
                     VLOG_ERR("%s: 'peer_cert' requires 'certificate' argument",
                              name);
-                    return EINVAL;
+                    goto exit;
                 }
                 ipsec_mech_set = true;
             }
@@ -610,25 +546,25 @@ parse_tunnel_config(const char *name, const char *type,
         if (pid < 0) {
             VLOG_ERR("%s: IPsec requires the ovs-monitor-ipsec daemon",
                      name);
-            return EINVAL;
+            goto exit;
         }
 
         if (smap_get(args, "peer_cert") && smap_get(args, "psk")) {
             VLOG_ERR("%s: cannot define both 'peer_cert' and 'psk'", name);
-            return EINVAL;
+            goto exit;
         }
 
         if (!ipsec_mech_set) {
             VLOG_ERR("%s: IPsec requires an 'peer_cert' or psk' argument",
                      name);
-            return EINVAL;
+            goto exit;
         }
     }
 
     if (!tnl_cfg.ip_dst) {
         VLOG_ERR("%s: %s type requires valid 'remote_ip' argument",
                  name, type);
-        return EINVAL;
+        goto exit;
     }
     nl_msg_put_be32(options, OVS_TUNNEL_ATTR_DST_IPV4, tnl_cfg.ip_dst);
 
@@ -660,9 +596,34 @@ parse_tunnel_config(const char *name, const char *type,
     }
     nl_msg_put_u32(options, OVS_TUNNEL_ATTR_FLAGS, flags);
 
-    *tnl_cfg_ = tnl_cfg;
+    dev->tnl_cfg = tnl_cfg;
 
-    return 0;
+    error = 0;
+    if (!dev->options
+        || options->size != dev->options->size
+        || memcmp(options->data, dev->options->data, options->size)) {
+        struct dpif_linux_vport vport;
+
+        dpif_linux_vport_init(&vport);
+        vport.cmd = OVS_VPORT_CMD_SET;
+        vport.name = name;
+        vport.options = options->data;
+        vport.options_len = options->size;
+        error = dpif_linux_vport_transact(&vport, NULL, NULL);
+        if (!error || error == ENODEV) {
+            /* Either reconfiguration succeeded or this vport is not installed
+             * in the kernel (e.g. it hasn't been added to a dpif yet with
+             * dpif_port_add()). */
+            ofpbuf_delete(dev->options);
+            dev->options = options;
+            options = NULL;
+            error = 0;
+        }
+    }
+
+exit:
+    ofpbuf_delete(options);
+    return error;
 }
 
 static int
@@ -696,16 +657,34 @@ get_be64_or_zero(const struct nlattr *a)
 }
 
 static int
-unparse_tunnel_config(const char *name OVS_UNUSED, const char *type OVS_UNUSED,
-                      const struct nlattr *options, size_t options_len,
-                      struct smap *args)
+get_tunnel_config(struct netdev_dev *dev_, struct smap *args)
 {
+    struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_);
+    const char *name = netdev_dev_get_name(dev_);
     struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1];
     uint32_t flags;
     int error;
 
-    error = tnl_port_config_from_nlattr(options, options_len, a);
+    if (!dev->options) {
+        struct dpif_linux_vport reply;
+        struct ofpbuf *buf;
+
+        error = dpif_linux_vport_get(name, &reply, &buf);
+        if (error) {
+            VLOG_ERR_RL(&rl, "%s: vport query failed (%s)", name,
+                        strerror(error));
+            return error;
+        }
+
+        dev->options = ofpbuf_clone_data(reply.options, reply.options_len);
+        ofpbuf_delete(buf);
+    }
+
+    error = tnl_port_config_from_nlattr(dev->options->data, dev->options->size,
+                                        a);
     if (error) {
+        VLOG_ERR_RL(&rl, "%s: failed to parse kernel config (%s)",
+                    name, strerror(error));
         return error;
     }
 
@@ -779,15 +758,56 @@ unparse_tunnel_config(const char *name OVS_UNUSED, const char *type OVS_UNUSED,
 
     return 0;
 }
+
+/* Code specific to patch ports. */
+
+const char *
+netdev_vport_patch_peer(const struct netdev *netdev)
+{
+    return netdev_vport_is_patch(netdev)
+        ? netdev_vport_get_dev(netdev)->peer
+        : NULL;
+}
+
+void
+netdev_vport_patch_inc_rx(const struct netdev *netdev,
+                          const struct dpif_flow_stats *stats)
+{
+    if (netdev_vport_is_patch(netdev)) {
+        struct netdev_dev_vport *dev = netdev_vport_get_dev(netdev);
+        dev->stats.rx_packets += stats->n_packets;
+        dev->stats.rx_bytes += stats->n_bytes;
+    }
+}
+
+void
+netdev_vport_patch_inc_tx(const struct netdev *netdev,
+                          const struct dpif_flow_stats *stats)
+{
+    if (netdev_vport_is_patch(netdev)) {
+        struct netdev_dev_vport *dev = netdev_vport_get_dev(netdev);
+        dev->stats.tx_packets += stats->n_packets;
+        dev->stats.tx_bytes += stats->n_bytes;
+    }
+}
 
 static int
-parse_patch_config(const char *name, const char *type OVS_UNUSED,
-                   const struct smap *args, struct ofpbuf *options,
-                   struct netdev_tunnel_config *tnl_cfg)
+get_patch_config(struct netdev_dev *dev_, struct smap *args)
 {
-    const char *peer;
+    struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_);
 
-    memset(tnl_cfg, 0, sizeof *tnl_cfg);
+    if (dev->peer) {
+        smap_add(args, "peer", dev->peer);
+    }
+    return 0;
+}
+
+static int
+set_patch_config(struct netdev_dev *dev_, const struct smap *args)
+{
+    struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_);
+    const char *name = netdev_dev_get_name(dev_);
+    const char *peer;
 
     peer = smap_get(args, "peer");
     if (!peer) {
@@ -810,44 +830,31 @@ parse_patch_config(const char *name, const char *type OVS_UNUSED,
         return EINVAL;
     }
 
-    nl_msg_put_string(options, OVS_PATCH_ATTR_PEER, peer);
+    free(dev->peer);
+    dev->peer = xstrdup(peer);
 
     return 0;
 }
 
 static int
-unparse_patch_config(const char *name OVS_UNUSED, const char *type OVS_UNUSED,
-                     const struct nlattr *options, size_t options_len,
-                     struct smap *args)
-{
-    static const struct nl_policy ovs_patch_policy[] = {
-        [OVS_PATCH_ATTR_PEER] = { .type = NL_A_STRING,
-                               .max_len = IFNAMSIZ,
-                               .optional = false }
-    };
-
-    struct nlattr *a[ARRAY_SIZE(ovs_patch_policy)];
-    struct ofpbuf buf;
-
-    ofpbuf_use_const(&buf, options, options_len);
-    if (!nl_policy_parse(&buf, 0, ovs_patch_policy,
-                         a, ARRAY_SIZE(ovs_patch_policy))) {
-        return EINVAL;
-    }
-
-    smap_add(args, "peer", nl_attr_get_string(a[OVS_PATCH_ATTR_PEER]));
+patch_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
+{
+    struct netdev_dev_vport *dev = netdev_vport_get_dev(netdev);
+    memcpy(stats, &dev->stats, sizeof *stats);
     return 0;
 }
 
-#define VPORT_FUNCTIONS(GET_TUNNEL_CONFIG, GET_STATUS)      \
+#define VPORT_FUNCTIONS(GET_CONFIG, SET_CONFIG,             \
+                        GET_TUNNEL_CONFIG, GET_STATS,       \
+                        GET_STATUS)                         \
     NULL,                                                   \
     netdev_vport_run,                                       \
     netdev_vport_wait,                                      \
                                                             \
     netdev_vport_create,                                    \
     netdev_vport_destroy,                                   \
-    netdev_vport_get_config,                                \
-    netdev_vport_set_config,                                \
+    GET_CONFIG,                                             \
+    SET_CONFIG,                                             \
     GET_TUNNEL_CONFIG,                                      \
                                                             \
     netdev_vport_open,                                      \
@@ -869,7 +876,7 @@ unparse_patch_config(const char *name OVS_UNUSED, const char *type OVS_UNUSED,
     NULL,                       /* get_carrier */           \
     NULL,                       /* get_carrier_resets */    \
     NULL,                       /* get_miimon */            \
-    netdev_vport_get_stats,                                 \
+    GET_STATS,                                              \
     NULL,                       /* set_stats */             \
                                                             \
     NULL,                       /* get_features */          \
@@ -901,9 +908,11 @@ unparse_patch_config(const char *name OVS_UNUSED, const char *type OVS_UNUSED,
 
 #define TUNNEL_CLASS(NAME, VPORT_TYPE)                      \
     { VPORT_TYPE,                                           \
-        { NAME, VPORT_FUNCTIONS(get_netdev_tunnel_config,   \
-                                tunnel_get_status) },       \
-            parse_tunnel_config, unparse_tunnel_config }
+        { NAME, VPORT_FUNCTIONS(get_tunnel_config,          \
+                                set_tunnel_config,          \
+                                get_netdev_tunnel_config,   \
+                                netdev_vport_get_stats,     \
+                                tunnel_get_status) }}
 
 void
 netdev_vport_register(void)
@@ -917,8 +926,11 @@ netdev_vport_register(void)
         TUNNEL_CLASS("vxlan", OVS_VPORT_TYPE_VXLAN),
 
         { OVS_VPORT_TYPE_PATCH,
-          { "patch", VPORT_FUNCTIONS(NULL, NULL) },
-          parse_patch_config, unparse_patch_config }
+          { "patch", VPORT_FUNCTIONS(get_patch_config,
+                                     set_patch_config,
+                                     NULL,
+                                     patch_get_stats,
+                                     NULL) }},
     };
 
     int i;
diff --git a/lib/netdev-vport.h b/lib/netdev-vport.h
index 31c1198..b372a74 100644
--- a/lib/netdev-vport.h
+++ b/lib/netdev-vport.h
@@ -18,9 +18,9 @@
 #define NETDEV_VPORT_H 1
 
 #include <stdbool.h>
-#include "openvswitch/types.h"
 
 struct dpif_linux_vport;
+struct dpif_flow_stats;
 struct netdev;
 struct netdev_stats;
 
@@ -30,7 +30,15 @@ const struct ofpbuf *netdev_vport_get_options(const struct netdev *);
 
 enum ovs_vport_type netdev_vport_get_vport_type(const struct netdev *);
 const char *netdev_vport_get_netdev_type(const struct dpif_linux_vport *);
+bool netdev_vport_is_patch(const struct netdev *);
 
 int netdev_vport_get_stats(const struct netdev *, struct netdev_stats *);
 
+const char *netdev_vport_patch_peer(const struct netdev *netdev);
+
+void netdev_vport_patch_inc_rx(const struct netdev *,
+                               const struct dpif_flow_stats *);
+void netdev_vport_patch_inc_tx(const struct netdev *,
+                               const struct dpif_flow_stats *);
+
 #endif /* netdev-vport.h */
diff --git a/lib/netdev.c b/lib/netdev.c
index 0a2e7c5..3909ab2 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -1441,6 +1441,14 @@ netdev_get_type(const struct netdev *netdev)
     return netdev_get_dev(netdev)->netdev_class->type;
 }
 
+
+const char *
+netdev_get_type_from_name(const char *name)
+{
+    const struct netdev_dev *dev = netdev_dev_from_name(name);
+    return dev ? netdev_dev_get_type(dev) : NULL;
+}
+
 struct netdev_dev *
 netdev_get_dev(const struct netdev *netdev)
 {
diff --git a/lib/netdev.h b/lib/netdev.h
index a544131..a691d70 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -127,6 +127,7 @@ const struct netdev_tunnel_config *
 /* Basic properties. */
 const char *netdev_get_name(const struct netdev *);
 const char *netdev_get_type(const struct netdev *);
+const char *netdev_get_type_from_name(const char *);
 int netdev_get_mtu(const struct netdev *, int *mtup);
 int netdev_set_mtu(const struct netdev *, int mtu);
 int netdev_get_ifindex(const struct netdev *);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 5c5d261..7732f49 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -36,6 +36,7 @@
 #include "mac-learning.h"
 #include "meta-flow.h"
 #include "multipath.h"
+#include "netdev-vport.h"
 #include "netdev.h"
 #include "netlink.h"
 #include "nx-match.h"
@@ -295,6 +296,8 @@ static void xlate_actions(struct action_xlate_ctx *,
 static void xlate_actions_for_side_effects(struct action_xlate_ctx *,
                                            const struct ofpact *ofpacts,
                                            size_t ofpacts_len);
+static void xlate_table_action(struct action_xlate_ctx *, uint16_t in_port,
+                               uint8_t table_id, bool may_packet_in);
 
 static size_t put_userspace_action(const struct ofproto_dpif *,
                                    struct ofpbuf *odp_actions,
@@ -676,7 +679,8 @@ struct ofproto_dpif {
     struct hmap vlandev_map;     /* vlandev -> (realdev,vid). */
 
     /* Ports. */
-    struct sset ports;             /* Set of port names. */
+    struct sset ports;             /* Set of standard port names. */
+    struct sset ghost_ports;       /* Ports with no datapath port. */
     struct sset port_poll_set;     /* Queued names for port_poll() reply. */
     int port_poll_errno;           /* Last errno for port_poll() reply. */
 };
@@ -1157,6 +1161,7 @@ construct(struct ofproto *ofproto_)
     hmap_init(&ofproto->realdev_vid_map);
 
     sset_init(&ofproto->ports);
+    sset_init(&ofproto->ghost_ports);
     sset_init(&ofproto->port_poll_set);
     ofproto->port_poll_errno = 0;
 
@@ -1301,6 +1306,7 @@ destruct(struct ofproto *ofproto_)
     hmap_destroy(&ofproto->realdev_vid_map);
 
     sset_destroy(&ofproto->ports);
+    sset_destroy(&ofproto->ghost_ports);
     sset_destroy(&ofproto->port_poll_set);
 
     close_dpif_backer(ofproto->backer);
@@ -1533,6 +1539,12 @@ port_construct(struct ofport *port_)
     port->vlandev_vid = 0;
     port->carrier_seq = netdev_get_carrier_resets(port->up.netdev);
 
+    if (netdev_vport_is_patch(port->up.netdev)) {
+        /* XXX By bailing out here, we don't do required sFlow work. */
+        port->odp_port = OVSP_NONE;
+        return 0;
+    }
+
     error = dpif_port_query_by_name(ofproto->backer->dpif,
                                     netdev_get_name(port->up.netdev),
                                     &dpif_port);
@@ -1575,8 +1587,12 @@ port_destruct(struct ofport *port_)
         dpif_port_del(ofproto->backer->dpif, port->odp_port);
     }
 
+    if (port->odp_port != OVSP_NONE) {
+        hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node);
+    }
+
     sset_find_and_delete(&ofproto->ports, devname);
-    hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node);
+    sset_find_and_delete(&ofproto->ghost_ports, devname);
     ofproto->backer->need_revalidate = REV_RECONFIGURE;
     bundle_remove(port_);
     set_cfm(port_, NULL);
@@ -2818,6 +2834,28 @@ ofproto_port_from_dpif_port(struct ofproto_dpif *ofproto,
     ofproto_port->ofp_port = odp_port_to_ofp_port(ofproto, dpif_port->port_no);
 }
 
+static struct ofport_dpif *
+ofport_get_peer(const struct ofport_dpif *ofport_dpif)
+{
+    const struct ofproto_dpif *ofproto;
+    const char *peer;
+
+    peer = netdev_vport_patch_peer(ofport_dpif->up.netdev);
+    if (!peer) {
+        return NULL;
+    }
+
+    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
+        struct ofport *ofport;
+
+        ofport = shash_find_data(&ofproto->up.port_by_name, peer);
+        if (ofport && ofport->ofproto->ofproto_class == &ofproto_dpif_class) {
+            return ofport_dpif_cast(ofport);
+        }
+    }
+    return NULL;
+}
+
 static void
 port_run_fast(struct ofport_dpif *ofport)
 {
@@ -2886,6 +2924,24 @@ port_query_by_name(const struct ofproto *ofproto_, const char *devname,
     struct dpif_port dpif_port;
     int error;
 
+    if (sset_contains(&ofproto->ghost_ports, devname)) {
+        const char *type = netdev_get_type_from_name(devname);
+
+        /* We may be called before ofproto->up.port_by_name is populated with
+         * the appropriate ofport.  For this reason, we must get the name and
+         * type from the netdev layer directly. */
+        if (type) {
+            const struct ofport *ofport;
+
+            ofport = shash_find_data(&ofproto->up.port_by_name, devname);
+            ofproto_port->ofp_port = ofport ? ofport->ofp_port : OFPP_NONE;
+            ofproto_port->name = xstrdup(devname);
+            ofproto_port->type = xstrdup(type);
+            return 0;
+        }
+        return ENODEV;
+    }
+
     if (!sset_contains(&ofproto->ports, devname)) {
         return ENODEV;
     }
@@ -2904,6 +2960,11 @@ port_add(struct ofproto *ofproto_, struct netdev *netdev)
     uint32_t odp_port = UINT32_MAX;
     int error;
 
+    if (netdev_vport_is_patch(netdev)) {
+        sset_add(&ofproto->ghost_ports, netdev_get_name(netdev));
+        return 0;
+    }
+
     error = dpif_port_add(ofproto->backer->dpif, netdev, &odp_port);
     if (!error) {
         sset_add(&ofproto->ports, netdev_get_name(netdev));
@@ -2994,16 +3055,13 @@ ofproto_update_local_port_stats(const struct ofproto *ofproto_,
 struct port_dump_state {
     uint32_t bucket;
     uint32_t offset;
+    bool ghost;
 };
 
 static int
 port_dump_start(const struct ofproto *ofproto_ OVS_UNUSED, void **statep)
 {
-    struct port_dump_state *state;
-
-    *statep = state = xmalloc(sizeof *state);
-    state->bucket = 0;
-    state->offset = 0;
+    *statep = xzalloc(sizeof(struct port_dump_state));
     return 0;
 }
 
@@ -3013,10 +3071,11 @@ port_dump_next(const struct ofproto *ofproto_ OVS_UNUSED, void *state_,
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
     struct port_dump_state *state = state_;
+    const struct sset *sset;
     struct sset_node *node;
 
-    while ((node = sset_at_position(&ofproto->ports, &state->bucket,
-                               &state->offset))) {
+    sset = state->ghost ? &ofproto->ghost_ports : &ofproto->ports;
+    while ((node = sset_at_position(sset, &state->bucket, &state->offset))) {
         int error;
 
         error = port_query_by_name(ofproto_, node->name, port);
@@ -3025,6 +3084,13 @@ port_dump_next(const struct ofproto *ofproto_ OVS_UNUSED, void *state_,
         }
     }
 
+    if (!state->ghost) {
+        state->ghost = true;
+        state->bucket = 0;
+        state->offset = 0;
+        return port_dump_next(ofproto_, state_, port);
+    }
+
     return EOF;
 }
 
@@ -4101,9 +4167,7 @@ facet_free(struct facet *facet)
 }
 
 /* Executes, within 'ofproto', the 'n_actions' actions in 'actions' on
- * 'packet', which arrived on 'in_port'.
- *
- * Takes ownership of 'packet'. */
+ * 'packet', which arrived on 'in_port'. */
 static bool
 execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow,
                     const struct nlattr *odp_actions, size_t actions_len,
@@ -4119,8 +4183,6 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow,
 
     error = dpif_execute(ofproto->backer->dpif, key.data, key.size,
                          odp_actions, actions_len, packet);
-
-    ofpbuf_delete(packet);
     return !error;
 }
 
@@ -5172,11 +5234,10 @@ rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes)
     }
 }
 
-static enum ofperr
-rule_execute(struct rule *rule_, const struct flow *flow,
-             struct ofpbuf *packet)
+static void
+rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow,
+                  struct ofpbuf *packet)
 {
-    struct rule_dpif *rule = rule_dpif_cast(rule_);
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
 
     struct dpif_flow_stats stats;
@@ -5198,7 +5259,14 @@ rule_execute(struct rule *rule_, const struct flow *flow,
                         odp_actions.size, packet);
 
     ofpbuf_uninit(&odp_actions);
+}
 
+static enum ofperr
+rule_execute(struct rule *rule, const struct flow *flow,
+             struct ofpbuf *packet)
+{
+    rule_dpif_execute(rule_dpif_cast(rule), flow, packet);
+    ofpbuf_delete(packet);
     return 0;
 }
 
@@ -5224,6 +5292,29 @@ send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
     int error;
 
     flow_extract(packet, 0, 0, NULL, OFPP_LOCAL, &flow);
+    if (netdev_vport_is_patch(ofport->up.netdev)) {
+        struct ofproto_dpif *peer_ofproto;
+        struct dpif_flow_stats stats;
+        struct ofport_dpif *peer;
+        struct rule_dpif *rule;
+
+        peer = ofport_get_peer(ofport);
+        if (!peer) {
+            return ENODEV;
+        }
+
+        dpif_flow_stats_extract(&flow, packet, time_msec(), &stats);
+        netdev_vport_patch_inc_tx(ofport->up.netdev, &stats);
+        netdev_vport_patch_inc_rx(peer->up.netdev, &stats);
+
+        flow.in_port = peer->up.ofp_port;
+        peer_ofproto = ofproto_dpif_cast(peer->up.ofproto);
+        rule = rule_dpif_lookup(peer_ofproto, &flow);
+        rule_dpif_execute(rule, &flow, packet);
+
+        return 0;
+    }
+
     odp_port = vsp_realdev_to_vlandev(ofproto, ofport->odp_port,
                                       flow.vlan_tci);
     if (odp_port != ofport->odp_port) {
@@ -5410,11 +5501,10 @@ compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port,
                         bool check_stp)
 {
     const struct ofport_dpif *ofport = get_ofp_port(ctx->ofproto, ofp_port);
-    uint32_t odp_port = ofp_port_to_odp_port(ctx->ofproto, ofp_port);
     ovs_be16 flow_vlan_tci = ctx->flow.vlan_tci;
     uint8_t flow_nw_tos = ctx->flow.nw_tos;
     struct priority_to_dscp *pdscp;
-    uint32_t out_port;
+    uint32_t out_port, odp_port;
 
     if (!ofport) {
         xlate_report(ctx, "Nonexistent output port");
@@ -5427,12 +5517,43 @@ compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port,
         return;
     }
 
+    if (netdev_vport_is_patch(ofport->up.netdev)) {
+        struct ofport_dpif *peer = ofport_get_peer(ofport);
+        uint16_t old_in_port = ctx->flow.in_port;
+        const struct ofproto_dpif *peer_ofproto;
+
+        if (!peer) {
+            xlate_report(ctx, "Nonexistent patch port peer");
+            return;
+        }
+
+        peer_ofproto = ofproto_dpif_cast(peer->up.ofproto);
+        if (peer_ofproto->backer != ctx->ofproto->backer) {
+            xlate_report(ctx, "Patch port peer on a different datapath");
+            return;
+        }
+
+        ctx->ofproto = ofproto_dpif_cast(peer->up.ofproto);
+        ctx->flow.in_port = peer->up.ofp_port;
+        xlate_table_action(ctx, ctx->flow.in_port, 0, true);
+        ctx->flow.in_port = old_in_port;
+        ctx->ofproto = ofproto_dpif_cast(ofport->up.ofproto);
+
+        if (ctx->resubmit_stats) {
+            netdev_vport_patch_inc_tx(ofport->up.netdev, ctx->resubmit_stats);
+            netdev_vport_patch_inc_rx(peer->up.netdev, ctx->resubmit_stats);
+        }
+
+        return;
+    }
+
     pdscp = get_priority(ofport, ctx->flow.skb_priority);
     if (pdscp) {
         ctx->flow.nw_tos &= ~IP_DSCP_MASK;
         ctx->flow.nw_tos |= pdscp->dscp;
     }
 
+    odp_port = ofp_port_to_odp_port(ctx->ofproto, ofp_port);
     out_port = vsp_realdev_to_vlandev(ctx->ofproto, odp_port,
                                       ctx->flow.vlan_tci);
     if (out_port != odp_port) {
@@ -7542,9 +7663,17 @@ show_dp_format(const struct ofproto_dpif *ofproto, struct ds *ds)
         struct ofport *ofport = node->data;
         const char *name = netdev_get_name(ofport->netdev);
         const char *type = netdev_get_type(ofport->netdev);
+        uint32_t odp_port;
+
+        ds_put_format(ds, "\t%s %u/", name, ofport->ofp_port);
+
+        odp_port = ofp_port_to_odp_port(ofproto, ofport->ofp_port);
+        if (odp_port != OVSP_NONE) {
+            ds_put_format(ds, "%"PRIu32":", odp_port);
+        } else {
+            ds_put_cstr(ds, "none:");
+        }
 
-        ds_put_format(ds, "\t%s %u/%u:", name, ofport->ofp_port,
-                      ofp_port_to_odp_port(ofproto, ofport->ofp_port));
         if (strcmp(type, "system")) {
             struct netdev *netdev;
             int error;
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index a14c412..067c1da 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -1324,3 +1324,60 @@ in_port(3),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv
 
 OVS_VSWITCHD_STOP
 AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - patch ports])
+OVS_VSWITCHD_START([add-br br1 \
+-- set bridge br1 datapath-type=dummy fail-mode=secure \
+-- add-port br1 pbr1 -- set int pbr1 type=patch options:peer=pbr0 \
+-- add-port br0 pbr0 -- set int pbr0 type=patch options:peer=pbr1])
+
+ADD_OF_PORTS([br0], [2])
+ADD_OF_PORTS([br1], [3])
+
+AT_CHECK([ovs-ofctl add-flow br0 actions=LOCAL,output:1,output:2])
+AT_CHECK([ovs-ofctl add-flow br1 actions=LOCAL,output:1,output:3])
+
+for i in $(seq 1 10); do
+    ovs-appctl netdev-dummy/receive br0 'in_port(100),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'
+done
+
+for i in $(seq 1 5); do
+    ovs-appctl netdev-dummy/receive br1 'in_port(101),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'
+done
+
+AT_CHECK([ovs-appctl dpif/show], [0], [dnl
+br0 (dummy at ovs-dummy):
+	lookups: hit:13 missed:2 lost:0
+	flows: 1
+	br0 65534/100: (dummy)
+	p2 2/2: (dummy)
+	pbr0 1/none: (patch: peer=pbr1)
+br1 (dummy at ovs-dummy):
+	lookups: hit:13 missed:2 lost:0
+	flows: 1
+	br1 65534/101: (dummy)
+	p3 3/3: (dummy)
+	pbr1 1/none: (patch: peer=pbr0)
+])
+
+AT_CHECK([ovs-appctl dpif/dump-flows br0 | STRIP_USED], [0], [dnl
+in_port(100),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:9, bytes:540, used:0.0s, actions:101,3,2
+])
+AT_CHECK([ovs-appctl dpif/dump-flows br1 | STRIP_USED], [0], [dnl
+in_port(101),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:4, bytes:240, used:0.0s, actions:100,2,3
+])
+
+AT_CHECK([ovs-ofctl dump-ports br0 pbr0], [0], [dnl
+OFPST_PORT reply (xid=0x4): 1 ports
+  port  1: rx pkts=5, bytes=300, drop=0, errs=0, frame=0, over=0, crc=0
+           tx pkts=10, bytes=600, drop=0, errs=0, coll=0
+])
+
+AT_CHECK([ovs-ofctl dump-ports br1 pbr1], [0], [dnl
+OFPST_PORT reply (xid=0x4): 1 ports
+  port  1: rx pkts=10, bytes=600, drop=0, errs=0, frame=0, over=0, crc=0
+           tx pkts=5, bytes=300, drop=0, errs=0, coll=0
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
-- 
1.7.9.5




More information about the dev mailing list