[ovs-dev] [PATCH] ofproto: Avoid user->kernel->user round-trip for many controller actions.

Ben Pfaff blp at nicira.com
Wed Aug 4 21:09:22 UTC 2010


When an OpenFlow flow says to send packets to the controller, until now
ofproto has executed that using dpif_execute(), which passes the packet up
to the kernel.  The kernel queues the packet into its "action" queue, and
then later ofproto pulls the packet back down from the kernel and sends it
to the controller.

However, this is unnecessary.  Open vSwitch can just recognize in advance
that it will get the packet back and handle it directly, skipping the round
trip.  This commit implements this optimization.

This generally affects only the first packet in a flow, since generally the
rest come directly down from the kernel.  It only optimizes the "easy" case
where the first action in a flow is to send the packet to the controller,
since this seems to be the common case in the flows that I'm looking at
now.
---
 lib/dpif-provider.h |    8 --------
 lib/dpif.h          |   12 +++++++++++-
 ofproto/ofproto.c   |   37 +++++++++++++++++++++++++++++++++++--
 3 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index b2f9d4b..1106db8 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -324,14 +324,6 @@ struct dpif_class {
     void (*recv_wait)(struct dpif *dpif);
 };
 
-/* Minimum number of bytes of headroom for a packet returned by the 'recv'
- * member function (see above).  This headroom allows "struct odp_msg" to be
- * replaced by "struct ofp_packet_in" without copying the buffer. */
-#define DPIF_RECV_MSG_PADDING (sizeof(struct ofp_packet_in) \
-                               - sizeof(struct odp_msg))
-BUILD_ASSERT_DECL(sizeof(struct ofp_packet_in) > sizeof(struct odp_msg));
-BUILD_ASSERT_DECL(DPIF_RECV_MSG_PADDING % 4 == 0);
-
 extern const struct dpif_class dpif_linux_class;
 extern const struct dpif_class dpif_netdev_class;
 
diff --git a/lib/dpif.h b/lib/dpif.h
index a639725..1496c22 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -18,10 +18,12 @@
 #ifndef DPIF_H
 #define DPIF_H 1
 
-#include "openvswitch/datapath-protocol.h"
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
+#include "openflow/openflow.h"
+#include "openvswitch/datapath-protocol.h"
+#include "util.h"
 
 #ifdef  __cplusplus
 extern "C" {
@@ -90,6 +92,14 @@ int dpif_execute(struct dpif *, uint16_t in_port,
                  const union odp_action[], size_t n_actions,
                  const struct ofpbuf *);
 
+/* Minimum number of bytes of headroom for a packet returned by dpif_recv()
+ * member function.  This headroom allows "struct odp_msg" to be replaced by
+ * "struct ofp_packet_in" without copying the buffer. */
+#define DPIF_RECV_MSG_PADDING (sizeof(struct ofp_packet_in) \
+                               - sizeof(struct odp_msg))
+BUILD_ASSERT_DECL(sizeof(struct ofp_packet_in) > sizeof(struct odp_msg));
+BUILD_ASSERT_DECL(DPIF_RECV_MSG_PADDING % 4 == 0);
+
 int dpif_recv_get_mask(const struct dpif *, int *listen_mask);
 int dpif_recv_set_mask(struct dpif *, int listen_mask);
 int dpif_get_sflow_probability(const struct dpif *, uint32_t *probability);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 461053a..73cea30 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -1830,6 +1830,39 @@ rule_has_out_port(const struct rule *rule, uint16_t out_port)
     return false;
 }
 
+static bool
+execute_odp_actions(struct ofproto *ofproto, uint16_t in_port,
+                    const union odp_action *actions, size_t n_actions,
+                    const struct ofpbuf *packet)
+{
+    if (n_actions > 0 && actions[0].type == ODPAT_CONTROLLER) {
+        /* As an optimization, avoid a round-trip from userspace to kernel to
+         * userspace.  This also avoids possibly filling up kernel packet
+         * buffers along the way. */
+        struct ofpbuf *copy;
+        struct odp_msg *msg;
+
+        copy = ofpbuf_new(DPIF_RECV_MSG_PADDING + sizeof(struct odp_msg)
+                          + packet->size);
+        ofpbuf_reserve(copy, DPIF_RECV_MSG_PADDING);
+        msg = ofpbuf_put_uninit(copy, sizeof *msg);
+        msg->type = _ODPL_ACTION_NR;
+        msg->length = sizeof(struct odp_msg) + packet->size;
+        msg->port = in_port;
+        msg->reserved = 0;
+        msg->arg = actions[0].controller.arg;
+        ofpbuf_put(copy, packet->data, packet->size);
+
+        send_packet_in(ofproto, copy);
+
+        actions++;
+        n_actions--;
+    }
+
+    return !n_actions || !dpif_execute(ofproto->dpif, in_port,
+                                       actions, n_actions, packet);
+}
+
 /* Executes the actions indicated by 'rule' on 'packet', which is in flow
  * 'flow' and is considered to have arrived on ODP port 'in_port'.
  *
@@ -1872,8 +1905,8 @@ rule_execute(struct ofproto *ofproto, struct rule *rule,
     }
 
     /* Execute the ODP actions. */
-    if (!dpif_execute(ofproto->dpif, flow->in_port,
-                      actions, n_actions, packet)) {
+    if (execute_odp_actions(ofproto, flow->in_port,
+                            actions, n_actions, packet)) {
         struct odp_flow_stats stats;
         flow_extract_stats(flow, packet, &stats);
         update_stats(ofproto, rule, &stats);
-- 
1.7.1





More information about the dev mailing list