[ovs-dev] [PATCH 1/5] dpif: Change dpif API to allow multiple handler threads read upcall.

Alex Wang alexw at nicira.com
Thu Feb 27 19:44:22 UTC 2014


This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw at nicira.com>
---
 lib/dpif-linux.c              |   24 ++++++++++++++------
 lib/dpif-netdev.c             |   17 ++++++++++----
 lib/dpif-provider.h           |   36 ++++++++++++++++++++---------
 lib/dpif.c                    |   50 ++++++++++++++++++++++++-----------------
 lib/dpif.h                    |   28 ++++++++++++++++++-----
 ofproto/ofproto-dpif-upcall.c |    6 ++---
 ofproto/ofproto-dpif-xlate.c  |    2 +-
 ofproto/ofproto-dpif.c        |    8 ++++---
 8 files changed, 117 insertions(+), 54 deletions(-)

diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 18de118..1c9869e 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -171,7 +171,7 @@ static unsigned int ovs_vport_mcgroup;
 static int dpif_linux_init(void);
 static int open_dpif(const struct dpif_linux_dp *, struct dpif **);
 static uint32_t dpif_linux_port_get_pid(const struct dpif *,
-                                        odp_port_t port_no);
+                                        odp_port_t port_no, uint32_t hash);
 static int dpif_linux_refresh_channels(struct dpif *);
 
 static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *,
@@ -304,7 +304,7 @@ destroy_channels(struct dpif_linux *dpif)
     dpif->n_events = dpif->event_offset = 0;
 
     /* Don't close dpif->epoll_fd since that would cause other threads that
-     * call dpif_recv_wait(dpif) to wait on an arbitrary fd or a closed fd. */
+     * call dpif_recv_wait() to wait on an arbitrary fd or a closed fd. */
 }
 
 static int
@@ -675,7 +675,8 @@ dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname,
 }
 
 static uint32_t
-dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
+dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no,
+                        uint32_t hash OVS_UNUSED)
 {
     struct dpif_linux *dpif = dpif_linux_cast(dpif_);
     uint32_t port_idx = odp_to_u32(port_no);
@@ -1399,7 +1400,8 @@ dpif_linux_recv_set__(struct dpif *dpif_, bool enable)
 }
 
 static int
-dpif_linux_recv_set(struct dpif *dpif_, bool enable)
+dpif_linux_recv_set(struct dpif *dpif_, bool enable,
+                    uint32_t n_handlers OVS_UNUSED)
 {
     struct dpif_linux *dpif = dpif_linux_cast(dpif_);
     int error;
@@ -1412,6 +1414,13 @@ dpif_linux_recv_set(struct dpif *dpif_, bool enable)
 }
 
 static int
+dpif_linux_handlers_set(struct dpif *dpif OVS_UNUSED,
+                        uint32_t n_handlers OVS_UNUSED)
+{
+    return 0;
+}
+
+static int
 dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
                              uint32_t queue_id, uint32_t *priority)
 {
@@ -1557,8 +1566,8 @@ dpif_linux_recv__(struct dpif *dpif_, struct dpif_upcall *upcall,
 }
 
 static int
-dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall,
-                struct ofpbuf *buf)
+dpif_linux_recv(struct dpif *dpif_, uint32_t handler_id OVS_UNUSED,
+                struct dpif_upcall *upcall, struct ofpbuf *buf)
 {
     struct dpif_linux *dpif = dpif_linux_cast(dpif_);
     int error;
@@ -1571,7 +1580,7 @@ dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall,
 }
 
 static void
-dpif_linux_recv_wait(struct dpif *dpif_)
+dpif_linux_recv_wait(struct dpif *dpif_, uint32_t handler_id OVS_UNUSED)
 {
     struct dpif_linux *dpif = dpif_linux_cast(dpif_);
 
@@ -1631,6 +1640,7 @@ const struct dpif_class dpif_linux_class = {
     dpif_linux_execute,
     dpif_linux_operate,
     dpif_linux_recv_set,
+    dpif_linux_handlers_set,
     dpif_linux_queue_to_priority,
     dpif_linux_recv,
     dpif_linux_recv_wait,
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c4ba646..ee8be43 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1431,7 +1431,15 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
 }
 
 static int
-dpif_netdev_recv_set(struct dpif *dpif OVS_UNUSED, bool enable OVS_UNUSED)
+dpif_netdev_recv_set(struct dpif *dpif OVS_UNUSED, bool enable OVS_UNUSED,
+                     uint32_t n_handlers OVS_UNUSED)
+{
+    return 0;
+}
+
+static int
+dpif_netdev_handlers_set(struct dpif *dpif OVS_UNUSED,
+                         uint32_t n_handlers OVS_UNUSED)
 {
     return 0;
 }
@@ -1460,8 +1468,8 @@ find_nonempty_queue(struct dp_netdev *dp)
 }
 
 static int
-dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall,
-                 struct ofpbuf *buf)
+dpif_netdev_recv(struct dpif *dpif, uint32_t n_handlers OVS_UNUSED,
+                 struct dpif_upcall *upcall, struct ofpbuf *buf)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_queue *q;
@@ -1487,7 +1495,7 @@ dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall,
 }
 
 static void
-dpif_netdev_recv_wait(struct dpif *dpif)
+dpif_netdev_recv_wait(struct dpif *dpif, uint32_t handler_id OVS_UNUSED)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
     uint64_t seq;
@@ -1860,6 +1868,7 @@ const struct dpif_class dpif_netdev_class = {
     dpif_netdev_execute,
     NULL,                       /* operate */
     dpif_netdev_recv_set,
+    dpif_netdev_handlers_set,
     dpif_netdev_queue_to_priority,
     dpif_netdev_recv,
     dpif_netdev_recv_wait,
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index adc5242..db7b49a 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -148,6 +148,9 @@ struct dpif_class {
      * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in
      * flows whose packets arrived on port 'port_no'.
      *
+     * Since there can be multiple Netlink sockets for the port.  A 'hash'
+     * (e.g. 5-tuple hash of flow) is provided for selecting a specific PID.
+     *
      * A 'port_no' of UINT32_MAX should be treated as a special case.  The
      * implementation should return a reserved PID, not allocated to any port,
      * that the client may use for special purposes.
@@ -158,7 +161,8 @@ struct dpif_class {
      *
      * A dpif provider that doesn't have meaningful Netlink PIDs can use NULL
      * for this function.  This is equivalent to always returning 0. */
-    uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no);
+    uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no,
+                             uint32_t hash);
 
     /* Attempts to begin dumping the ports in a dpif.  On success, returns 0
      * and initializes '*statep' with any data needed for iteration.  On
@@ -323,17 +327,26 @@ struct dpif_class {
     /* Enables or disables receiving packets with dpif_recv() for 'dpif'.
      * Turning packet receive off and then back on is allowed to change Netlink
      * PID assignments (see ->port_get_pid()).  The client is responsible for
-     * updating flows as necessary if it does this. */
-    int (*recv_set)(struct dpif *dpif, bool enable);
+     * updating flows as necessary if it does this.
+     *
+     * Since multiple handlers can read upcall simultaneously, each port
+     * should have a Netlink sockets for each handler.  So, the 'n_handlers'
+     * is used to specify that. */
+    int (*recv_set)(struct dpif *dpif, bool enable, uint32_t n_handlers);
+
+    /* If upcall reception is enabled, updates the number of handlers and
+     * Netlink sockets for each port in 'dpif'. */
+    int (*handlers_set)(struct dpif *dpif, uint32_t n_handlers);
 
     /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a
      * priority value used for setting packet priority. */
     int (*queue_to_priority)(const struct dpif *dpif, uint32_t queue_id,
                              uint32_t *priority);
 
-    /* Polls for an upcall from 'dpif'.  If successful, stores the upcall into
-     * '*upcall', using 'buf' for storage.  Should only be called if 'recv_set'
-     * has been used to enable receiving packets from 'dpif'.
+    /* Polls for an upcall from 'dpif' for handler with 'handler_id'.  If
+     * successful, stores the upcall into '*upcall', using 'buf' for storage.
+     * Should only be called if 'recv_set' has been used to enable receiving
+     * packets from 'dpif'.
      *
      * The implementation should point 'upcall->key' and 'upcall->userdata'
      * (if any) into data in the caller-provided 'buf'.  The implementation may
@@ -349,12 +362,13 @@ struct dpif_class {
      *
      * This function must not block.  If no upcall is pending when it is
      * called, it should return EAGAIN without blocking. */
-    int (*recv)(struct dpif *dpif, struct dpif_upcall *upcall,
-                struct ofpbuf *buf);
+    int (*recv)(struct dpif *dpif, uint32_t handler_id,
+                struct dpif_upcall *upcall, struct ofpbuf *buf);
 
-    /* Arranges for the poll loop to wake up when 'dpif' has a message queued
-     * to be received with the recv member function. */
-    void (*recv_wait)(struct dpif *dpif);
+    /* Arranges for the poll loop for handler with 'handler_id' to wake up
+     * when 'dpif' has a message queued to be received with the recv member
+     * function of the handler. */
+    void (*recv_wait)(struct dpif *dpif, uint32_t handler_id);
 
     /* Throws away any queued upcalls that 'dpif' currently has ready to
      * return. */
diff --git a/lib/dpif.c b/lib/dpif.c
index 2b79a6e..4f10c8f 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -636,6 +636,9 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
  * as the OVS_USERSPACE_ATTR_PID attribute's value, for use in flows whose
  * packets arrived on port 'port_no'.
  *
+ * Since there can be multiple Netlink sockets for the port.  A 'hash'
+ * (e.g. 5-tuple hash of flow) is provided for selecting a specific PID.
+ *
  * A 'port_no' of ODPP_NONE is a special case: it returns a reserved PID, not
  * allocated to any port, that the client may use for special purposes.
  *
@@ -645,10 +648,10 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
  * update all of the flows that it installed that contain
  * OVS_ACTION_ATTR_USERSPACE actions. */
 uint32_t
-dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no)
+dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no, uint32_t hash)
 {
     return (dpif->dpif_class->port_get_pid
-            ? (dpif->dpif_class->port_get_pid)(dpif, port_no)
+            ? (dpif->dpif_class->port_get_pid)(dpif, port_no, hash)
             : 0);
 }
 
@@ -1248,24 +1251,29 @@ dpif_upcall_type_to_string(enum dpif_upcall_type type)
     }
 }
 
-/* Enables or disables receiving packets with dpif_recv() on 'dpif'.  Returns 0
- * if successful, otherwise a positive errno value.
- *
- * Turning packet receive off and then back on may change the Netlink PID
- * assignments returned by dpif_port_get_pid().  If the client does this, it
- * must update all of the flows that have OVS_ACTION_ATTR_USERSPACE actions
- * using the new PID assignment. */
 int
-dpif_recv_set(struct dpif *dpif, bool enable)
+dpif_recv_set(struct dpif *dpif, bool enable, uint32_t n_handlers)
 {
-    int error = dpif->dpif_class->recv_set(dpif, enable);
+    int error = dpif->dpif_class->recv_set(dpif, enable, n_handlers);
     log_operation(dpif, "recv_set", error);
     return error;
 }
 
-/* Polls for an upcall from 'dpif'.  If successful, stores the upcall into
- * '*upcall', using 'buf' for storage.  Should only be called if
- * dpif_recv_set() has been used to enable receiving packets on 'dpif'.
+/* If upcall reception is enabled, updates the number of handlers and
+ * Netlink sockets for each port in 'dpif'. */
+int
+dpif_handlers_set(struct dpif *dpif, uint32_t n_handlers)
+{
+    int error = dpif->dpif_class->handlers_set(dpif, n_handlers);
+    log_operation(dpif, "handlers_set", error);
+    return error;
+}
+
+
+/* Polls for an upcall from 'dpif' for handler with 'handler_id'.  If
+ * successful, stores the upcall into '*upcall', using 'buf' for storage.
+ * Should only be called if dpif_recv_set() has been used to enable receiving
+ * packets on 'dpif'.
  *
  * 'upcall->key' and 'upcall->userdata' point into data in the caller-provided
  * 'buf', so their memory cannot be freed separately from 'buf'.
@@ -1280,9 +1288,10 @@ dpif_recv_set(struct dpif *dpif, bool enable)
  * Returns 0 if successful, otherwise a positive errno value.  Returns EAGAIN
  * if no upcall is immediately available. */
 int
-dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf)
+dpif_recv(struct dpif *dpif, uint32_t handler_id, struct dpif_upcall *upcall,
+          struct ofpbuf *buf)
 {
-    int error = dpif->dpif_class->recv(dpif, upcall, buf);
+    int error = dpif->dpif_class->recv(dpif, handler_id, upcall, buf);
     if (!error && !VLOG_DROP_DBG(&dpmsg_rl)) {
         struct ds flow;
         char *packet;
@@ -1316,12 +1325,13 @@ dpif_recv_purge(struct dpif *dpif)
     }
 }
 
-/* Arranges for the poll loop to wake up when 'dpif' has a message queued to be
- * received with dpif_recv(). */
+/* Arranges for the poll loop for handler with 'handler_id' to wake up when
+ * 'dpif' has a message queued to be received with dpif_recv() of the handler.
+ */
 void
-dpif_recv_wait(struct dpif *dpif)
+dpif_recv_wait(struct dpif *dpif, uint32_t handler_id)
 {
-    dpif->dpif_class->recv_wait(dpif);
+    dpif->dpif_class->recv_wait(dpif, handler_id);
 }
 
 /* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type'
diff --git a/lib/dpif.h b/lib/dpif.h
index 7f986f9..cf32277 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -61,7 +61,7 @@
  *      "internal" (for a simulated port used to connect to the TCP/IP stack),
  *      and "gre" (for a GRE tunnel).
  *
- *    - A Netlink PID (see "Upcall Queuing and Ordering" below).
+ *    - A Netlink PID for each client (see "Upcall Queuing and Ordering" below).
  *
  * The dpif interface has functions for adding and deleting ports.  When a
  * datapath implements these (e.g. as the Linux and netdev datapaths do), then
@@ -276,6 +276,21 @@
  *    - Upcalls that specify the "special" Netlink PID are queued separately.
  *
  *
+ * Multiple Clients
+ * ----------------
+ * A datapath may have more than one client and multiple clients may want
+ * to read upcalls simultaneously.  In such situation, the additional
+ * requirements on the datapath are:
+ *
+ *    - The datapath must provide a set of per-port Netlink PIDs to each
+ *      client.  The distribution of "miss" upcalls must guarantee that the
+ *      same flow should always go to the same socket's receive queue.
+ *
+ *    - For "action" upcalls, the client can specify its own Netlink PID or
+ *      other client's Netlink PID of the same port for offloading purpose
+ *      (e.g. in a "round robin" manner).
+ *
+ *
  * Packet Format
  * =============
  *
@@ -445,7 +460,8 @@ int dpif_port_query_by_name(const struct dpif *, const char *devname,
                             struct dpif_port *);
 int dpif_port_get_name(struct dpif *, odp_port_t port_no,
                        char *name, size_t name_size);
-uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no);
+uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no,
+                           uint32_t hash);
 
 struct dpif_port_dump {
     const struct dpif *dpif;
@@ -613,10 +629,12 @@ struct dpif_upcall {
     struct nlattr *userdata;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
 };
 
-int dpif_recv_set(struct dpif *, bool enable);
-int dpif_recv(struct dpif *, struct dpif_upcall *, struct ofpbuf *);
+int dpif_recv_set(struct dpif *, bool enable, uint32_t n_handlers);
+int dpif_handlers_set(struct dpif *, uint32_t n_handlers);
+int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *,
+              struct ofpbuf *);
 void dpif_recv_purge(struct dpif *);
-void dpif_recv_wait(struct dpif *);
+void dpif_recv_wait(struct dpif *, uint32_t handler_id);
 
 /* Miscellaneous. */
 
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index e4f81a1..62238c0 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -528,7 +528,7 @@ udpif_dispatcher(void *arg)
     set_subprogram_name("dispatcher");
     while (!latch_is_set(&udpif->exit_latch)) {
         recv_upcalls(udpif);
-        dpif_recv_wait(udpif->dpif);
+        dpif_recv_wait(udpif->dpif, 0);
         latch_wait(&udpif->exit_latch);
         poll_block();
     }
@@ -800,7 +800,7 @@ recv_upcalls(struct udpif *udpif)
         upcall = xmalloc(sizeof *upcall);
         ofpbuf_use_stub(&upcall->upcall_buf, upcall->upcall_stub,
                         sizeof upcall->upcall_stub);
-        error = dpif_recv(udpif->dpif, &upcall->dpif_upcall,
+        error = dpif_recv(udpif->dpif, 0, &upcall->dpif_upcall,
                           &upcall->upcall_buf);
         if (error) {
             /* upcall_destroy() can only be called on successfully received
@@ -888,7 +888,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
     port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
         ? ODPP_NONE
         : odp_in_port;
-    pid = dpif_port_get_pid(udpif->dpif, port);
+    pid = dpif_port_get_pid(udpif->dpif, port, 0);
     odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, buf);
 }
 
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 89d92af..e5e88e8 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -1484,7 +1484,7 @@ compose_sample_action(const struct xbridge *xbridge,
     actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS);
 
     odp_port = ofp_port_to_odp_port(xbridge, flow->in_port.ofp_port);
-    pid = dpif_port_get_pid(xbridge->dpif, odp_port);
+    pid = dpif_port_get_pid(xbridge->dpif, odp_port, 0);
     cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size, odp_actions);
 
     nl_msg_end_nested(odp_actions, actions_offset);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index c597114..4e109a4 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -472,7 +472,7 @@ type_run(const char *type)
 
         backer->recv_set_enable = true;
 
-        error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
+        error = dpif_recv_set(backer->dpif, backer->recv_set_enable, 1);
         if (error) {
             VLOG_ERR("Failed to enable receiving packets in dpif.");
             return error;
@@ -482,6 +482,7 @@ type_run(const char *type)
     }
 
     if (backer->recv_set_enable) {
+        dpif_handlers_set(backer->dpif, 1);
         udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
     }
 
@@ -887,7 +888,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
 
     shash_add(&all_dpif_backers, type, backer);
 
-    error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
+    error = dpif_recv_set(backer->dpif, backer->recv_set_enable, 1);
     if (error) {
         VLOG_ERR("failed to listen on datapath of type %s: %s",
                  type, ovs_strerror(error));
@@ -898,6 +899,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
     backer->max_mpls_depth = check_max_mpls_depth(backer);
 
     if (backer->recv_set_enable) {
+        dpif_handlers_set(backer->dpif, 1);
         udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
     }
 
@@ -932,7 +934,7 @@ check_variable_length_userdata(struct dpif_backer *backer)
     ofpbuf_init(&actions, 64);
     start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE);
     nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID,
-                   dpif_port_get_pid(backer->dpif, ODPP_NONE));
+                   dpif_port_get_pid(backer->dpif, ODPP_NONE, 0));
     nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4);
     nl_msg_end_nested(&actions, start);
 
-- 
1.7.9.5




More information about the dev mailing list