[ovs-dev] [PATCH 1/5] dpif: Change dpif API to allow multiple handler threads read upcall.
Alex Wang
alexw at nicira.com
Thu Feb 27 19:44:22 UTC 2014
This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.
Signed-off-by: Alex Wang <alexw at nicira.com>
---
lib/dpif-linux.c | 24 ++++++++++++++------
lib/dpif-netdev.c | 17 ++++++++++----
lib/dpif-provider.h | 36 ++++++++++++++++++++---------
lib/dpif.c | 50 ++++++++++++++++++++++++-----------------
lib/dpif.h | 28 ++++++++++++++++++-----
ofproto/ofproto-dpif-upcall.c | 6 ++---
ofproto/ofproto-dpif-xlate.c | 2 +-
ofproto/ofproto-dpif.c | 8 ++++---
8 files changed, 117 insertions(+), 54 deletions(-)
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 18de118..1c9869e 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -171,7 +171,7 @@ static unsigned int ovs_vport_mcgroup;
static int dpif_linux_init(void);
static int open_dpif(const struct dpif_linux_dp *, struct dpif **);
static uint32_t dpif_linux_port_get_pid(const struct dpif *,
- odp_port_t port_no);
+ odp_port_t port_no, uint32_t hash);
static int dpif_linux_refresh_channels(struct dpif *);
static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *,
@@ -304,7 +304,7 @@ destroy_channels(struct dpif_linux *dpif)
dpif->n_events = dpif->event_offset = 0;
/* Don't close dpif->epoll_fd since that would cause other threads that
- * call dpif_recv_wait(dpif) to wait on an arbitrary fd or a closed fd. */
+ * call dpif_recv_wait() to wait on an arbitrary fd or a closed fd. */
}
static int
@@ -675,7 +675,8 @@ dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname,
}
static uint32_t
-dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
+dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no,
+ uint32_t hash OVS_UNUSED)
{
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
uint32_t port_idx = odp_to_u32(port_no);
@@ -1399,7 +1400,8 @@ dpif_linux_recv_set__(struct dpif *dpif_, bool enable)
}
static int
-dpif_linux_recv_set(struct dpif *dpif_, bool enable)
+dpif_linux_recv_set(struct dpif *dpif_, bool enable,
+ uint32_t n_handlers OVS_UNUSED)
{
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
int error;
@@ -1412,6 +1414,13 @@ dpif_linux_recv_set(struct dpif *dpif_, bool enable)
}
static int
+dpif_linux_handlers_set(struct dpif *dpif OVS_UNUSED,
+ uint32_t n_handlers OVS_UNUSED)
+{
+ return 0;
+}
+
+static int
dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
uint32_t queue_id, uint32_t *priority)
{
@@ -1557,8 +1566,8 @@ dpif_linux_recv__(struct dpif *dpif_, struct dpif_upcall *upcall,
}
static int
-dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall,
- struct ofpbuf *buf)
+dpif_linux_recv(struct dpif *dpif_, uint32_t handler_id OVS_UNUSED,
+ struct dpif_upcall *upcall, struct ofpbuf *buf)
{
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
int error;
@@ -1571,7 +1580,7 @@ dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall,
}
static void
-dpif_linux_recv_wait(struct dpif *dpif_)
+dpif_linux_recv_wait(struct dpif *dpif_, uint32_t handler_id OVS_UNUSED)
{
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
@@ -1631,6 +1640,7 @@ const struct dpif_class dpif_linux_class = {
dpif_linux_execute,
dpif_linux_operate,
dpif_linux_recv_set,
+ dpif_linux_handlers_set,
dpif_linux_queue_to_priority,
dpif_linux_recv,
dpif_linux_recv_wait,
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c4ba646..ee8be43 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1431,7 +1431,15 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
}
static int
-dpif_netdev_recv_set(struct dpif *dpif OVS_UNUSED, bool enable OVS_UNUSED)
+dpif_netdev_recv_set(struct dpif *dpif OVS_UNUSED, bool enable OVS_UNUSED,
+ uint32_t n_handlers OVS_UNUSED)
+{
+ return 0;
+}
+
+static int
+dpif_netdev_handlers_set(struct dpif *dpif OVS_UNUSED,
+ uint32_t n_handlers OVS_UNUSED)
{
return 0;
}
@@ -1460,8 +1468,8 @@ find_nonempty_queue(struct dp_netdev *dp)
}
static int
-dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall,
- struct ofpbuf *buf)
+dpif_netdev_recv(struct dpif *dpif, uint32_t n_handlers OVS_UNUSED,
+ struct dpif_upcall *upcall, struct ofpbuf *buf)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
struct dp_netdev_queue *q;
@@ -1487,7 +1495,7 @@ dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall,
}
static void
-dpif_netdev_recv_wait(struct dpif *dpif)
+dpif_netdev_recv_wait(struct dpif *dpif, uint32_t handler_id OVS_UNUSED)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
uint64_t seq;
@@ -1860,6 +1868,7 @@ const struct dpif_class dpif_netdev_class = {
dpif_netdev_execute,
NULL, /* operate */
dpif_netdev_recv_set,
+ dpif_netdev_handlers_set,
dpif_netdev_queue_to_priority,
dpif_netdev_recv,
dpif_netdev_recv_wait,
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index adc5242..db7b49a 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -148,6 +148,9 @@ struct dpif_class {
* actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in
* flows whose packets arrived on port 'port_no'.
*
+ * Since there can be multiple Netlink sockets for the port. A 'hash'
+ * (e.g. 5-tuple hash of flow) is provided for selecting a specific PID.
+ *
* A 'port_no' of UINT32_MAX should be treated as a special case. The
* implementation should return a reserved PID, not allocated to any port,
* that the client may use for special purposes.
@@ -158,7 +161,8 @@ struct dpif_class {
*
* A dpif provider that doesn't have meaningful Netlink PIDs can use NULL
* for this function. This is equivalent to always returning 0. */
- uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no);
+ uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no,
+ uint32_t hash);
/* Attempts to begin dumping the ports in a dpif. On success, returns 0
* and initializes '*statep' with any data needed for iteration. On
@@ -323,17 +327,26 @@ struct dpif_class {
/* Enables or disables receiving packets with dpif_recv() for 'dpif'.
* Turning packet receive off and then back on is allowed to change Netlink
* PID assignments (see ->port_get_pid()). The client is responsible for
- * updating flows as necessary if it does this. */
- int (*recv_set)(struct dpif *dpif, bool enable);
+ * updating flows as necessary if it does this.
+ *
+ * Since multiple handlers can read upcall simultaneously, each port
+ * should have a Netlink sockets for each handler. So, the 'n_handlers'
+ * is used to specify that. */
+ int (*recv_set)(struct dpif *dpif, bool enable, uint32_t n_handlers);
+
+ /* If upcall reception is enabled, updates the number of handlers and
+ * Netlink sockets for each port in 'dpif'. */
+ int (*handlers_set)(struct dpif *dpif, uint32_t n_handlers);
/* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a
* priority value used for setting packet priority. */
int (*queue_to_priority)(const struct dpif *dpif, uint32_t queue_id,
uint32_t *priority);
- /* Polls for an upcall from 'dpif'. If successful, stores the upcall into
- * '*upcall', using 'buf' for storage. Should only be called if 'recv_set'
- * has been used to enable receiving packets from 'dpif'.
+ /* Polls for an upcall from 'dpif' for handler with 'handler_id'. If
+ * successful, stores the upcall into '*upcall', using 'buf' for storage.
+ * Should only be called if 'recv_set' has been used to enable receiving
+ * packets from 'dpif'.
*
* The implementation should point 'upcall->key' and 'upcall->userdata'
* (if any) into data in the caller-provided 'buf'. The implementation may
@@ -349,12 +362,13 @@ struct dpif_class {
*
* This function must not block. If no upcall is pending when it is
* called, it should return EAGAIN without blocking. */
- int (*recv)(struct dpif *dpif, struct dpif_upcall *upcall,
- struct ofpbuf *buf);
+ int (*recv)(struct dpif *dpif, uint32_t handler_id,
+ struct dpif_upcall *upcall, struct ofpbuf *buf);
- /* Arranges for the poll loop to wake up when 'dpif' has a message queued
- * to be received with the recv member function. */
- void (*recv_wait)(struct dpif *dpif);
+ /* Arranges for the poll loop for handler with 'handler_id' to wake up
+ * when 'dpif' has a message queued to be received with the recv member
+ * function of the handler. */
+ void (*recv_wait)(struct dpif *dpif, uint32_t handler_id);
/* Throws away any queued upcalls that 'dpif' currently has ready to
* return. */
diff --git a/lib/dpif.c b/lib/dpif.c
index 2b79a6e..4f10c8f 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -636,6 +636,9 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
* as the OVS_USERSPACE_ATTR_PID attribute's value, for use in flows whose
* packets arrived on port 'port_no'.
*
+ * Since there can be multiple Netlink sockets for the port. A 'hash'
+ * (e.g. 5-tuple hash of flow) is provided for selecting a specific PID.
+ *
* A 'port_no' of ODPP_NONE is a special case: it returns a reserved PID, not
* allocated to any port, that the client may use for special purposes.
*
@@ -645,10 +648,10 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
* update all of the flows that it installed that contain
* OVS_ACTION_ATTR_USERSPACE actions. */
uint32_t
-dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no)
+dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no, uint32_t hash)
{
return (dpif->dpif_class->port_get_pid
- ? (dpif->dpif_class->port_get_pid)(dpif, port_no)
+ ? (dpif->dpif_class->port_get_pid)(dpif, port_no, hash)
: 0);
}
@@ -1248,24 +1251,29 @@ dpif_upcall_type_to_string(enum dpif_upcall_type type)
}
}
-/* Enables or disables receiving packets with dpif_recv() on 'dpif'. Returns 0
- * if successful, otherwise a positive errno value.
- *
- * Turning packet receive off and then back on may change the Netlink PID
- * assignments returned by dpif_port_get_pid(). If the client does this, it
- * must update all of the flows that have OVS_ACTION_ATTR_USERSPACE actions
- * using the new PID assignment. */
int
-dpif_recv_set(struct dpif *dpif, bool enable)
+dpif_recv_set(struct dpif *dpif, bool enable, uint32_t n_handlers)
{
- int error = dpif->dpif_class->recv_set(dpif, enable);
+ int error = dpif->dpif_class->recv_set(dpif, enable, n_handlers);
log_operation(dpif, "recv_set", error);
return error;
}
-/* Polls for an upcall from 'dpif'. If successful, stores the upcall into
- * '*upcall', using 'buf' for storage. Should only be called if
- * dpif_recv_set() has been used to enable receiving packets on 'dpif'.
+/* If upcall reception is enabled, updates the number of handlers and
+ * Netlink sockets for each port in 'dpif'. */
+int
+dpif_handlers_set(struct dpif *dpif, uint32_t n_handlers)
+{
+ int error = dpif->dpif_class->handlers_set(dpif, n_handlers);
+ log_operation(dpif, "handlers_set", error);
+ return error;
+}
+
+
+/* Polls for an upcall from 'dpif' for handler with 'handler_id'. If
+ * successful, stores the upcall into '*upcall', using 'buf' for storage.
+ * Should only be called if dpif_recv_set() has been used to enable receiving
+ * packets on 'dpif'.
*
* 'upcall->key' and 'upcall->userdata' point into data in the caller-provided
* 'buf', so their memory cannot be freed separately from 'buf'.
@@ -1280,9 +1288,10 @@ dpif_recv_set(struct dpif *dpif, bool enable)
* Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN
* if no upcall is immediately available. */
int
-dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf)
+dpif_recv(struct dpif *dpif, uint32_t handler_id, struct dpif_upcall *upcall,
+ struct ofpbuf *buf)
{
- int error = dpif->dpif_class->recv(dpif, upcall, buf);
+ int error = dpif->dpif_class->recv(dpif, handler_id, upcall, buf);
if (!error && !VLOG_DROP_DBG(&dpmsg_rl)) {
struct ds flow;
char *packet;
@@ -1316,12 +1325,13 @@ dpif_recv_purge(struct dpif *dpif)
}
}
-/* Arranges for the poll loop to wake up when 'dpif' has a message queued to be
- * received with dpif_recv(). */
+/* Arranges for the poll loop for handler with 'handler_id' to wake up when
+ * 'dpif' has a message queued to be received with dpif_recv() of the handler.
+ */
void
-dpif_recv_wait(struct dpif *dpif)
+dpif_recv_wait(struct dpif *dpif, uint32_t handler_id)
{
- dpif->dpif_class->recv_wait(dpif);
+ dpif->dpif_class->recv_wait(dpif, handler_id);
}
/* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type'
diff --git a/lib/dpif.h b/lib/dpif.h
index 7f986f9..cf32277 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -61,7 +61,7 @@
* "internal" (for a simulated port used to connect to the TCP/IP stack),
* and "gre" (for a GRE tunnel).
*
- * - A Netlink PID (see "Upcall Queuing and Ordering" below).
+ * - A Netlink PID for each client (see "Upcall Queuing and Ordering" below).
*
* The dpif interface has functions for adding and deleting ports. When a
* datapath implements these (e.g. as the Linux and netdev datapaths do), then
@@ -276,6 +276,21 @@
* - Upcalls that specify the "special" Netlink PID are queued separately.
*
*
+ * Multiple Clients
+ * ----------------
+ * A datapath may have more than one client and multiple clients may want
+ * to read upcalls simultaneously. In such situation, the additional
+ * requirements on the datapath are:
+ *
+ * - The datapath must provide a set of per-port Netlink PIDs to each
+ * client. The distribution of "miss" upcalls must guarantee that the
+ * same flow should always go to the same socket's receive queue.
+ *
+ * - For "action" upcalls, the client can specify its own Netlink PID or
+ * other client's Netlink PID of the same port for offloading purpose
+ * (e.g. in a "round robin" manner).
+ *
+ *
* Packet Format
* =============
*
@@ -445,7 +460,8 @@ int dpif_port_query_by_name(const struct dpif *, const char *devname,
struct dpif_port *);
int dpif_port_get_name(struct dpif *, odp_port_t port_no,
char *name, size_t name_size);
-uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no);
+uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no,
+ uint32_t hash);
struct dpif_port_dump {
const struct dpif *dpif;
@@ -613,10 +629,12 @@ struct dpif_upcall {
struct nlattr *userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */
};
-int dpif_recv_set(struct dpif *, bool enable);
-int dpif_recv(struct dpif *, struct dpif_upcall *, struct ofpbuf *);
+int dpif_recv_set(struct dpif *, bool enable, uint32_t n_handlers);
+int dpif_handlers_set(struct dpif *, uint32_t n_handlers);
+int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *,
+ struct ofpbuf *);
void dpif_recv_purge(struct dpif *);
-void dpif_recv_wait(struct dpif *);
+void dpif_recv_wait(struct dpif *, uint32_t handler_id);
/* Miscellaneous. */
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index e4f81a1..62238c0 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -528,7 +528,7 @@ udpif_dispatcher(void *arg)
set_subprogram_name("dispatcher");
while (!latch_is_set(&udpif->exit_latch)) {
recv_upcalls(udpif);
- dpif_recv_wait(udpif->dpif);
+ dpif_recv_wait(udpif->dpif, 0);
latch_wait(&udpif->exit_latch);
poll_block();
}
@@ -800,7 +800,7 @@ recv_upcalls(struct udpif *udpif)
upcall = xmalloc(sizeof *upcall);
ofpbuf_use_stub(&upcall->upcall_buf, upcall->upcall_stub,
sizeof upcall->upcall_stub);
- error = dpif_recv(udpif->dpif, &upcall->dpif_upcall,
+ error = dpif_recv(udpif->dpif, 0, &upcall->dpif_upcall,
&upcall->upcall_buf);
if (error) {
/* upcall_destroy() can only be called on successfully received
@@ -888,7 +888,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
? ODPP_NONE
: odp_in_port;
- pid = dpif_port_get_pid(udpif->dpif, port);
+ pid = dpif_port_get_pid(udpif->dpif, port, 0);
odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, buf);
}
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 89d92af..e5e88e8 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -1484,7 +1484,7 @@ compose_sample_action(const struct xbridge *xbridge,
actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS);
odp_port = ofp_port_to_odp_port(xbridge, flow->in_port.ofp_port);
- pid = dpif_port_get_pid(xbridge->dpif, odp_port);
+ pid = dpif_port_get_pid(xbridge->dpif, odp_port, 0);
cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size, odp_actions);
nl_msg_end_nested(odp_actions, actions_offset);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index c597114..4e109a4 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -472,7 +472,7 @@ type_run(const char *type)
backer->recv_set_enable = true;
- error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
+ error = dpif_recv_set(backer->dpif, backer->recv_set_enable, 1);
if (error) {
VLOG_ERR("Failed to enable receiving packets in dpif.");
return error;
@@ -482,6 +482,7 @@ type_run(const char *type)
}
if (backer->recv_set_enable) {
+ dpif_handlers_set(backer->dpif, 1);
udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
}
@@ -887,7 +888,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
shash_add(&all_dpif_backers, type, backer);
- error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
+ error = dpif_recv_set(backer->dpif, backer->recv_set_enable, 1);
if (error) {
VLOG_ERR("failed to listen on datapath of type %s: %s",
type, ovs_strerror(error));
@@ -898,6 +899,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
backer->max_mpls_depth = check_max_mpls_depth(backer);
if (backer->recv_set_enable) {
+ dpif_handlers_set(backer->dpif, 1);
udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
}
@@ -932,7 +934,7 @@ check_variable_length_userdata(struct dpif_backer *backer)
ofpbuf_init(&actions, 64);
start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE);
nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID,
- dpif_port_get_pid(backer->dpif, ODPP_NONE));
+ dpif_port_get_pid(backer->dpif, ODPP_NONE, 0));
nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4);
nl_msg_end_nested(&actions, start);
--
1.7.9.5
More information about the dev
mailing list