[ovs-dev] [PATCH 1/9] netdev: Extend rx_recv to pass multiple packets.
Pravin
pshelar at nicira.com
Tue Mar 18 20:53:06 UTC 2014
DPDK can receive multiple packets at once, but the current netdev API
does not allow that. This patch allows dpif-netdev to receive a batch
of packets in a single rx_recv() call for any netdev port. This will be
used by dpdk-netdev.
Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
---
lib/automake.mk | 1 +
lib/dpif-netdev.c | 51 ++++++++++++++++++-------------------------------
lib/dpif-netdev.h | 40 ++++++++++++++++++++++++++++++++++++++
lib/netdev-dummy.c | 26 +++++++++----------------
lib/netdev-linux.c | 20 ++++++++++++++++---
lib/netdev-provider.h | 24 +++++------------------
lib/netdev.c | 24 +++++------------------
lib/netdev.h | 2 +-
lib/packets.c | 9 +++++++++
lib/packets.h | 1 +
10 files changed, 107 insertions(+), 91 deletions(-)
create mode 100644 lib/dpif-netdev.h
diff --git a/lib/automake.mk b/lib/automake.mk
index e22165c..832b7f9 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -52,6 +52,7 @@ lib_libopenvswitch_la_SOURCES = \
lib/dhparams.h \
lib/dirs.h \
lib/dpif-netdev.c \
+ lib/dpif-netdev.h \
lib/dpif-provider.h \
lib/dpif.c \
lib/dpif.h \
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 3bbfd2a..d78fb25 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -34,6 +34,7 @@
#include "classifier.h"
#include "csum.h"
#include "dpif.h"
+#include "dpif-netdev.h"
#include "dpif-provider.h"
#include "dummy.h"
#include "dynamic-string.h"
@@ -68,10 +69,6 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
/* Configuration parameters. */
enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
-/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
- * headers to be aligned on a 4-byte boundary. */
-enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
-
/* Queues. */
enum { N_QUEUES = 2 }; /* Number of queues for dpif_recv(). */
enum { MAX_QUEUE_LEN = 128 }; /* Maximum number of packets per queue. */
@@ -1443,6 +1440,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
struct pkt_metadata *md = &execute->md;
+ struct ofpbuf *packet;
struct flow key;
if (execute->packet->size < ETH_HEADER_LEN ||
@@ -1453,8 +1451,9 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
/* Extract flow key. */
flow_extract(execute->packet, md, &key);
+ packet = ofpbuf_clone(execute->packet);
ovs_rwlock_rdlock(&dp->port_rwlock);
- dp_netdev_execute_actions(dp, &key, execute->packet, md, execute->actions,
+ dp_netdev_execute_actions(dp, &key, packet, md, execute->actions,
execute->actions_len);
ovs_rwlock_unlock(&dp->port_rwlock);
@@ -1586,12 +1585,10 @@ dp_forwarder_main(void *f_)
{
struct dp_forwarder *f = f_;
struct dp_netdev *dp = f->dp;
- struct ofpbuf packet;
f->name = xasprintf("forwarder_%u", ovsthread_id_self());
set_subprogram_name("%s", f->name);
- ofpbuf_init(&packet, 0);
while (!latch_is_set(&dp->exit_latch)) {
bool received_anything;
int i;
@@ -1605,25 +1602,19 @@ dp_forwarder_main(void *f_)
if (port->rx
&& port->node.hash >= f->min_hash
&& port->node.hash <= f->max_hash) {
- int buf_size;
+ struct ofpbuf *packets[MAX_RX_BATCH];
+ int count;
int error;
- int mtu;
-
- if (netdev_get_mtu(port->netdev, &mtu)) {
- mtu = ETH_PAYLOAD_MAX;
- }
- buf_size = DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + mtu;
-
- ofpbuf_clear(&packet);
- ofpbuf_reserve_with_tailroom(&packet, DP_NETDEV_HEADROOM,
- buf_size);
- error = netdev_rx_recv(port->rx, &packet);
+ error = netdev_rx_recv(port->rx, packets, &count);
if (!error) {
+ int i;
struct pkt_metadata md
= PKT_METADATA_INITIALIZER(port->port_no);
- dp_netdev_port_input(dp, &packet, &md);
+ for (i = 0; i < count; i++) {
+ dp_netdev_port_input(dp, packets[i], &md);
+ }
received_anything = true;
} else if (error != EAGAIN && error != EOPNOTSUPP) {
static struct vlog_rate_limit rl
@@ -1659,7 +1650,6 @@ dp_forwarder_main(void *f_)
poll_block();
}
- ofpbuf_uninit(&packet);
free(f->name);
@@ -1741,6 +1731,7 @@ dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
} else {
ovsthread_counter_inc(dp->n_missed, 1);
dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, NULL);
+ ofpbuf_delete(packet);
}
}
@@ -1767,6 +1758,7 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
if (userdata) {
buf_size += NLA_ALIGN(userdata->nla_len);
}
+ buf_size += packet->size;
ofpbuf_init(buf, buf_size);
/* Put ODP flow. */
@@ -1780,10 +1772,8 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
NLA_ALIGN(userdata->nla_len));
}
- /* Steal packet data. */
- ovs_assert(packet->source == OFPBUF_MALLOC);
- upcall->packet = *packet;
- ofpbuf_use(packet, NULL, 0);
+ upcall->packet.data = ofpbuf_put(buf, packet->data, packet->size);
+ upcall->packet.size = packet->size;
seq_change(dp->queue_seq);
@@ -1825,15 +1815,8 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
- /* Make a copy if we are not allowed to steal the packet's data. */
- if (!may_steal) {
- packet = ofpbuf_clone_with_headroom(packet, DP_NETDEV_HEADROOM);
- }
dp_netdev_output_userspace(aux->dp, packet, DPIF_UC_ACTION, aux->key,
userdata);
- if (!may_steal) {
- ofpbuf_uninit(packet);
- }
break;
}
case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -1846,6 +1829,10 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
}
+
+ if (may_steal) {
+ ofpbuf_delete(packet);
+ }
}
static void
diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
new file mode 100644
index 0000000..64c93ee
--- /dev/null
+++ b/lib/dpif-netdev.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DPIF_NETDEV_H
+#define DPIF_NETDEV_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "openvswitch/types.h"
+#include "packets.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
+ * headers to be aligned on a 4-byte boundary. */
+enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
+
+enum { MAX_RX_BATCH = 256 }; /* Maximum number of packets per rx_recv() batch. */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* dpif-netdev.h */
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index f23fc9f..27487f9 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -755,12 +755,11 @@ netdev_dummy_rx_dealloc(struct netdev_rx *rx_)
}
static int
-netdev_dummy_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer)
+netdev_dummy_rx_recv(struct netdev_rx *rx_, struct ofpbuf **arr, int *c)
{
struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);
struct ofpbuf *packet;
- int retval;
ovs_mutex_lock(&netdev->mutex);
if (!list_is_empty(&rx->recv_queue)) {
@@ -774,22 +773,15 @@ netdev_dummy_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer)
if (!packet) {
return EAGAIN;
}
+ ovs_mutex_lock(&netdev->mutex);
+ netdev->stats.rx_packets++;
+ netdev->stats.rx_bytes += packet->size;
+ ovs_mutex_unlock(&netdev->mutex);
- if (packet->size <= ofpbuf_tailroom(buffer)) {
- memcpy(buffer->data, packet->data, packet->size);
- buffer->size += packet->size;
- retval = 0;
-
- ovs_mutex_lock(&netdev->mutex);
- netdev->stats.rx_packets++;
- netdev->stats.rx_bytes += packet->size;
- ovs_mutex_unlock(&netdev->mutex);
- } else {
- retval = EMSGSIZE;
- }
- ofpbuf_delete(packet);
-
- return retval;
+ packet_set_size(packet, packet->size);
+ arr[0] = packet;
+ *c = 1;
+ return 0;
}
static void
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 75ce7c6..574a572 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -50,6 +50,7 @@
#include "connectivity.h"
#include "coverage.h"
#include "dpif-linux.h"
+#include "dpif-netdev.h"
#include "dynamic-string.h"
#include "fatal-signal.h"
#include "hash.h"
@@ -461,6 +462,7 @@ static int af_packet_sock(void);
static bool netdev_linux_miimon_enabled(void);
static void netdev_linux_miimon_run(void);
static void netdev_linux_miimon_wait(void);
+static int netdev_linux_get_mtu__(struct netdev_linux *netdev, int *mtup);
static bool
is_netdev_linux_class(const struct netdev_class *netdev_class)
@@ -984,10 +986,19 @@ netdev_linux_rx_recv_tap(int fd, struct ofpbuf *buffer)
}
static int
-netdev_linux_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer)
+netdev_linux_rx_recv(struct netdev_rx *rx_, struct ofpbuf **packet, int *c)
{
struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
- int retval;
+ struct netdev *netdev = rx->up.netdev;
+ struct ofpbuf *buffer;
+ ssize_t retval;
+ int mtu;
+
+ if (netdev_linux_get_mtu__(netdev_linux_cast(netdev), &mtu)) {
+ mtu = ETH_PAYLOAD_MAX;
+ }
+
+ buffer = ofpbuf_new_with_headroom(VLAN_ETH_HEADER_LEN + mtu, DP_NETDEV_HEADROOM);
retval = (rx->is_tap
? netdev_linux_rx_recv_tap(rx->fd, buffer)
@@ -995,8 +1006,11 @@ netdev_linux_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer)
if (retval && retval != EAGAIN && retval != EMSGSIZE) {
VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
ovs_strerror(errno), netdev_rx_get_name(rx_));
+ } else {
+ packet_set_size(buffer, buffer->size);
+ packet[0] = buffer;
+ *c = 1;
}
-
return retval;
}
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 673d3ab..9bacaa0 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -634,28 +634,14 @@ struct netdev_class {
void (*rx_destruct)(struct netdev_rx *);
void (*rx_dealloc)(struct netdev_rx *);
- /* Attempts to receive a packet from 'rx' into the tailroom of 'buffer',
- * which should initially be empty. If successful, returns 0 and
- * increments 'buffer->size' by the number of bytes in the received packet,
- * otherwise a positive errno value. Returns EAGAIN immediately if no
- * packet is ready to be received.
- *
- * Must return EMSGSIZE, and discard the packet, if the received packet
- * is longer than 'ofpbuf_tailroom(buffer)'.
- *
- * Implementations may make use of VLAN_HEADER_LEN bytes of tailroom to
- * add a VLAN header which is obtained out-of-band to the packet. If
- * this occurs then VLAN_HEADER_LEN bytes of tailroom will no longer be
- * available for the packet, otherwise it may be used for the packet
- * itself.
- *
- * It is advised that the tailroom of 'buffer' should be
- * VLAN_HEADER_LEN bytes longer than the MTU to allow space for an
- * out-of-band VLAN header to be added to the packet.
+ /* Attempts to receive a batch of packets from 'rx' and stores pointers to
+ * them in the 'pkt' array. The netdev is responsible for allocating the
+ * buffers and sets '*cnt' to the number of packets in the batch. Once the
+ * packets are returned to the caller, the netdev gives up ownership of them.
*
* This function may be set to null if it would always return EOPNOTSUPP
* anyhow. */
- int (*rx_recv)(struct netdev_rx *rx, struct ofpbuf *buffer);
+ int (*rx_recv)(struct netdev_rx *rx, struct ofpbuf **pkt, int *cnt);
/* Registers with the poll loop to wake up from the next call to
* poll_block() when a packet is ready to be received with netdev_rx_recv()
diff --git a/lib/netdev.c b/lib/netdev.c
index e9c8d8f..a058742 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -551,10 +551,7 @@ netdev_rx_close(struct netdev_rx *rx)
}
}
-/* Attempts to receive a packet from 'rx' into the tailroom of 'buffer', which
- * must initially be empty. If successful, returns 0 and increments
- * 'buffer->size' by the number of bytes in the received packet, otherwise a
- * positive errno value.
+/* Attempts to receive a batch of packets from 'rx'.
*
* Returns EAGAIN immediately if no packet is ready to be received.
*
@@ -562,10 +559,7 @@ netdev_rx_close(struct netdev_rx *rx)
* than 'ofpbuf_tailroom(buffer)'.
*
* Implementations may make use of VLAN_HEADER_LEN bytes of tailroom to
- * add a VLAN header which is obtained out-of-band to the packet. If
- * this occurs then VLAN_HEADER_LEN bytes of tailroom will no longer be
- * available for the packet, otherwise it may be used for the packet
- * itself.
+ * add a VLAN header which is obtained out-of-band to the packet.
*
* It is advised that the tailroom of 'buffer' should be
* VLAN_HEADER_LEN bytes longer than the MTU to allow space for an
@@ -575,23 +569,15 @@ netdev_rx_close(struct netdev_rx *rx)
* This function may be set to null if it would always return EOPNOTSUPP
* anyhow. */
int
-netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf *buffer)
+netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf **buffers, int *cnt)
{
int retval;
- ovs_assert(buffer->size == 0);
- ovs_assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN);
-
- retval = rx->netdev->netdev_class->rx_recv(rx, buffer);
+ retval = rx->netdev->netdev_class->rx_recv(rx, buffers, cnt);
if (!retval) {
COVERAGE_INC(netdev_received);
- if (buffer->size < ETH_TOTAL_MIN) {
- ofpbuf_put_zeros(buffer, ETH_TOTAL_MIN - buffer->size);
- }
- return 0;
- } else {
- return retval;
}
+ return retval;
}
/* Arranges for poll_block() to wake up when a packet is ready to be received
diff --git a/lib/netdev.h b/lib/netdev.h
index 410c35b..bfd8f91 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -161,7 +161,7 @@ void netdev_rx_close(struct netdev_rx *);
const char *netdev_rx_get_name(const struct netdev_rx *);
-int netdev_rx_recv(struct netdev_rx *, struct ofpbuf *);
+int netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf **buffers, int *cnt);
void netdev_rx_wait(struct netdev_rx *);
int netdev_rx_drain(struct netdev_rx *);
diff --git a/lib/packets.c b/lib/packets.c
index 3f7d6eb..6f2fca6 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1012,3 +1012,12 @@ void pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow)
pkt_metadata_init(md, &flow->tunnel, flow->skb_priority,
flow->pkt_mark, &flow->in_port);
}
+
+void
+packet_set_size(struct ofpbuf *b, int size)
+{
+ b->size = size;
+ if (b->size < ETH_TOTAL_MIN) { /* Zero-pad short frames to the minimum. */
+ ofpbuf_put_zeros(b, ETH_TOTAL_MIN - b->size);
+ }
+}
diff --git a/lib/packets.h b/lib/packets.h
index e6b3303..f41a934 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -671,5 +671,6 @@ void packet_set_sctp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 dst);
uint16_t packet_get_tcp_flags(const struct ofpbuf *, const struct flow *);
void packet_format_tcp_flags(struct ds *, uint16_t);
const char *packet_tcp_flag_to_string(uint32_t flag);
+void packet_set_size(struct ofpbuf *b, int size);
#endif /* packets.h */
--
1.7.9.5
More information about the dev
mailing list