[ovs-dev] [PATCH 1/9] netdev: Extend rx_recv to pass multiple packets.

Pravin pshelar at nicira.com
Tue Mar 18 20:53:06 UTC 2014


DPDK can receive multiple packets at once, but the current netdev API
does not allow that.  The following patch allows dpif-netdev to receive
a batch of packets in a single rx_recv() call for any netdev port.  This
will be used by dpdk-netdev.

Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
---
 lib/automake.mk       |    1 +
 lib/dpif-netdev.c     |   51 ++++++++++++++++++-------------------------------
 lib/dpif-netdev.h     |   40 ++++++++++++++++++++++++++++++++++++++
 lib/netdev-dummy.c    |   26 +++++++++----------------
 lib/netdev-linux.c    |   20 ++++++++++++++++---
 lib/netdev-provider.h |   24 +++++------------------
 lib/netdev.c          |   24 +++++------------------
 lib/netdev.h          |    2 +-
 lib/packets.c         |    9 +++++++++
 lib/packets.h         |    1 +
 10 files changed, 107 insertions(+), 91 deletions(-)
 create mode 100644 lib/dpif-netdev.h

diff --git a/lib/automake.mk b/lib/automake.mk
index e22165c..832b7f9 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -52,6 +52,7 @@ lib_libopenvswitch_la_SOURCES = \
 	lib/dhparams.h \
 	lib/dirs.h \
 	lib/dpif-netdev.c \
+	lib/dpif-netdev.h \
 	lib/dpif-provider.h \
 	lib/dpif.c \
 	lib/dpif.h \
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 3bbfd2a..d78fb25 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -34,6 +34,7 @@
 #include "classifier.h"
 #include "csum.h"
 #include "dpif.h"
+#include "dpif-netdev.h"
 #include "dpif-provider.h"
 #include "dummy.h"
 #include "dynamic-string.h"
@@ -68,10 +69,6 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 /* Configuration parameters. */
 enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
 
-/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
- * headers to be aligned on a 4-byte boundary.  */
-enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
-
 /* Queues. */
 enum { N_QUEUES = 2 };          /* Number of queues for dpif_recv(). */
 enum { MAX_QUEUE_LEN = 128 };   /* Maximum number of packets per queue. */
@@ -1443,6 +1440,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct pkt_metadata *md = &execute->md;
+    struct ofpbuf *packet;
     struct flow key;
 
     if (execute->packet->size < ETH_HEADER_LEN ||
@@ -1453,8 +1451,9 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
     /* Extract flow key. */
     flow_extract(execute->packet, md, &key);
 
+    packet = ofpbuf_clone(execute->packet);
     ovs_rwlock_rdlock(&dp->port_rwlock);
-    dp_netdev_execute_actions(dp, &key, execute->packet, md, execute->actions,
+    dp_netdev_execute_actions(dp, &key, packet, md, execute->actions,
                               execute->actions_len);
     ovs_rwlock_unlock(&dp->port_rwlock);
 
@@ -1586,12 +1585,10 @@ dp_forwarder_main(void *f_)
 {
     struct dp_forwarder *f = f_;
     struct dp_netdev *dp = f->dp;
-    struct ofpbuf packet;
 
     f->name = xasprintf("forwarder_%u", ovsthread_id_self());
     set_subprogram_name("%s", f->name);
 
-    ofpbuf_init(&packet, 0);
     while (!latch_is_set(&dp->exit_latch)) {
         bool received_anything;
         int i;
@@ -1605,25 +1602,19 @@ dp_forwarder_main(void *f_)
                 if (port->rx
                     && port->node.hash >= f->min_hash
                     && port->node.hash <= f->max_hash) {
-                    int buf_size;
+                    struct ofpbuf *packets[MAX_RX_BATCH];
+                    int count;
                     int error;
-                    int mtu;
-
-                    if (netdev_get_mtu(port->netdev, &mtu)) {
-                        mtu = ETH_PAYLOAD_MAX;
-                    }
-                    buf_size = DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + mtu;
-
-                    ofpbuf_clear(&packet);
-                    ofpbuf_reserve_with_tailroom(&packet, DP_NETDEV_HEADROOM,
-                                                 buf_size);
 
-                    error = netdev_rx_recv(port->rx, &packet);
+                    error = netdev_rx_recv(port->rx, packets, &count);
                     if (!error) {
+                        int i;
                         struct pkt_metadata md
                             = PKT_METADATA_INITIALIZER(port->port_no);
 
-                        dp_netdev_port_input(dp, &packet, &md);
+                        for (i = 0; i < count; i++) {
+                            dp_netdev_port_input(dp, packets[i], &md);
+                        }
                         received_anything = true;
                     } else if (error != EAGAIN && error != EOPNOTSUPP) {
                         static struct vlog_rate_limit rl
@@ -1659,7 +1650,6 @@ dp_forwarder_main(void *f_)
 
         poll_block();
     }
-    ofpbuf_uninit(&packet);
 
     free(f->name);
 
@@ -1741,6 +1731,7 @@ dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
     } else {
         ovsthread_counter_inc(dp->n_missed, 1);
         dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, NULL);
+        ofpbuf_delete(packet);
     }
 }
 
@@ -1767,6 +1758,7 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
         if (userdata) {
             buf_size += NLA_ALIGN(userdata->nla_len);
         }
+        buf_size += packet->size;
         ofpbuf_init(buf, buf_size);
 
         /* Put ODP flow. */
@@ -1780,10 +1772,8 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
                                           NLA_ALIGN(userdata->nla_len));
         }
 
-        /* Steal packet data. */
-        ovs_assert(packet->source == OFPBUF_MALLOC);
-        upcall->packet = *packet;
-        ofpbuf_use(packet, NULL, 0);
+        upcall->packet.data = ofpbuf_put(buf, packet->data, packet->size);
+        upcall->packet.size = packet->size;
 
         seq_change(dp->queue_seq);
 
@@ -1825,15 +1815,8 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
 
         userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
 
-        /* Make a copy if we are not allowed to steal the packet's data. */
-        if (!may_steal) {
-            packet = ofpbuf_clone_with_headroom(packet, DP_NETDEV_HEADROOM);
-        }
         dp_netdev_output_userspace(aux->dp, packet, DPIF_UC_ACTION, aux->key,
                                    userdata);
-        if (!may_steal) {
-            ofpbuf_uninit(packet);
-        }
         break;
     }
     case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -1846,6 +1829,10 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
     case __OVS_ACTION_ATTR_MAX:
         OVS_NOT_REACHED();
     }
+
+    if (may_steal) {
+        ofpbuf_delete(packet);
+    }
 }
 
 static void
diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
new file mode 100644
index 0000000..64c93ee
--- /dev/null
+++ b/lib/dpif-netdev.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DPIF_NETDEV_H
+#define DPIF_NETDEV_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "openvswitch/types.h"
+#include "packets.h"
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
+ * headers to be aligned on a 4-byte boundary.  */
+enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
+
+enum { MAX_RX_BATCH = 256 };     /* Maximum number of packets in an rx batch. */
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* dpif-netdev.h */
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index f23fc9f..27487f9 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -755,12 +755,11 @@ netdev_dummy_rx_dealloc(struct netdev_rx *rx_)
 }
 
 static int
-netdev_dummy_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer)
+netdev_dummy_rx_recv(struct netdev_rx *rx_, struct ofpbuf **arr, int *c)
 {
     struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
     struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);
     struct ofpbuf *packet;
-    int retval;
 
     ovs_mutex_lock(&netdev->mutex);
     if (!list_is_empty(&rx->recv_queue)) {
@@ -774,22 +773,15 @@ netdev_dummy_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer)
     if (!packet) {
         return EAGAIN;
     }
+    ovs_mutex_lock(&netdev->mutex);
+    netdev->stats.rx_packets++;
+    netdev->stats.rx_bytes += packet->size;
+    ovs_mutex_unlock(&netdev->mutex);
 
-    if (packet->size <= ofpbuf_tailroom(buffer)) {
-        memcpy(buffer->data, packet->data, packet->size);
-        buffer->size += packet->size;
-        retval = 0;
-
-        ovs_mutex_lock(&netdev->mutex);
-        netdev->stats.rx_packets++;
-        netdev->stats.rx_bytes += packet->size;
-        ovs_mutex_unlock(&netdev->mutex);
-    } else {
-        retval = EMSGSIZE;
-    }
-    ofpbuf_delete(packet);
-
-    return retval;
+    packet_set_size(packet, packet->size);
+    arr[0] = packet;
+    *c = 1;
+    return 0;
 }
 
 static void
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 75ce7c6..574a572 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -50,6 +50,7 @@
 #include "connectivity.h"
 #include "coverage.h"
 #include "dpif-linux.h"
+#include "dpif-netdev.h"
 #include "dynamic-string.h"
 #include "fatal-signal.h"
 #include "hash.h"
@@ -461,6 +462,7 @@ static int af_packet_sock(void);
 static bool netdev_linux_miimon_enabled(void);
 static void netdev_linux_miimon_run(void);
 static void netdev_linux_miimon_wait(void);
+static int netdev_linux_get_mtu__(struct netdev_linux *netdev, int *mtup);
 
 static bool
 is_netdev_linux_class(const struct netdev_class *netdev_class)
@@ -984,10 +986,19 @@ netdev_linux_rx_recv_tap(int fd, struct ofpbuf *buffer)
 }
 
 static int
-netdev_linux_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer)
+netdev_linux_rx_recv(struct netdev_rx *rx_, struct ofpbuf **packet, int *c)
 {
     struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
-    int retval;
+    struct netdev *netdev = rx->up.netdev;
+    struct ofpbuf *buffer;
+    ssize_t retval;
+    int mtu;
+
+    if (netdev_linux_get_mtu__(netdev_linux_cast(netdev), &mtu)) {
+        mtu = ETH_PAYLOAD_MAX;
+    }
+
+    buffer = ofpbuf_new_with_headroom(VLAN_ETH_HEADER_LEN + mtu, DP_NETDEV_HEADROOM);
 
     retval = (rx->is_tap
               ? netdev_linux_rx_recv_tap(rx->fd, buffer)
@@ -995,8 +1006,11 @@ netdev_linux_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer)
     if (retval && retval != EAGAIN && retval != EMSGSIZE) {
         VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
                      ovs_strerror(errno), netdev_rx_get_name(rx_));
+    } else {
+        packet_set_size(buffer, buffer->size);
+        packet[0] = buffer;
+        *c = 1;
     }
-
     return retval;
 }
 
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 673d3ab..9bacaa0 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -634,28 +634,14 @@ struct netdev_class {
     void (*rx_destruct)(struct netdev_rx *);
     void (*rx_dealloc)(struct netdev_rx *);
 
-    /* Attempts to receive a packet from 'rx' into the tailroom of 'buffer',
-     * which should initially be empty.  If successful, returns 0 and
-     * increments 'buffer->size' by the number of bytes in the received packet,
-     * otherwise a positive errno value.  Returns EAGAIN immediately if no
-     * packet is ready to be received.
-     *
-     * Must return EMSGSIZE, and discard the packet, if the received packet
-     * is longer than 'ofpbuf_tailroom(buffer)'.
-     *
-     * Implementations may make use of VLAN_HEADER_LEN bytes of tailroom to
-     * add a VLAN header which is obtained out-of-band to the packet. If
-     * this occurs then VLAN_HEADER_LEN bytes of tailroom will no longer be
-     * available for the packet, otherwise it may be used for the packet
-     * itself.
-     *
-     * It is advised that the tailroom of 'buffer' should be
-     * VLAN_HEADER_LEN bytes longer than the MTU to allow space for an
-     * out-of-band VLAN header to be added to the packet.
+    /* Attempts to receive a batch of packets from 'rx', storing pointers to
+     * them in the 'pkt' array.  The netdev allocates the buffers and sets
+     * '*cnt' to the number of packets in the batch.  Once packets are returned
+     * to the caller, the netdev gives up ownership of the ofpbuf data.
      *
      * This function may be set to null if it would always return EOPNOTSUPP
      * anyhow. */
-    int (*rx_recv)(struct netdev_rx *rx, struct ofpbuf *buffer);
+    int (*rx_recv)(struct netdev_rx *rx, struct ofpbuf **pkt, int *cnt);
 
     /* Registers with the poll loop to wake up from the next call to
      * poll_block() when a packet is ready to be received with netdev_rx_recv()
diff --git a/lib/netdev.c b/lib/netdev.c
index e9c8d8f..a058742 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -551,10 +551,7 @@ netdev_rx_close(struct netdev_rx *rx)
     }
 }
 
-/* Attempts to receive a packet from 'rx' into the tailroom of 'buffer', which
- * must initially be empty.  If successful, returns 0 and increments
- * 'buffer->size' by the number of bytes in the received packet, otherwise a
- * positive errno value.
+/* Attempts to receive a batch of packets from 'rx'.
  *
  * Returns EAGAIN immediately if no packet is ready to be received.
  *
@@ -562,10 +559,7 @@ netdev_rx_close(struct netdev_rx *rx)
  * than 'ofpbuf_tailroom(buffer)'.
  *
  * Implementations may make use of VLAN_HEADER_LEN bytes of tailroom to
- * add a VLAN header which is obtained out-of-band to the packet. If
- * this occurs then VLAN_HEADER_LEN bytes of tailroom will no longer be
- * available for the packet, otherwise it may be used for the packet
- * itself.
+ * add a VLAN header which is obtained out-of-band to the packet.
  *
  * It is advised that the tailroom of 'buffer' should be
  * VLAN_HEADER_LEN bytes longer than the MTU to allow space for an
@@ -575,23 +569,15 @@ netdev_rx_close(struct netdev_rx *rx)
  * This function may be set to null if it would always return EOPNOTSUPP
  * anyhow. */
 int
-netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf *buffer)
+netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf **buffers, int *cnt)
 {
     int retval;
 
-    ovs_assert(buffer->size == 0);
-    ovs_assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN);
-
-    retval = rx->netdev->netdev_class->rx_recv(rx, buffer);
+    retval = rx->netdev->netdev_class->rx_recv(rx, buffers, cnt);
     if (!retval) {
         COVERAGE_INC(netdev_received);
-        if (buffer->size < ETH_TOTAL_MIN) {
-            ofpbuf_put_zeros(buffer, ETH_TOTAL_MIN - buffer->size);
-        }
-        return 0;
-    } else {
-        return retval;
     }
+    return retval;
 }
 
 /* Arranges for poll_block() to wake up when a packet is ready to be received
diff --git a/lib/netdev.h b/lib/netdev.h
index 410c35b..bfd8f91 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -161,7 +161,7 @@ void netdev_rx_close(struct netdev_rx *);
 
 const char *netdev_rx_get_name(const struct netdev_rx *);
 
-int netdev_rx_recv(struct netdev_rx *, struct ofpbuf *);
+int netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf **buffers, int *cnt);
 void netdev_rx_wait(struct netdev_rx *);
 int netdev_rx_drain(struct netdev_rx *);
 
diff --git a/lib/packets.c b/lib/packets.c
index 3f7d6eb..6f2fca6 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1012,3 +1012,12 @@ void pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow)
     pkt_metadata_init(md, &flow->tunnel, flow->skb_priority,
                            flow->pkt_mark, &flow->in_port);
 }
+
+void
+packet_set_size(struct ofpbuf *b, int size)
+{
+    b->size = size;
+    if (b->size < ETH_TOTAL_MIN) {
+        ofpbuf_put_zeros(b, ETH_TOTAL_MIN - b->size);
+    }
+}
diff --git a/lib/packets.h b/lib/packets.h
index e6b3303..f41a934 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -671,5 +671,6 @@ void packet_set_sctp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 dst);
 uint16_t packet_get_tcp_flags(const struct ofpbuf *, const struct flow *);
 void packet_format_tcp_flags(struct ds *, uint16_t);
 const char *packet_tcp_flag_to_string(uint32_t flag);
+void packet_set_size(struct ofpbuf *b, int size);
 
 #endif /* packets.h */
-- 
1.7.9.5




More information about the dev mailing list