[ovs-dev] [PATCH v6] dpif-netdev: Remove an extra memcpy of packet data from datapath-upcall interface for bridges with datapath_type=netdev.

Ryan Wilson wryan at nicira.com
Wed Jun 4 18:00:30 UTC 2014


When a bridge of datatype type netdev receives a packet, it copies the
packet from the NIC to a buffer in userspace. Currently, when making
an upcall, the packet is again copied to the upcall's buffer. However,
this extra copy is not necessary when the datapath exists in userspace
as the upcall can directly access the packet data.

This patch eliminates this extra copy of the packet data in most cases.
In cases where the packet may still be used later by callers of
dp_netdev_execute_actions, making a copy of the packet data is still
necessary.

This patch also adds a dpdk_buf field to 'struct ofpbuf' when using
DPDK. This field holds a pointer to the allocated DPDK buffer in the
rte_mempool. Thus, an upcall packet ofpbuf allocated on the stack can
now share data and free memory of a rte_mempool allocated ofpbuf.

Signed-off-by: Ryan Wilson <wryan at nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme at nicira.com>
---
v2: Addressed Jarno's comment to use direct pointer assignment for
upcall->packet instead of ofpbuf_set().
v3: Addressed issues with DPDK mentioned by Pravin.
v4: Fixed various bugs found in perf test
v5: Addressed Jarno's comments with regards to DPDK buf attribute.
v6: Fix commit message
---
 lib/dpif-netdev.c |   15 +++++----------
 lib/netdev-dpdk.c |    2 +-
 lib/ofpbuf.c      |    9 ++++++++-
 lib/ofpbuf.h      |    3 +++
 4 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 91c83d6..c89ae20 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2024,7 +2024,6 @@ dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet,
                                    miniflow_hash_5tuple(&key.flow, 0)
                                    % dp->n_handlers,
                                    DPIF_UC_MISS, &key.flow, NULL);
-        ofpbuf_delete(packet);
     }
 }
 
@@ -2063,7 +2062,6 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
         if (userdata) {
             buf_size += NLA_ALIGN(userdata->nla_len);
         }
-        buf_size += ofpbuf_size(packet);
         ofpbuf_init(buf, buf_size);
 
         /* Put ODP flow. */
@@ -2078,15 +2076,14 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
                                           NLA_ALIGN(userdata->nla_len));
         }
 
-        ofpbuf_set_data(&upcall->packet,
-                        ofpbuf_put(buf, ofpbuf_data(packet), ofpbuf_size(packet)));
-        ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet));
+        upcall->packet = *packet;
 
         seq_change(q->seq);
 
         error = 0;
     } else {
         dp_netdev_count_packet(dp, DP_STAT_LOST);
+        ofpbuf_delete(packet);
         error = ENOBUFS;
     }
     ovs_mutex_unlock(&q->mutex);
@@ -2120,19 +2117,17 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
         break;
 
     case OVS_ACTION_ATTR_USERSPACE: {
+        struct ofpbuf *userspace_packet;
         const struct nlattr *userdata;
 
         userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
+        userspace_packet = may_steal ? packet : ofpbuf_clone(packet);
 
-        dp_netdev_output_userspace(aux->dp, packet,
+        dp_netdev_output_userspace(aux->dp, userspace_packet,
                                    miniflow_hash_5tuple(aux->key, 0)
                                        % aux->dp->n_handlers,
                                    DPIF_UC_ACTION, aux->key,
                                    userdata);
-
-        if (may_steal) {
-            ofpbuf_delete(packet);
-        }
         break;
     }
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index ba41d2e..9370e5f 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -205,7 +205,7 @@ dpdk_rte_mzalloc(size_t sz)
 void
 free_dpdk_buf(struct ofpbuf *b)
 {
-    struct rte_mbuf *pkt = (struct rte_mbuf *) b;
+    struct rte_mbuf *pkt = (struct rte_mbuf *) b->dpdk_buf;
 
     rte_mempool_put(pkt->pool, pkt);
 }
diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
index 1f4b61d..fb59ea5 100644
--- a/lib/ofpbuf.c
+++ b/lib/ofpbuf.c
@@ -117,6 +117,9 @@ void
 ofpbuf_init_dpdk(struct ofpbuf *b, size_t allocated)
 {
     ofpbuf_init__(b, allocated, OFPBUF_DPDK);
+#ifdef DPDK_NETDEV
+    b->dpdk_buf = b;
+#endif
 }
 
 /* Initializes 'b' as an empty ofpbuf with an initial capacity of 'size'
@@ -134,8 +137,12 @@ ofpbuf_uninit(struct ofpbuf *b)
     if (b) {
         if (b->source == OFPBUF_MALLOC) {
             free(ofpbuf_base(b));
+        } else if (b->source == OFPBUF_DPDK) {
+#ifdef DPDK_NETDEV
+            ovs_assert(b != b->dpdk_buf);
+#endif
+            free_dpdk_buf(b);
         }
-        ovs_assert(b->source != OFPBUF_DPDK);
     }
 }
 
diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h
index 85be899..13a3e9d 100644
--- a/lib/ofpbuf.h
+++ b/lib/ofpbuf.h
@@ -59,6 +59,7 @@ enum OVS_PACKED_ENUM ofpbuf_source {
 struct ofpbuf {
 #ifdef DPDK_NETDEV
     struct rte_mbuf mbuf;       /* DPDK mbuf */
+    void *dpdk_buf;             /* First byte of allocated DPDK buffer. */
 #else
     void *base_;                 /* First byte of allocated space. */
     void *data_;                 /* First byte actually in use. */
@@ -354,6 +355,8 @@ static inline const void *ofpbuf_get_icmp_payload(const struct ofpbuf *b)
 }
 
 #ifdef DPDK_NETDEV
+BUILD_ASSERT_DECL(offsetof(struct ofpbuf, mbuf) == 0);
+
 static inline void * ofpbuf_data(const struct ofpbuf *b)
 {
     return b->mbuf.pkt.data;
-- 
1.7.9.5




More information about the dev mailing list