[ovs-dev] [PATCH v6] dpif-netdev: Remove an extra memcpy of packet data from datapath-upcall interface for bridges with datapath_type=netdev.

Pravin Shelar pshelar at nicira.com
Wed Jun 4 19:27:41 UTC 2014


Can you make subject shorter?

On Wed, Jun 4, 2014 at 11:00 AM, Ryan Wilson <wryan at nicira.com> wrote:
> When a bridge of datatype type netdev receives a packet, it copies the
> packet from the NIC to a buffer in userspace. Currently, when making
> an upcall, the packet is again copied to the upcall's buffer. However,
> this extra copy is not necessary when the datapath exists in userspace
> as the upcall can directly access the packet data.
>
> This patch eliminates this extra copy of the packet data in most cases.
> In cases where the packet may still be used later by callers of
> dp_netdev_execute_actions, making a copy of the packet data is still
> necessary.
>
> This patch also adds a dpdk_buf field to 'struct ofpbuf' when using
> DPDK. This field holds a pointer to the allocated DPDK buffer in the
> rte_mempool. Thus, an upcall packet ofpbuf allocated on the stack can
> now share data and free memory of a rte_mempool allocated ofpbuf.
>
> Signed-off-by: Ryan Wilson <wryan at nicira.com>
> Acked-by: Jarno Rajahalme <jrajahalme at nicira.com>

Otherwise looks good.
Acked-by: Pravin B Shelar <pshelar at nicira.com>

> ---
> v2: Addressed Jarno's comment to use direct pointer assignment for
> upcall->packet instead of ofpbuf_set().
> v3: Addressed issues with DPDK mentioned by Pravin.
> v4: Fixed various bugs found in perf test
> v5: Addressed Jarno's comments with regards to DPDK buf attribute.
> v6: Fix commit message
> ---
>  lib/dpif-netdev.c |   15 +++++----------
>  lib/netdev-dpdk.c |    2 +-
>  lib/ofpbuf.c      |    9 ++++++++-
>  lib/ofpbuf.h      |    3 +++
>  4 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 91c83d6..c89ae20 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -2024,7 +2024,6 @@ dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet,
>                                     miniflow_hash_5tuple(&key.flow, 0)
>                                     % dp->n_handlers,
>                                     DPIF_UC_MISS, &key.flow, NULL);
> -        ofpbuf_delete(packet);
>      }
>  }
>
> @@ -2063,7 +2062,6 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
>          if (userdata) {
>              buf_size += NLA_ALIGN(userdata->nla_len);
>          }
> -        buf_size += ofpbuf_size(packet);
>          ofpbuf_init(buf, buf_size);
>
>          /* Put ODP flow. */
> @@ -2078,15 +2076,14 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
>                                            NLA_ALIGN(userdata->nla_len));
>          }
>
> -        ofpbuf_set_data(&upcall->packet,
> -                        ofpbuf_put(buf, ofpbuf_data(packet), ofpbuf_size(packet)));
> -        ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet));
> +        upcall->packet = *packet;
>
>          seq_change(q->seq);
>
>          error = 0;
>      } else {
>          dp_netdev_count_packet(dp, DP_STAT_LOST);
> +        ofpbuf_delete(packet);
>          error = ENOBUFS;
>      }
>      ovs_mutex_unlock(&q->mutex);
> @@ -2120,19 +2117,17 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
>          break;
>
>      case OVS_ACTION_ATTR_USERSPACE: {
> +        struct ofpbuf *userspace_packet;
>          const struct nlattr *userdata;
>
>          userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
> +        userspace_packet = may_steal ? packet : ofpbuf_clone(packet);
>
> -        dp_netdev_output_userspace(aux->dp, packet,
> +        dp_netdev_output_userspace(aux->dp, userspace_packet,
>                                     miniflow_hash_5tuple(aux->key, 0)
>                                         % aux->dp->n_handlers,
>                                     DPIF_UC_ACTION, aux->key,
>                                     userdata);
> -
> -        if (may_steal) {
> -            ofpbuf_delete(packet);
> -        }
>          break;
>      }
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index ba41d2e..9370e5f 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -205,7 +205,7 @@ dpdk_rte_mzalloc(size_t sz)
>  void
>  free_dpdk_buf(struct ofpbuf *b)
>  {
> -    struct rte_mbuf *pkt = (struct rte_mbuf *) b;
> +    struct rte_mbuf *pkt = (struct rte_mbuf *) b->dpdk_buf;
>
>      rte_mempool_put(pkt->pool, pkt);
>  }
> diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
> index 1f4b61d..fb59ea5 100644
> --- a/lib/ofpbuf.c
> +++ b/lib/ofpbuf.c
> @@ -117,6 +117,9 @@ void
>  ofpbuf_init_dpdk(struct ofpbuf *b, size_t allocated)
>  {
>      ofpbuf_init__(b, allocated, OFPBUF_DPDK);
> +#ifdef DPDK_NETDEV
> +    b->dpdk_buf = b;
> +#endif
>  }
>
>  /* Initializes 'b' as an empty ofpbuf with an initial capacity of 'size'
> @@ -134,8 +137,12 @@ ofpbuf_uninit(struct ofpbuf *b)
>      if (b) {
>          if (b->source == OFPBUF_MALLOC) {
>              free(ofpbuf_base(b));
> +        } else if (b->source == OFPBUF_DPDK) {
> +#ifdef DPDK_NETDEV
> +            ovs_assert(b != b->dpdk_buf);
> +#endif
> +            free_dpdk_buf(b);
>          }
> -        ovs_assert(b->source != OFPBUF_DPDK);
>      }
>  }
>
> diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h
> index 85be899..13a3e9d 100644
> --- a/lib/ofpbuf.h
> +++ b/lib/ofpbuf.h
> @@ -59,6 +59,7 @@ enum OVS_PACKED_ENUM ofpbuf_source {
>  struct ofpbuf {
>  #ifdef DPDK_NETDEV
>      struct rte_mbuf mbuf;       /* DPDK mbuf */
> +    void *dpdk_buf;             /* First byte of allocated DPDK buffer. */
>  #else
>      void *base_;                 /* First byte of allocated space. */
>      void *data_;                 /* First byte actually in use. */
> @@ -354,6 +355,8 @@ static inline const void *ofpbuf_get_icmp_payload(const struct ofpbuf *b)
>  }
>
>  #ifdef DPDK_NETDEV
> +BUILD_ASSERT_DECL(offsetof(struct ofpbuf, mbuf) == 0);
> +
>  static inline void * ofpbuf_data(const struct ofpbuf *b)
>  {
>      return b->mbuf.pkt.data;
> --
> 1.7.9.5
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev



More information about the dev mailing list