[ovs-dev] [PATCH] dpif-netdev: Remove an extra memcpy of packet data from datapath-upcall interface for bridges with datapath_type=netdev.

Jarno Rajahalme jrajahalme at nicira.com
Tue Jun 3 17:10:28 UTC 2014


Ryan,

See comments below.

  Jarno

On Jun 2, 2014, at 3:54 PM, Ryan Wilson <wryan at nicira.com> wrote:

> When a bridge of datatype type netdev receives a packet, it copies the
> packet from the NIC to a buffer in userspace. Currently, when making
> an upcall, the packet is again copied to the upcall's buffer. However,
> this extra copy is not necessary when the datapath exists in userspace
> as the upcall can directly access the packet data.
> 
> This patch eliminates this extra copy of the packet data in most cases.
> In cases where the packet may still be used later by callers of
> dp_netdev_execute_actions, making a copy of the packet data is still
> necessary.
> ---
> lib/dpif-netdev.c |   15 +++++----------
> lib/ofpbuf.c      |   18 ++++++++++++++++++
> lib/ofpbuf.h      |    2 ++
> 3 files changed, 25 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 91c83d6..7cb0478 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -2024,7 +2024,6 @@ dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet,
>                                    miniflow_hash_5tuple(&key.flow, 0)
>                                    % dp->n_handlers,
>                                    DPIF_UC_MISS, &key.flow, NULL);
> -        ofpbuf_delete(packet);
>     }
> }
> 
> @@ -2063,7 +2062,6 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
>         if (userdata) {
>             buf_size += NLA_ALIGN(userdata->nla_len);
>         }
> -        buf_size += ofpbuf_size(packet);
>         ofpbuf_init(buf, buf_size);
> 
>         /* Put ODP flow. */
> @@ -2078,15 +2076,14 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
>                                           NLA_ALIGN(userdata->nla_len));
>         }
> 
> -        ofpbuf_set_data(&upcall->packet,
> -                        ofpbuf_put(buf, ofpbuf_data(packet), ofpbuf_size(packet)));
> -        ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet));
> +        ofpbuf_set(&upcall->packet, packet);
> 

Did you consider the possibility to just assign the packet instead? We do this elsewhere, so it should work here too:

    upcall->packet = *packet;

>         seq_change(q->seq);
> 
>         error = 0;
>     } else {
>         dp_netdev_count_packet(dp, DP_STAT_LOST);
> +        ofpbuf_delete(packet);
>         error = ENOBUFS;
>     }
>     ovs_mutex_unlock(&q->mutex);
> @@ -2120,19 +2117,17 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
>         break;
> 
>     case OVS_ACTION_ATTR_USERSPACE: {
> +        struct ofpbuf *userspace_packet;
>         const struct nlattr *userdata;
> 
>         userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
> +        userspace_packet = may_steal ? packet : ofpbuf_clone(packet);
> 
> -        dp_netdev_output_userspace(aux->dp, packet,
> +        dp_netdev_output_userspace(aux->dp, userspace_packet,
>                                    miniflow_hash_5tuple(aux->key, 0)
>                                        % aux->dp->n_handlers,
>                                    DPIF_UC_ACTION, aux->key,
>                                    userdata);
> -
> -        if (may_steal) {
> -            ofpbuf_delete(packet);
> -        }
>         break;
>     }
> 
> diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
> index 1f4b61d..ade940b 100644
> --- a/lib/ofpbuf.c
> +++ b/lib/ofpbuf.c
> @@ -555,3 +555,21 @@ ofpbuf_resize_l2(struct ofpbuf *b, int increment)
>     ofpbuf_adjust_layer_offset(&b->l2_5_ofs, increment);
>     return b->frame;
> }
> +
> +/* Sets ofpbuf 'dst' equal to 'src'. Note that 'dst' and 'src' share the
> + * same buffer, so calling ofpbuf_delete('dst') will also delete 'src'. */
> +void
> +ofpbuf_set(struct ofpbuf *dst, struct ofpbuf *src)
> +{
> +    ofpbuf_set_base(dst, ofpbuf_base(src));
> +    ofpbuf_set_data(dst, ofpbuf_data(src));
> +    ofpbuf_set_size(dst, ofpbuf_size(src));
> +
> +    dst->allocated = src->allocated;
> +    dst->frame = src->frame;
> +    dst->l2_5_ofs = src->l2_5_ofs;
> +    dst->l3_ofs = src->l3_ofs;
> +    dst->l4_ofs = src->l4_ofs;
> +    dst->source = src->source;
> +    dst->list_node = src->list_node;
> +}

This would be unnecessary if the plain assignment works also for DPDK.

> diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h
> index 85be899..ec08c27 100644
> --- a/lib/ofpbuf.h
> +++ b/lib/ofpbuf.h
> @@ -101,6 +101,8 @@ static inline const void *ofpbuf_get_udp_payload(const struct ofpbuf *);
> static inline const void *ofpbuf_get_sctp_payload(const struct ofpbuf *);
> static inline const void *ofpbuf_get_icmp_payload(const struct ofpbuf *);
> 
> +void ofpbuf_set(struct ofpbuf *, struct ofpbuf *);
> +
> void ofpbuf_use(struct ofpbuf *, void *, size_t);
> void ofpbuf_use_stack(struct ofpbuf *, void *, size_t);
> void ofpbuf_use_stub(struct ofpbuf *, void *, size_t);
> -- 
> 1.7.9.5
> 
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev




More information about the dev mailing list