[ovs-dev] [PATCH 5/5] dpif-netdev: batch packet sending

Pravin Shelar pshelar at nicira.com
Fri May 30 23:55:01 UTC 2014


On Fri, May 23, 2014 at 11:04 AM, Daniele Di Proietto
<ddiproietto at vmware.com> wrote:
> Signed-off-by: Daniele Di Proietto <ddiproietto at vmware.com>
> ---
>  lib/dpif-netdev.c | 238 +++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 157 insertions(+), 81 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 43bfc20..f1f8b54 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -331,14 +331,14 @@ static void dp_netdev_destroy_all_queues(struct dp_netdev *dp)
>      OVS_REQ_WRLOCK(dp->queue_rwlock);
>  static int dpif_netdev_open(const struct dpif_class *, const char *name,
>                              bool create, struct dpif **);
> -static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *,
> -                                      int queue_no, int type,
> +static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf **,
> +                                      int c, int queue_no, int type,
>                                        const struct miniflow *,
>                                        const struct nlattr *userdata);
>  static void dp_netdev_execute_actions(struct dp_netdev *dp,
>                                        const struct miniflow *,
> -                                      struct ofpbuf *, bool may_steal,
> -                                      struct pkt_metadata *,
> +                                      struct ofpbuf **, int c, bool may_steal,
> +                                      struct pkt_metadata **,
>                                        const struct nlattr *actions,
>                                        size_t actions_len);
>  static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf **packets,
> @@ -1533,7 +1533,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
>      miniflow_initialize(&key.flow, key.buf);
>      miniflow_extract(execute->packet, md, &key.flow);
>
> -    dp_netdev_execute_actions(dp, &key.flow, execute->packet, false, md,
> +    dp_netdev_execute_actions(dp, &key.flow, &execute->packet, 1, false, &md,
>                                execute->actions, execute->actions_len);
>
>      return 0;
> @@ -1986,46 +1986,108 @@ dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type, int packets
>      ovs_mutex_unlock(&bucket->mutex);
>  }
>
> +#define MAX_BATCH 256
> +
> +struct batch_pkt_execute {
> +    unsigned int packet_count;
> +    unsigned int byte_count;
> +
> +    struct dp_netdev_flow *flow;
> +    struct ofpbuf *packets[MAX_BATCH];
> +    struct pkt_metadata *md[MAX_BATCH];
> +    const struct miniflow *mf;
> +};
> +
> +static inline void
> +packet_batch_init(struct batch_pkt_execute *batch, struct dp_netdev_flow *flow,
> +                  struct ofpbuf *packet,
> +                  struct pkt_metadata *md, const struct miniflow *mf)
> +{
> +    batch->flow = flow;
> +    batch->mf = mf;
> +    batch->md[0] = md;
> +    batch->packets[0] = packet;
> +
> +    /* no need to update time and tcp_flags, its same in given input batch. */
> +    batch->packet_count = 1;
> +    batch->byte_count = ofpbuf_size(packet);
> +}
> +
> +static inline void
> +packet_batch_update(struct batch_pkt_execute *batch, struct ofpbuf *packet, struct pkt_metadata *md)
> +{
> +
> +    batch->md[batch->packet_count] = md;
> +    batch->packets[batch->packet_count++] = packet;
> +    batch->byte_count += ofpbuf_size(packet);
> +}
> +
> +static inline void
> +packet_batch_execute(struct batch_pkt_execute *batch, struct dp_netdev *dp)
> +{
> +    struct dp_netdev_actions *actions;
> +    struct dp_netdev_flow *flow = batch->flow;
> +
> +    dp_netdev_flow_used(batch->flow, batch->packet_count, batch->byte_count, batch->mf);
> +
> +    actions = ovsrcu_get(struct dp_netdev_actions *, &flow->actions);
> +
> +    dp_netdev_execute_actions(dp, batch->mf, batch->packets,
> +                              batch->packet_count, true,
> +                              batch->md, actions->actions, actions->size);
> +
> +    dp_netdev_count_packet(dp, DP_STAT_HIT, batch->packet_count);
> +}
> +
>  static void
>  dp_netdev_input(struct dp_netdev *dp, struct ofpbuf **packets, int c,
> -                struct pkt_metadata *md)
> +                struct pkt_metadata **md)
>  {
> +    struct batch_pkt_execute batch;
> +
> +    struct {
> +        struct miniflow flow;
> +        uint32_t buf[FLOW_U32S];
> +    } keys[c];
> +
>      int i;
>
> +    memset(&batch, 0, sizeof batch);
> +
Is there need to reset batch? can batch_init() take care of it?

>      for (i = 0; i < c; i++) {
>          struct dp_netdev_flow *netdev_flow;
> -        struct {
> -            struct miniflow flow;
> -            uint32_t buf[FLOW_U32S];
> -        } key;
>
>          if (ofpbuf_size(packets[i]) < ETH_HEADER_LEN) {
>              ofpbuf_delete(packets[i]);
>              continue;
>          }
> -        miniflow_initialize(&key.flow, key.buf);
> -        miniflow_extract(packets[i], &md[i], &key.flow);
> +        miniflow_initialize(&keys[i].flow, keys[i].buf);
> +        miniflow_extract(packets[i], md[i], &keys[i].flow);
>
> -        netdev_flow = dp_netdev_lookup_flow(dp, &key.flow);
> +        netdev_flow = dp_netdev_lookup_flow(dp, &keys[i].flow);
>
>          if (netdev_flow) {
> -            struct dp_netdev_actions *actions;
> -
> -            dp_netdev_flow_used(netdev_flow, 1, ofpbuf_size(packets[i]), &key.flow);
> -
> -            actions = dp_netdev_flow_get_actions(netdev_flow);
> -            dp_netdev_execute_actions(dp, &key.flow, packets[i], true, &md[i],
> -                                      actions->actions, actions->size);
> -            dp_netdev_count_packet(dp, DP_STAT_HIT, 1);
> +            if (!batch.flow) {
> +                packet_batch_init(&batch, netdev_flow, packets[i], md[i], &keys[i].flow);
> +            } else if (batch.flow == netdev_flow) {
> +                packet_batch_update(&batch, packets[i], md[i]);
> +            } else {
> +                packet_batch_execute(&batch, dp);
> +                packet_batch_init(&batch, netdev_flow, packets[i], md[i], &keys[i].flow);
> +            }
>          } else if (dp->handler_queues) {
>              dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
> -            dp_netdev_output_userspace(dp, packets[i],
> -                                       miniflow_hash_5tuple(&key.flow, 0)
> +            dp_netdev_output_userspace(dp, &packets[i], 1,
> +                                       miniflow_hash_5tuple(&keys[i].flow, 0)
>                                         % dp->n_handlers,
> -                                       DPIF_UC_MISS, &key.flow, NULL);
> +                                       DPIF_UC_MISS, &keys[i].flow, NULL);
>              ofpbuf_delete(packets[i]);
>          }
>      }
> +
> +    if (batch.flow) {
> +        packet_batch_execute(&batch, dp);
> +    }
>  }
>
>  static void
> @@ -2034,67 +2096,74 @@ dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf **packets,
>  {
>      uint32_t *recirc_depth = recirc_depth_get();
>      struct pkt_metadata md[c];
> +    struct pkt_metadata *pmd[c];
>
>      int i;
>
>      for (i = 0; i < c; i++) {
>          md[i] = PKT_METADATA_INITIALIZER(port_no);
> +        pmd[i] = &md[i];
>      }
>
>      *recirc_depth = 0;
> -    dp_netdev_input(dp, packets, c, md);
> +    dp_netdev_input(dp, packets, c, pmd);
>  }
>
>  static int
> -dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
> +dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf **packets, int c,
>                             int queue_no, int type, const struct miniflow *key,
>                             const struct nlattr *userdata)
>  {
>      struct dp_netdev_queue *q;
>      int error;
> +    int i;
>
>      fat_rwlock_rdlock(&dp->queue_rwlock);
>      q = &dp->handler_queues[queue_no];
>      ovs_mutex_lock(&q->mutex);
> -    if (q->head - q->tail < MAX_QUEUE_LEN) {
> -        struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
> -        struct dpif_upcall *upcall = &u->upcall;
> -        struct ofpbuf *buf = &u->buf;
> -        size_t buf_size;
> -        struct flow flow;
> -
> -        upcall->type = type;
> -
> -        /* Allocate buffer big enough for everything. */
> -        buf_size = ODPUTIL_FLOW_KEY_BYTES;
> -        if (userdata) {
> -            buf_size += NLA_ALIGN(userdata->nla_len);
> -        }
> -        buf_size += ofpbuf_size(packet);
> -        ofpbuf_init(buf, buf_size);
> -
> -        /* Put ODP flow. */
> -        miniflow_expand(key, &flow);
> -        odp_flow_key_from_flow(buf, &flow, NULL, flow.in_port.odp_port, true);
> -        upcall->key = ofpbuf_data(buf);
> -        upcall->key_len = ofpbuf_size(buf);
> -
> -        /* Put userdata. */
> -        if (userdata) {
> -            upcall->userdata = ofpbuf_put(buf, userdata,
> -                                          NLA_ALIGN(userdata->nla_len));
> -        }
> +    for (i = 0; i < c; i++) {
> +        struct ofpbuf * packet = packets[i];
extra space before packet.
need blank line after declaratation.
> +        if (q->head - q->tail < MAX_QUEUE_LEN) {
> +            struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
> +            struct dpif_upcall *upcall = &u->upcall;
> +            struct ofpbuf *buf = &u->buf;
> +            size_t buf_size;
> +            struct flow flow;
> +
> +            upcall->type = type;
> +
> +            /* Allocate buffer big enough for everything. */
> +            buf_size = ODPUTIL_FLOW_KEY_BYTES;
> +            if (userdata) {
> +                buf_size += NLA_ALIGN(userdata->nla_len);
> +            }
> +            buf_size += ofpbuf_size(packet);
> +            ofpbuf_init(buf, buf_size);
> +
> +            /* Put ODP flow. */
> +            miniflow_expand(key, &flow);
> +            odp_flow_key_from_flow(buf, &flow, NULL, flow.in_port.odp_port, true);
> +            upcall->key = ofpbuf_data(buf);
> +            upcall->key_len = ofpbuf_size(buf);
> +
> +            /* Put userdata. */
> +            if (userdata) {
> +                upcall->userdata = ofpbuf_put(buf, userdata,
> +                                              NLA_ALIGN(userdata->nla_len));
> +            }
>
> -        ofpbuf_set_data(&upcall->packet,
> -                        ofpbuf_put(buf, ofpbuf_data(packet), ofpbuf_size(packet)));
> -        ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet));
> +            ofpbuf_set_data(&upcall->packet,
> +                            ofpbuf_put(buf, ofpbuf_data(packet),
> +                            ofpbuf_size(packet)));
> +            ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet));
>
> -        seq_change(q->seq);
> +            seq_change(q->seq);
>
> -        error = 0;
> -    } else {
> -        dp_netdev_count_packet(dp, DP_STAT_LOST, 1);
> -        error = ENOBUFS;
> +            error = 0;
> +        } else {
> +            dp_netdev_count_packet(dp, DP_STAT_LOST, 1);
> +            error = ENOBUFS;
> +        }
Can you make separate function to send a packet for upcall processing?
it is easier to read that way.
>      }
>      ovs_mutex_unlock(&q->mutex);
>      fat_rwlock_unlock(&dp->queue_rwlock);
> @@ -2117,19 +2186,13 @@ dp_execute_cb(void *aux_, struct ofpbuf **packets, int c,
>      int type = nl_attr_type(a);
>      struct dp_netdev_port *p;
>      uint32_t *depth = recirc_depth_get();
> -    struct ofpbuf * packet;
> -    struct pkt_metadata * md;
> -
> -    ovs_assert(c==1);
> -
> -    packet = packets[0];
> -    md = pmd[0];
> +    int i;
>
>      switch ((enum ovs_action_attr)type) {
>      case OVS_ACTION_ATTR_OUTPUT:
>          p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a)));
>          if (p) {
> -            netdev_send(p->netdev, packet, may_steal);
> +            netdev_send_batch(p->netdev, packets, c, may_steal);
>          }
>          break;
>
> @@ -2138,14 +2201,16 @@ dp_execute_cb(void *aux_, struct ofpbuf **packets, int c,
>
>          userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
>
> -        dp_netdev_output_userspace(aux->dp, packet,
> +        dp_netdev_output_userspace(aux->dp, packets, c,
>                                     miniflow_hash_5tuple(aux->key, 0)
>                                         % aux->dp->n_handlers,
>                                     DPIF_UC_ACTION, aux->key,
>                                     userdata);
>
>          if (may_steal) {
> -            ofpbuf_delete(packet);
> +            for (i = 0; i < c; i++) {
> +                ofpbuf_delete(packets[i]);
> +            }
>          }
>          break;
>      }
> @@ -2168,20 +2233,31 @@ dp_execute_cb(void *aux_, struct ofpbuf **packets, int c,
>              hash = 2;
>          }
>
> -        md->dp_hash = hash;
> +        for (i = 0; i < c; i++) {
> +            pmd[i]->dp_hash = hash;
> +        }
>          break;
>      }
>
>      case OVS_ACTION_ATTR_RECIRC:
>          if (*depth < MAX_RECIRC_DEPTH) {
> -            struct pkt_metadata recirc_md = *md;
> -            struct ofpbuf *recirc_packet;
> +            struct pkt_metadata recirc_md[c];
> +            struct pkt_metadata *recirc_pmd[c];
> +            struct ofpbuf *recirc_packets[c];
>
> -            recirc_packet = may_steal ? packet : ofpbuf_clone(packet);
> -            recirc_md.recirc_id = nl_attr_get_u32(a);
> +            if (!may_steal) {
> +                for (i = 0; i < c; i++)
> +                    recirc_packets[i] = ofpbuf_clone(packets[i]);
> +            }
> +
> +            for (i = 0; i < c; i++) {
> +                recirc_md[i] = *pmd[i];
> +                recirc_md[i].recirc_id = nl_attr_get_u32(a);
> +                recirc_pmd[i] = &recirc_md[i];
> +            }
>
>              (*depth)++;
> -            dp_netdev_input(aux->dp, &recirc_packet, 1, &recirc_md);
> +            dp_netdev_input(aux->dp, may_steal ? packets : recirc_packets, c, recirc_pmd);
>              (*depth)--;
>
>              break;
> @@ -2204,13 +2280,13 @@ dp_execute_cb(void *aux_, struct ofpbuf **packets, int c,
>
>  static void
>  dp_netdev_execute_actions(struct dp_netdev *dp, const struct miniflow *key,
> -                          struct ofpbuf *packet, bool may_steal,
> -                          struct pkt_metadata *md,
> +                          struct ofpbuf **packets, int c, bool may_steal,
> +                          struct pkt_metadata **md,
>                            const struct nlattr *actions, size_t actions_len)
>  {
>      struct dp_netdev_execute_aux aux = {dp, key};
>
> -    odp_execute_actions(&aux, &packet, 1, may_steal, &md,
> +    odp_execute_actions(&aux, packets, c, may_steal, md,
>                          actions, actions_len, dp_execute_cb);
>  }
>
> --
> 2.0.0.rc0
>



More information about the dev mailing list