[ovs-dev] [PATCH 5/5] dpif-netdev: batch packet sending
Pravin Shelar
pshelar at nicira.com
Fri May 30 23:55:01 UTC 2014
On Fri, May 23, 2014 at 11:04 AM, Daniele Di Proietto
<ddiproietto at vmware.com> wrote:
> Signed-off-by: Daniele Di Proietto <ddiproietto at vmware.com>
> ---
> lib/dpif-netdev.c | 238 +++++++++++++++++++++++++++++++++++-------------------
> 1 file changed, 157 insertions(+), 81 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 43bfc20..f1f8b54 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -331,14 +331,14 @@ static void dp_netdev_destroy_all_queues(struct dp_netdev *dp)
> OVS_REQ_WRLOCK(dp->queue_rwlock);
> static int dpif_netdev_open(const struct dpif_class *, const char *name,
> bool create, struct dpif **);
> -static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *,
> - int queue_no, int type,
> +static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf **,
> + int c, int queue_no, int type,
> const struct miniflow *,
> const struct nlattr *userdata);
> static void dp_netdev_execute_actions(struct dp_netdev *dp,
> const struct miniflow *,
> - struct ofpbuf *, bool may_steal,
> - struct pkt_metadata *,
> + struct ofpbuf **, int c, bool may_steal,
> + struct pkt_metadata **,
> const struct nlattr *actions,
> size_t actions_len);
> static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf **packets,
> @@ -1533,7 +1533,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
> miniflow_initialize(&key.flow, key.buf);
> miniflow_extract(execute->packet, md, &key.flow);
>
> - dp_netdev_execute_actions(dp, &key.flow, execute->packet, false, md,
> + dp_netdev_execute_actions(dp, &key.flow, &execute->packet, 1, false, &md,
> execute->actions, execute->actions_len);
>
> return 0;
> @@ -1986,46 +1986,108 @@ dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type, int packets
> ovs_mutex_unlock(&bucket->mutex);
> }
>
> +#define MAX_BATCH 256
> +
> +struct batch_pkt_execute {
> + unsigned int packet_count;
> + unsigned int byte_count;
> +
> + struct dp_netdev_flow *flow;
> + struct ofpbuf *packets[MAX_BATCH];
> + struct pkt_metadata *md[MAX_BATCH];
> + const struct miniflow *mf;
> +};
> +
> +static inline void
> +packet_batch_init(struct batch_pkt_execute *batch, struct dp_netdev_flow *flow,
> + struct ofpbuf *packet,
> + struct pkt_metadata *md, const struct miniflow *mf)
> +{
> + batch->flow = flow;
> + batch->mf = mf;
> + batch->md[0] = md;
> + batch->packets[0] = packet;
> +
> + /* no need to update time and tcp_flags, its same in given input batch. */
> + batch->packet_count = 1;
> + batch->byte_count = ofpbuf_size(packet);
> +}
> +
> +static inline void
> +packet_batch_update(struct batch_pkt_execute *batch, struct ofpbuf *packet, struct pkt_metadata *md)
> +{
> +
> + batch->md[batch->packet_count] = md;
> + batch->packets[batch->packet_count++] = packet;
> + batch->byte_count += ofpbuf_size(packet);
> +}
> +
> +static inline void
> +packet_batch_execute(struct batch_pkt_execute *batch, struct dp_netdev *dp)
> +{
> + struct dp_netdev_actions *actions;
> + struct dp_netdev_flow *flow = batch->flow;
> +
> + dp_netdev_flow_used(batch->flow, batch->packet_count, batch->byte_count, batch->mf);
> +
> + actions = ovsrcu_get(struct dp_netdev_actions *, &flow->actions);
> +
> + dp_netdev_execute_actions(dp, batch->mf, batch->packets,
> + batch->packet_count, true,
> + batch->md, actions->actions, actions->size);
> +
> + dp_netdev_count_packet(dp, DP_STAT_HIT, batch->packet_count);
> +}
> +
> static void
> dp_netdev_input(struct dp_netdev *dp, struct ofpbuf **packets, int c,
> - struct pkt_metadata *md)
> + struct pkt_metadata **md)
> {
> + struct batch_pkt_execute batch;
> +
> + struct {
> + struct miniflow flow;
> + uint32_t buf[FLOW_U32S];
> + } keys[c];
> +
> int i;
>
> + memset(&batch, 0, sizeof batch);
> +
Is there need to reset batch? can batch_init() take care of it?
> for (i = 0; i < c; i++) {
> struct dp_netdev_flow *netdev_flow;
> - struct {
> - struct miniflow flow;
> - uint32_t buf[FLOW_U32S];
> - } key;
>
> if (ofpbuf_size(packets[i]) < ETH_HEADER_LEN) {
> ofpbuf_delete(packets[i]);
> continue;
> }
> - miniflow_initialize(&key.flow, key.buf);
> - miniflow_extract(packets[i], &md[i], &key.flow);
> + miniflow_initialize(&keys[i].flow, keys[i].buf);
> + miniflow_extract(packets[i], md[i], &keys[i].flow);
>
> - netdev_flow = dp_netdev_lookup_flow(dp, &key.flow);
> + netdev_flow = dp_netdev_lookup_flow(dp, &keys[i].flow);
>
> if (netdev_flow) {
> - struct dp_netdev_actions *actions;
> -
> - dp_netdev_flow_used(netdev_flow, 1, ofpbuf_size(packets[i]), &key.flow);
> -
> - actions = dp_netdev_flow_get_actions(netdev_flow);
> - dp_netdev_execute_actions(dp, &key.flow, packets[i], true, &md[i],
> - actions->actions, actions->size);
> - dp_netdev_count_packet(dp, DP_STAT_HIT, 1);
> + if (!batch.flow) {
> + packet_batch_init(&batch, netdev_flow, packets[i], md[i], &keys[i].flow);
> + } else if (batch.flow == netdev_flow) {
> + packet_batch_update(&batch, packets[i], md[i]);
> + } else {
> + packet_batch_execute(&batch, dp);
> + packet_batch_init(&batch, netdev_flow, packets[i], md[i], &keys[i].flow);
> + }
> } else if (dp->handler_queues) {
> dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
> - dp_netdev_output_userspace(dp, packets[i],
> - miniflow_hash_5tuple(&key.flow, 0)
> + dp_netdev_output_userspace(dp, &packets[i], 1,
> + miniflow_hash_5tuple(&keys[i].flow, 0)
> % dp->n_handlers,
> - DPIF_UC_MISS, &key.flow, NULL);
> + DPIF_UC_MISS, &keys[i].flow, NULL);
> ofpbuf_delete(packets[i]);
> }
> }
> +
> + if (batch.flow) {
> + packet_batch_execute(&batch, dp);
> + }
> }
>
> static void
> @@ -2034,67 +2096,74 @@ dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf **packets,
> {
> uint32_t *recirc_depth = recirc_depth_get();
> struct pkt_metadata md[c];
> + struct pkt_metadata *pmd[c];
>
> int i;
>
> for (i = 0; i < c; i++) {
> md[i] = PKT_METADATA_INITIALIZER(port_no);
> + pmd[i] = &md[i];
> }
>
> *recirc_depth = 0;
> - dp_netdev_input(dp, packets, c, md);
> + dp_netdev_input(dp, packets, c, pmd);
> }
>
> static int
> -dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
> +dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf **packets, int c,
> int queue_no, int type, const struct miniflow *key,
> const struct nlattr *userdata)
> {
> struct dp_netdev_queue *q;
> int error;
> + int i;
>
> fat_rwlock_rdlock(&dp->queue_rwlock);
> q = &dp->handler_queues[queue_no];
> ovs_mutex_lock(&q->mutex);
> - if (q->head - q->tail < MAX_QUEUE_LEN) {
> - struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
> - struct dpif_upcall *upcall = &u->upcall;
> - struct ofpbuf *buf = &u->buf;
> - size_t buf_size;
> - struct flow flow;
> -
> - upcall->type = type;
> -
> - /* Allocate buffer big enough for everything. */
> - buf_size = ODPUTIL_FLOW_KEY_BYTES;
> - if (userdata) {
> - buf_size += NLA_ALIGN(userdata->nla_len);
> - }
> - buf_size += ofpbuf_size(packet);
> - ofpbuf_init(buf, buf_size);
> -
> - /* Put ODP flow. */
> - miniflow_expand(key, &flow);
> - odp_flow_key_from_flow(buf, &flow, NULL, flow.in_port.odp_port, true);
> - upcall->key = ofpbuf_data(buf);
> - upcall->key_len = ofpbuf_size(buf);
> -
> - /* Put userdata. */
> - if (userdata) {
> - upcall->userdata = ofpbuf_put(buf, userdata,
> - NLA_ALIGN(userdata->nla_len));
> - }
> + for (i = 0; i < c; i++) {
> + struct ofpbuf * packet = packets[i];
extra space before packet.
need blank line after declaratation.
> + if (q->head - q->tail < MAX_QUEUE_LEN) {
> + struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
> + struct dpif_upcall *upcall = &u->upcall;
> + struct ofpbuf *buf = &u->buf;
> + size_t buf_size;
> + struct flow flow;
> +
> + upcall->type = type;
> +
> + /* Allocate buffer big enough for everything. */
> + buf_size = ODPUTIL_FLOW_KEY_BYTES;
> + if (userdata) {
> + buf_size += NLA_ALIGN(userdata->nla_len);
> + }
> + buf_size += ofpbuf_size(packet);
> + ofpbuf_init(buf, buf_size);
> +
> + /* Put ODP flow. */
> + miniflow_expand(key, &flow);
> + odp_flow_key_from_flow(buf, &flow, NULL, flow.in_port.odp_port, true);
> + upcall->key = ofpbuf_data(buf);
> + upcall->key_len = ofpbuf_size(buf);
> +
> + /* Put userdata. */
> + if (userdata) {
> + upcall->userdata = ofpbuf_put(buf, userdata,
> + NLA_ALIGN(userdata->nla_len));
> + }
>
> - ofpbuf_set_data(&upcall->packet,
> - ofpbuf_put(buf, ofpbuf_data(packet), ofpbuf_size(packet)));
> - ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet));
> + ofpbuf_set_data(&upcall->packet,
> + ofpbuf_put(buf, ofpbuf_data(packet),
> + ofpbuf_size(packet)));
> + ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet));
>
> - seq_change(q->seq);
> + seq_change(q->seq);
>
> - error = 0;
> - } else {
> - dp_netdev_count_packet(dp, DP_STAT_LOST, 1);
> - error = ENOBUFS;
> + error = 0;
> + } else {
> + dp_netdev_count_packet(dp, DP_STAT_LOST, 1);
> + error = ENOBUFS;
> + }
Can you make separate function to send a packet for upcall processing?
it is easier to read that way.
> }
> ovs_mutex_unlock(&q->mutex);
> fat_rwlock_unlock(&dp->queue_rwlock);
> @@ -2117,19 +2186,13 @@ dp_execute_cb(void *aux_, struct ofpbuf **packets, int c,
> int type = nl_attr_type(a);
> struct dp_netdev_port *p;
> uint32_t *depth = recirc_depth_get();
> - struct ofpbuf * packet;
> - struct pkt_metadata * md;
> -
> - ovs_assert(c==1);
> -
> - packet = packets[0];
> - md = pmd[0];
> + int i;
>
> switch ((enum ovs_action_attr)type) {
> case OVS_ACTION_ATTR_OUTPUT:
> p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a)));
> if (p) {
> - netdev_send(p->netdev, packet, may_steal);
> + netdev_send_batch(p->netdev, packets, c, may_steal);
> }
> break;
>
> @@ -2138,14 +2201,16 @@ dp_execute_cb(void *aux_, struct ofpbuf **packets, int c,
>
> userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
>
> - dp_netdev_output_userspace(aux->dp, packet,
> + dp_netdev_output_userspace(aux->dp, packets, c,
> miniflow_hash_5tuple(aux->key, 0)
> % aux->dp->n_handlers,
> DPIF_UC_ACTION, aux->key,
> userdata);
>
> if (may_steal) {
> - ofpbuf_delete(packet);
> + for (i = 0; i < c; i++) {
> + ofpbuf_delete(packets[i]);
> + }
> }
> break;
> }
> @@ -2168,20 +2233,31 @@ dp_execute_cb(void *aux_, struct ofpbuf **packets, int c,
> hash = 2;
> }
>
> - md->dp_hash = hash;
> + for (i = 0; i < c; i++) {
> + pmd[i]->dp_hash = hash;
> + }
> break;
> }
>
> case OVS_ACTION_ATTR_RECIRC:
> if (*depth < MAX_RECIRC_DEPTH) {
> - struct pkt_metadata recirc_md = *md;
> - struct ofpbuf *recirc_packet;
> + struct pkt_metadata recirc_md[c];
> + struct pkt_metadata *recirc_pmd[c];
> + struct ofpbuf *recirc_packets[c];
>
> - recirc_packet = may_steal ? packet : ofpbuf_clone(packet);
> - recirc_md.recirc_id = nl_attr_get_u32(a);
> + if (!may_steal) {
> + for (i = 0; i < c; i++)
> + recirc_packets[i] = ofpbuf_clone(packets[i]);
> + }
> +
> + for (i = 0; i < c; i++) {
> + recirc_md[i] = *pmd[i];
> + recirc_md[i].recirc_id = nl_attr_get_u32(a);
> + recirc_pmd[i] = &recirc_md[i];
> + }
>
> (*depth)++;
> - dp_netdev_input(aux->dp, &recirc_packet, 1, &recirc_md);
> + dp_netdev_input(aux->dp, may_steal ? packets : recirc_packets, c, recirc_pmd);
> (*depth)--;
>
> break;
> @@ -2204,13 +2280,13 @@ dp_execute_cb(void *aux_, struct ofpbuf **packets, int c,
>
> static void
> dp_netdev_execute_actions(struct dp_netdev *dp, const struct miniflow *key,
> - struct ofpbuf *packet, bool may_steal,
> - struct pkt_metadata *md,
> + struct ofpbuf **packets, int c, bool may_steal,
> + struct pkt_metadata **md,
> const struct nlattr *actions, size_t actions_len)
> {
> struct dp_netdev_execute_aux aux = {dp, key};
>
> - odp_execute_actions(&aux, &packet, 1, may_steal, &md,
> + odp_execute_actions(&aux, packets, c, may_steal, md,
> actions, actions_len, dp_execute_cb);
> }
>
> --
> 2.0.0.rc0
>
More information about the dev
mailing list