[ovs-dev] [PATCH v2 2/2] dpif-netdev: batch packet processing

Daniele Di Proietto ddiproietto at vmware.com
Thu Jun 5 17:24:18 UTC 2014


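This change makes dpif-netdev process received packets in batches, which
allows faster packet processing for devices that implement batching
(currently netdev-dpdk).  Consecutive packets in a batch that match the same
flow are grouped, so that the flow statistics are updated and the actions are
executed once per group instead of once per packet.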

Signed-off-by: Daniele Di Proietto <ddiproietto at vmware.com>
---
 lib/dpif-netdev.c            | 336 ++++++++++++++++++++++++++++++-------------
 lib/dpif.c                   |  10 +-
 lib/odp-execute.c            |  62 +++++---
 lib/odp-execute.h            |   6 +-
 lib/ofpbuf.c                 |   1 +
 lib/ofpbuf.h                 |   2 +
 ofproto/ofproto-dpif-xlate.c |   4 +-
 7 files changed, 294 insertions(+), 127 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 1b8d499..1343875 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -331,18 +331,17 @@ static void dp_netdev_destroy_all_queues(struct dp_netdev *dp)
     OVS_REQ_WRLOCK(dp->queue_rwlock);
 static int dpif_netdev_open(const struct dpif_class *, const char *name,
                             bool create, struct dpif **);
-static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *,
-                                      int queue_no, int type,
+static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf **,
+                                      int cnt, int queue_no, int type,
                                       const struct miniflow *,
                                       const struct nlattr *userdata);
 static void dp_netdev_execute_actions(struct dp_netdev *dp,
                                       const struct miniflow *,
-                                      struct ofpbuf *, bool may_steal,
-                                      struct pkt_metadata *,
+                                      struct ofpbuf **, int cnt, bool may_steal,
                                       const struct nlattr *actions,
                                       size_t actions_len);
-static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
-                                 struct pkt_metadata *);
+static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf **packets,
+                                 int cnt, odp_port_t port_no);
 
 static void dp_netdev_set_pmd_threads(struct dp_netdev *, int n);
 
@@ -1518,7 +1517,6 @@ static int
 dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
-    struct pkt_metadata *md = &execute->md;
     struct {
         struct miniflow flow;
         uint32_t buf[FLOW_U32S];
@@ -1531,10 +1529,12 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
 
     /* Extract flow key. */
     miniflow_initialize(&key.flow, key.buf);
-    miniflow_extract(execute->packet, md, &key.flow);
+    miniflow_extract(execute->packet, &execute->md, &key.flow);
 
-    dp_netdev_execute_actions(dp, &key.flow, execute->packet, false, md,
-                              execute->actions, execute->actions_len);
+    execute->packet->md = execute->md;
+    dp_netdev_execute_actions(dp, &key.flow, &execute->packet, 1, false,
+                              execute->actions,
+                              execute->actions_len);
 
     return 0;
 }
@@ -1747,17 +1747,12 @@ dp_netdev_process_rxq_port(struct dp_netdev *dp,
                           struct dp_netdev_port *port,
                           struct netdev_rxq *rxq)
 {
-    struct ofpbuf *packet[NETDEV_MAX_RX_BATCH];
-    int error, c;
+    struct ofpbuf *packets[NETDEV_MAX_RX_BATCH];
+    int error, cnt;
 
-    error = netdev_rxq_recv(rxq, packet, &c);
+    error = netdev_rxq_recv(rxq, packets, &cnt);
     if (!error) {
-        struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no);
-        int i;
-
-        for (i = 0; i < c; i++) {
-            dp_netdev_port_input(dp, packet[i], &md);
-        }
+        dp_netdev_port_input(dp, packets, cnt, port->port_no);
     } else if (error != EAGAIN && error != EOPNOTSUPP) {
         static struct vlog_rate_limit rl
             = VLOG_RATE_LIMIT_INIT(1, 5);
@@ -1954,10 +1949,9 @@ dp_netdev_flow_stats_new_cb(void)
 
 static void
 dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
-                    const struct ofpbuf *packet,
-                    const struct miniflow *key)
+                    int cnt, int size,
+                    uint16_t tcp_flags)
 {
-    uint16_t tcp_flags = miniflow_get_tcp_flags(key);
     long long int now = time_msec();
     struct dp_netdev_flow_stats *bucket;
 
@@ -1966,8 +1960,8 @@ dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
 
     ovs_mutex_lock(&bucket->mutex);
     bucket->used = MAX(now, bucket->used);
-    bucket->packet_count++;
-    bucket->byte_count += ofpbuf_size(packet);
+    bucket->packet_count += cnt;
+    bucket->byte_count += size;
     bucket->tcp_flags |= tcp_flags;
     ovs_mutex_unlock(&bucket->mutex);
 }
@@ -1981,73 +1975,155 @@ dp_netdev_stats_new_cb(void)
 }
 
 static void
-dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type)
+dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type, int cnt)
 {
     struct dp_netdev_stats *bucket;
 
     bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
     ovs_mutex_lock(&bucket->mutex);
-    bucket->n[type]++;
+    bucket->n[type] += cnt;
     ovs_mutex_unlock(&bucket->mutex);
 }
 
+struct batch_pkt_execute {
+    unsigned int packet_count;
+    unsigned int byte_count;
+    uint16_t tcp_flags;
+
+    struct dp_netdev_flow *flow;
+    const struct miniflow *mf;
+
+    struct ofpbuf *packets[NETDEV_MAX_RX_BATCH];
+};
+
+static inline void
+packet_batch_update(struct batch_pkt_execute *batch, struct ofpbuf *packet,
+                    const struct miniflow *mf)
+{
+    batch->tcp_flags |= miniflow_get_tcp_flags(mf);
+    batch->packets[batch->packet_count++] = packet;
+    batch->byte_count += ofpbuf_size(packet);
+}
+
+static inline void
+packet_batch_init(struct batch_pkt_execute *batch, struct dp_netdev_flow *flow,
+                  struct ofpbuf *packet, const struct miniflow *mf)
+{
+    batch->flow = flow;
+    batch->mf = mf;
+    batch->packets[0] = packet;
+
+    batch->packet_count = 0;
+    batch->byte_count = 0;
+    batch->tcp_flags = 0;
+
+    packet_batch_update(batch, packet, mf);
+}
+
+static inline void
+packet_batch_execute(struct batch_pkt_execute *batch, struct dp_netdev *dp)
+{
+    struct dp_netdev_actions *actions;
+    struct dp_netdev_flow *flow = batch->flow;
+
+    dp_netdev_flow_used(batch->flow, batch->packet_count, batch->byte_count,
+                        batch->tcp_flags);
+
+    actions = dp_netdev_flow_get_actions(flow);
+
+    dp_netdev_execute_actions(dp, batch->mf, batch->packets,
+                              batch->packet_count, true,
+                              actions->actions, actions->size);
+
+    dp_netdev_count_packet(dp, DP_STAT_HIT, batch->packet_count);
+}
+
 static void
-dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet,
-                struct pkt_metadata *md)
+dp_netdev_input(struct dp_netdev *dp, struct ofpbuf **packets, int cnt,
+                const struct pkt_metadata *md)
 {
-    struct dp_netdev_flow *netdev_flow;
-    struct {
+    struct batch_pkt_execute batch;
+
+    struct miniflowkey{
         struct miniflow flow;
         uint32_t buf[FLOW_U32S];
-    } key;
+    } keys[2];
 
-    if (ofpbuf_size(packet) < ETH_HEADER_LEN) {
-        ofpbuf_delete(packet);
-        return;
+    /* To handle a batch of packets we need to store two miniflow 'keys':
+     * - one to extract the miniflow for the new packet,
+     * - the other to remember the miniflow for the current batch
+     *   (it may be needed for executing certain actions)
+     *
+     * To avoid a copy we use the 'mfk' pointer, which points to the memory
+     * that should be used to extract the miniflow for the new packet.
+     * Every time a new batch is initialized, the mfk pointer changes. */
+
+    struct miniflowkey *mfk = &keys[0];
+
+    int i;
+
+    batch.flow = NULL;
+
+    for (i = 0; i < ARRAY_SIZE(keys); i++) {
+        miniflow_initialize(&keys[i].flow, keys[i].buf);
     }
-    miniflow_initialize(&key.flow, key.buf);
-    miniflow_extract(packet, md, &key.flow);
 
-    netdev_flow = dp_netdev_lookup_flow(dp, &key.flow);
-    if (netdev_flow) {
-        struct dp_netdev_actions *actions;
+    for (i = 0; i < cnt; i++) {
+        struct dp_netdev_flow *netdev_flow;
 
-        dp_netdev_flow_used(netdev_flow, packet, &key.flow);
+        if (ofpbuf_size(packets[i]) < ETH_HEADER_LEN) {
+            ofpbuf_delete(packets[i]);
+            continue;
+        }
+
+        miniflow_extract(packets[i], md, &mfk->flow);
+        packets[i]->md = *md;
+
+        netdev_flow = dp_netdev_lookup_flow(dp, &mfk->flow);
 
-        actions = dp_netdev_flow_get_actions(netdev_flow);
-        dp_netdev_execute_actions(dp, &key.flow, packet, true, md,
-                                  actions->actions, actions->size);
-        dp_netdev_count_packet(dp, DP_STAT_HIT);
-    } else if (dp->handler_queues) {
-        dp_netdev_count_packet(dp, DP_STAT_MISS);
-        dp_netdev_output_userspace(dp, packet,
-                                   miniflow_hash_5tuple(&key.flow, 0)
-                                   % dp->n_handlers,
-                                   DPIF_UC_MISS, &key.flow, NULL);
+        if (netdev_flow) {
+            if (!batch.flow) {
+                packet_batch_init(&batch, netdev_flow, packets[i], &mfk->flow);
+                mfk = (batch.mf == &keys[0].flow) ? &keys[1] : &keys[0];
+            } else if (batch.flow == netdev_flow) {
+                packet_batch_update(&batch, packets[i], &mfk->flow);
+            } else {
+                packet_batch_execute(&batch, dp);
+                packet_batch_init(&batch, netdev_flow, packets[i], &mfk->flow);
+                mfk = (batch.mf == &keys[0].flow) ? &keys[1] : &keys[0];
+            }
+        } else if (dp->handler_queues) {
+            dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
+            dp_netdev_output_userspace(dp, &packets[i], 1,
+                                       miniflow_hash_5tuple(&mfk->flow, 0)
+                                       % dp->n_handlers,
+                                       DPIF_UC_MISS, &mfk->flow, NULL);
+        }
+    }
+
+    if (batch.flow) {
+        packet_batch_execute(&batch, dp);
     }
 }
 
 static void
-dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
-                     struct pkt_metadata *md)
+dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf **packets,
+                     int cnt, odp_port_t port_no)
 {
     uint32_t *recirc_depth = recirc_depth_get();
+    struct pkt_metadata md = PKT_METADATA_INITIALIZER(port_no);
 
     *recirc_depth = 0;
-    dp_netdev_input(dp, packet, md);
+    dp_netdev_input(dp, packets, cnt, &md);
 }
 
 static int
-dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
-                           int queue_no, int type, const struct miniflow *key,
-                           const struct nlattr *userdata)
+dp_netdev_queue_userspace_packet(struct dp_netdev_queue *q,
+                                 struct ofpbuf *packet, int type,
+                                 const struct miniflow *key,
+                                 const struct nlattr *userdata)
+OVS_REQUIRES(q->mutex)
 {
-    struct dp_netdev_queue *q;
-    int error;
-
-    fat_rwlock_rdlock(&dp->queue_rwlock);
-    q = &dp->handler_queues[queue_no];
-    ovs_mutex_lock(&q->mutex);
     if (q->head - q->tail < MAX_QUEUE_LEN) {
         struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
         struct dpif_upcall *upcall = &u->upcall;
@@ -2073,18 +2149,42 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
         /* Put userdata. */
         if (userdata) {
             upcall->userdata = ofpbuf_put(buf, userdata,
-                                          NLA_ALIGN(userdata->nla_len));
+                    NLA_ALIGN(userdata->nla_len));
         }
 
         upcall->packet = *packet;
 
         seq_change(q->seq);
 
-        error = 0;
+        return 0;
     } else {
-        dp_netdev_count_packet(dp, DP_STAT_LOST);
         ofpbuf_delete(packet);
-        error = ENOBUFS;
+        return ENOBUFS;
+    }
+
+}
+
+static int
+dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf **packets,
+                           int cnt, int queue_no, int type,
+                           const struct miniflow *key,
+                           const struct nlattr *userdata)
+{
+    struct dp_netdev_queue *q;
+    int error;
+    int i;
+
+    fat_rwlock_rdlock(&dp->queue_rwlock);
+    q = &dp->handler_queues[queue_no];
+    ovs_mutex_lock(&q->mutex);
+    for (i = 0; i < cnt; i++) {
+        struct ofpbuf *packet = packets[i];
+
+        error = dp_netdev_queue_userspace_packet(q, packet, type, key,
+                                                 userdata);
+        if (error == ENOBUFS) {
+            dp_netdev_count_packet(dp, DP_STAT_LOST, 1);
+        }
     }
     ovs_mutex_unlock(&q->mutex);
     fat_rwlock_unlock(&dp->queue_rwlock);
@@ -2098,8 +2198,7 @@ struct dp_netdev_execute_aux {
 };
 
 static void
-dp_execute_cb(void *aux_, struct ofpbuf *packet,
-              struct pkt_metadata *md,
+dp_execute_cb(void *aux_, struct ofpbuf **packets, int cnt,
               const struct nlattr *a, bool may_steal)
     OVS_NO_THREAD_SAFETY_ANALYSIS
 {
@@ -2107,27 +2206,47 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
     int type = nl_attr_type(a);
     struct dp_netdev_port *p;
     uint32_t *depth = recirc_depth_get();
+    int i;
 
     switch ((enum ovs_action_attr)type) {
     case OVS_ACTION_ATTR_OUTPUT:
         p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a)));
         if (p) {
-            netdev_send(p->netdev, &packet, 1, may_steal);
+            netdev_send(p->netdev, packets, cnt, may_steal);
         }
         break;
 
     case OVS_ACTION_ATTR_USERSPACE: {
-        struct ofpbuf *userspace_packet;
         const struct nlattr *userdata;
 
         userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
-        userspace_packet = may_steal ? packet : ofpbuf_clone(packet);
 
-        dp_netdev_output_userspace(aux->dp, userspace_packet,
-                                   miniflow_hash_5tuple(aux->key, 0)
-                                       % aux->dp->n_handlers,
-                                   DPIF_UC_ACTION, aux->key,
-                                   userdata);
+        for (i = 0; i < cnt; i++) {
+            struct ofpbuf *userspace_packet;
+            struct miniflowkey {
+                struct miniflow flow;
+                uint32_t buf[FLOW_U32S];
+            } key;
+
+            const struct miniflow *mfk = aux->key;
+
+            if (i != 0) {
+                /* The miniflow that we are passed is the miniflow of the first
+                 * packet. If this is not the first packet, re-extract the
+                 * miniflow. */
+
+                miniflow_initialize(&key.flow, key.buf);
+                miniflow_extract(packets[i], &packets[i]->md, &key.flow);
+                mfk = &key.flow;
+            }
+
+            userspace_packet = may_steal ? packets[i]: ofpbuf_clone(packets[i]);
+            dp_netdev_output_userspace(aux->dp, &userspace_packet, 1,
+                                       miniflow_hash_5tuple(mfk, 0)
+                                           % aux->dp->n_handlers,
+                                       DPIF_UC_ACTION, mfk,
+                                       userdata);
+        }
         break;
     }
 
@@ -2136,33 +2255,59 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
         uint32_t hash;
 
         hash_act = nl_attr_get(a);
-        if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
-            /* Hash need not be symmetric, nor does it need to include
-             * L2 fields. */
-            hash = miniflow_hash_5tuple(aux->key, hash_act->hash_basis);
+
+        for (i = 0; i < cnt; i++) {
+            struct miniflowkey {
+                struct miniflow flow;
+                uint32_t buf[FLOW_U32S];
+            } key;
+
+            const struct miniflow *mfk = aux->key;
+
+            if (i != 0) {
+                /* The miniflow that we are passed is the miniflow of the first
+                 * packet. If this is not the first packet, re-extract the
+                 * miniflow. */
+
+                /* TODO: this is slow. Use RSS hash in the future */
+                miniflow_initialize(&key.flow, key.buf);
+                miniflow_extract(packets[i], &packets[i]->md, &key.flow);
+                mfk = &key.flow;
+            }
+
+            if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
+                /* Hash need not be symmetric, nor does it need to include
+                 * L2 fields. */
+                hash = miniflow_hash_5tuple(mfk, hash_act->hash_basis);
+            } else {
+                VLOG_WARN("Unknown hash algorithm specified for the hash action.");
+                hash = 2;
+            }
+
             if (!hash) {
                 hash = 1; /* 0 is not valid */
             }
-
-        } else {
-            VLOG_WARN("Unknown hash algorithm specified for the hash action.");
-            hash = 2;
+            packets[i]->md.dp_hash = hash;
         }
-
-        md->dp_hash = hash;
         break;
     }
 
     case OVS_ACTION_ATTR_RECIRC:
         if (*depth < MAX_RECIRC_DEPTH) {
-            struct pkt_metadata recirc_md = *md;
-            struct ofpbuf *recirc_packet;
-
-            recirc_packet = may_steal ? packet : ofpbuf_clone(packet);
-            recirc_md.recirc_id = nl_attr_get_u32(a);
 
             (*depth)++;
-            dp_netdev_input(aux->dp, recirc_packet, &recirc_md);
+            for (i = 0; i < cnt; i++) {
+                struct ofpbuf *recirc_pkt;
+
+                recirc_pkt = (may_steal) ? packets[i]
+                                         : ofpbuf_clone(packets[i]);
+
+                recirc_pkt->md.recirc_id = nl_attr_get_u32(a);
+
+                /* TODO: dp_netdev_input again makes an unnecessary copy
+                 * of the metadata into the ofpbuf itself. */
+                dp_netdev_input(aux->dp, &recirc_pkt, 1, &recirc_pkt->md);
+            }
             (*depth)--;
 
             break;
@@ -2185,14 +2330,13 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
 
 static void
 dp_netdev_execute_actions(struct dp_netdev *dp, const struct miniflow *key,
-                          struct ofpbuf *packet, bool may_steal,
-                          struct pkt_metadata *md,
+                          struct ofpbuf **packets, int cnt, bool may_steal,
                           const struct nlattr *actions, size_t actions_len)
 {
     struct dp_netdev_execute_aux aux = {dp, key};
 
-    odp_execute_actions(&aux, packet, may_steal, md,
-                        actions, actions_len, dp_execute_cb);
+    odp_execute_actions(&aux, packets, cnt, may_steal, actions, actions_len,
+                        dp_execute_cb);
 }
 
 const struct dpif_class dpif_netdev_class = {
diff --git a/lib/dpif.c b/lib/dpif.c
index ac73be1..17e152b 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1058,13 +1058,15 @@ struct dpif_execute_helper_aux {
 /* This is called for actions that need the context of the datapath to be
  * meaningful. */
 static void
-dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet,
-                       struct pkt_metadata *md,
+dpif_execute_helper_cb(void *aux_, struct ofpbuf **packets, int cnt,
                        const struct nlattr *action, bool may_steal OVS_UNUSED)
 {
     struct dpif_execute_helper_aux *aux = aux_;
     struct dpif_execute execute;
     int type = nl_attr_type(action);
+    struct ofpbuf * packet = packets[0];
+
+    ovs_assert(cnt == 1);
 
     switch ((enum ovs_action_attr)type) {
     case OVS_ACTION_ATTR_OUTPUT:
@@ -1073,7 +1075,6 @@ dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet,
         execute.actions = action;
         execute.actions_len = NLA_ALIGN(action->nla_len);
         execute.packet = packet;
-        execute.md = *md;
         execute.needs_help = false;
         aux->error = aux->dpif->dpif_class->execute(aux->dpif, &execute);
         break;
@@ -1103,7 +1104,8 @@ dpif_execute_with_help(struct dpif *dpif, struct dpif_execute *execute)
 
     COVERAGE_INC(dpif_execute_with_help);
 
-    odp_execute_actions(&aux, execute->packet, false, &execute->md,
+    execute->packet->md = execute->md;
+    odp_execute_actions(&aux, &execute->packet, 1, false,
                         execute->actions, execute->actions_len,
                         dpif_execute_helper_cb);
     return aux.error;
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index cc18536..5709bf8 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -152,14 +152,13 @@ odp_execute_set_action(struct ofpbuf *packet, const struct nlattr *a,
 }
 
 static void
-odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal,
-                      struct pkt_metadata *,
+odp_execute_actions__(void *dp, struct ofpbuf **packets, int cnt, bool steal,
                       const struct nlattr *actions, size_t actions_len,
                       odp_execute_cb dp_execute_action, bool more_actions);
 
 static void
 odp_execute_sample(void *dp, struct ofpbuf *packet, bool steal,
-                   struct pkt_metadata *md, const struct nlattr *action,
+                   const struct nlattr *action,
                    odp_execute_cb dp_execute_action, bool more_actions)
 {
     const struct nlattr *subactions = NULL;
@@ -187,20 +186,21 @@ odp_execute_sample(void *dp, struct ofpbuf *packet, bool steal,
         }
     }
 
-    odp_execute_actions__(dp, packet, steal, md, nl_attr_get(subactions),
+    odp_execute_actions__(dp, &packet, 1, steal, nl_attr_get(subactions),
                           nl_attr_get_size(subactions), dp_execute_action,
                           more_actions);
 }
 
 static void
-odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal,
-                      struct pkt_metadata *md,
+odp_execute_actions__(void *dp, struct ofpbuf **packets, int cnt, bool steal,
                       const struct nlattr *actions, size_t actions_len,
                       odp_execute_cb dp_execute_action, bool more_actions)
 {
     const struct nlattr *a;
     unsigned int left;
 
+    int i;
+
     NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
         int type = nl_attr_type(a);
 
@@ -215,7 +215,7 @@ odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal,
                 bool may_steal = steal && (!more_actions
                                            && left <= NLA_ALIGN(a->nla_len)
                                            && type != OVS_ACTION_ATTR_RECIRC);
-                dp_execute_action(dp, packet, md, a, may_steal);
+                dp_execute_action(dp, packets, cnt, a, may_steal);
             }
             break;
 
@@ -230,9 +230,11 @@ odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal,
                 struct flow flow;
                 uint32_t hash;
 
-                flow_extract(packet, md, &flow);
-                hash = flow_hash_5tuple(&flow, hash_act->hash_basis);
-                md->dp_hash = hash ? hash : 1;
+                for (i = 0; i < cnt; i++) {
+                    flow_extract(packets[i], &packets[i]->md, &flow);
+                    hash = flow_hash_5tuple(&flow, hash_act->hash_basis);
+                    packets[i]->md.dp_hash = hash ? hash : 1;
+                }
             } else {
                 /* Assert on unknown hash algorithm.  */
                 OVS_NOT_REACHED();
@@ -242,31 +244,46 @@ odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal,
 
         case OVS_ACTION_ATTR_PUSH_VLAN: {
             const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
-            eth_push_vlan(packet, htons(ETH_TYPE_VLAN), vlan->vlan_tci);
+
+            for (i = 0; i < cnt; i++) {
+                eth_push_vlan(packets[i], htons(ETH_TYPE_VLAN), vlan->vlan_tci);
+            }
             break;
         }
 
         case OVS_ACTION_ATTR_POP_VLAN:
-            eth_pop_vlan(packet);
+            for (i = 0; i < cnt; i++) {
+                eth_pop_vlan(packets[i]);
+            }
             break;
 
         case OVS_ACTION_ATTR_PUSH_MPLS: {
             const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
-            push_mpls(packet, mpls->mpls_ethertype, mpls->mpls_lse);
+            for (i = 0; i < cnt; i++) {
+                push_mpls(packets[i], mpls->mpls_ethertype, mpls->mpls_lse);
+            }
             break;
          }
 
         case OVS_ACTION_ATTR_POP_MPLS:
-            pop_mpls(packet, nl_attr_get_be16(a));
+            for (i = 0; i < cnt; i++) {
+                pop_mpls(packets[i], nl_attr_get_be16(a));
+            }
             break;
 
         case OVS_ACTION_ATTR_SET:
-            odp_execute_set_action(packet, nl_attr_get(a), md);
+            for (i = 0; i < cnt; i++) {
+                odp_execute_set_action(packets[i], nl_attr_get(a),
+                                       &packets[i]->md);
+            }
             break;
 
         case OVS_ACTION_ATTR_SAMPLE:
-            odp_execute_sample(dp, packet, steal, md, a, dp_execute_action,
-                               more_actions || left > NLA_ALIGN(a->nla_len));
+            for (i = 0; i < cnt; i++) {
+                odp_execute_sample(dp, packets[i], steal, a, dp_execute_action,
+                                   more_actions ||
+                                   left > NLA_ALIGN(a->nla_len));
+            }
             break;
 
         case OVS_ACTION_ATTR_UNSPEC:
@@ -277,16 +294,19 @@ odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal,
 }
 
 void
-odp_execute_actions(void *dp, struct ofpbuf *packet, bool steal,
-                    struct pkt_metadata *md,
+odp_execute_actions(void *dp, struct ofpbuf **packets, int cnt, bool steal,
                     const struct nlattr *actions, size_t actions_len,
                     odp_execute_cb dp_execute_action)
 {
-    odp_execute_actions__(dp, packet, steal, md, actions, actions_len,
+    odp_execute_actions__(dp, packets, cnt, steal, actions, actions_len,
                           dp_execute_action, false);
 
     if (!actions_len && steal) {
         /* Drop action. */
-        ofpbuf_delete(packet);
+        int i = 0;
+
+        for (i = 0; i < cnt; i++) {
+            ofpbuf_delete(packets[i]);
+        }
     }
 }
diff --git a/lib/odp-execute.h b/lib/odp-execute.h
index 91f0c51..1f0c4de 100644
--- a/lib/odp-execute.h
+++ b/lib/odp-execute.h
@@ -27,16 +27,14 @@ struct nlattr;
 struct ofpbuf;
 struct pkt_metadata;
 
-typedef void (*odp_execute_cb)(void *dp, struct ofpbuf *packet,
-                               struct pkt_metadata *,
+typedef void (*odp_execute_cb)(void *dp, struct ofpbuf **packets, int cnt,
                                const struct nlattr *action, bool may_steal);
 
 /* Actions that need to be executed in the context of a datapath are handed
  * to 'dp_execute_action', if non-NULL.  Currently this is called only for
  * actions OVS_ACTION_ATTR_OUTPUT and OVS_ACTION_ATTR_USERSPACE so
  * 'dp_execute_action' needs to handle only these. */
-void odp_execute_actions(void *dp, struct ofpbuf *packet, bool steal,
-                    struct pkt_metadata *,
+void odp_execute_actions(void *dp, struct ofpbuf **packets, int cnt, bool steal,
                     const struct nlattr *actions, size_t actions_len,
                     odp_execute_cb dp_execute_action);
 #endif
diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
index 446593d..ff1fbf2 100644
--- a/lib/ofpbuf.c
+++ b/lib/ofpbuf.c
@@ -204,6 +204,7 @@ ofpbuf_clone_with_headroom(const struct ofpbuf *buffer, size_t headroom)
     new_buffer->l2_5_ofs = buffer->l2_5_ofs;
     new_buffer->l3_ofs = buffer->l3_ofs;
     new_buffer->l4_ofs = buffer->l4_ofs;
+    new_buffer->md = buffer->md;
 
     return new_buffer;
 }
diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h
index 13a3e9d..9f4b30f 100644
--- a/lib/ofpbuf.h
+++ b/lib/ofpbuf.h
@@ -76,6 +76,8 @@ struct ofpbuf {
                                    or UINT16_MAX. */
     enum ofpbuf_source source;  /* Source of memory allocated as 'base'. */
     struct list list_node;      /* Private list element for use by owner. */
+
+    struct pkt_metadata md;     /* Packet metadata */
 };
 
 static inline void * ofpbuf_data(const struct ofpbuf *);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index eded9d8..792476c 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -2644,7 +2644,6 @@ execute_controller_action(struct xlate_ctx *ctx, int len,
 {
     struct ofproto_packet_in *pin;
     struct ofpbuf *packet;
-    struct pkt_metadata md = PKT_METADATA_INITIALIZER(0);
 
     ctx->xout->slow |= SLOW_CONTROLLER;
     if (!ctx->xin->packet) {
@@ -2652,12 +2651,13 @@ execute_controller_action(struct xlate_ctx *ctx, int len,
     }
 
     packet = ofpbuf_clone(ctx->xin->packet);
+    packet->md = PKT_METADATA_INITIALIZER(0);
 
     ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
                                           &ctx->xout->odp_actions,
                                           &ctx->xout->wc);
 
-    odp_execute_actions(NULL, packet, false, &md,
+    odp_execute_actions(NULL, &packet, 1, false,
                         ofpbuf_data(&ctx->xout->odp_actions),
                         ofpbuf_size(&ctx->xout->odp_actions), NULL);
 
-- 
2.0.0.rc2




More information about the dev mailing list