[ovs-dev] [PATCH v10 3/7] dpif-netdev: retrieve flow directly from the flow mark
Shahaf Shuler
shahafs at mellanox.com
Fri May 18 09:14:13 UTC 2018
From: Yuanhan Liu <yliu at fridaylinux.org>
So that we could skip some very costly CPU operations, including but
not limited to miniflow_extract, emc lookup, dpcls lookup, etc. Thus,
performance could be greatly improved.
A PHY-PHY forwarding test with 1000 mega flows (udp,tp_src=1000-1999) and
1 million streams (tp_src=1000-1999, tp_dst=2000-2999) shows more than a
260% performance boost.
Note that although the heavy miniflow_extract is skipped, we still have
to do per-packet checking, because we have to check the tcp_flags.
Co-authored-by: Finn Christensen <fc at napatech.com>
Signed-off-by: Yuanhan Liu <yliu at fridaylinux.org>
Signed-off-by: Finn Christensen <fc at napatech.com>
Signed-off-by: Shahaf Shuler <shahafs at mellanox.com>
---
lib/dp-packet.h | 13 ++++++++++++
lib/dpif-netdev.c | 44 ++++++++++++++++++++++++++++++++++------
lib/flow.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++
lib/flow.h | 1 +
4 files changed, 106 insertions(+), 6 deletions(-)
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 21c8ca525..dd3f17b9f 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
#define reset_dp_packet_checksum_ol_flags(arg)
#endif
+/* Returns true and stores the hardware flow mark in '*mark' if 'p' carries
+ * one; returns false otherwise.  With DPDK, the mark is present when the NIC
+ * set PKT_RX_FDIR_ID (rte_flow MARK action) and is read from the mbuf's
+ * fdir hash field.  Without DPDK support this always returns false. */
+static inline bool
+dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
+ uint32_t *mark OVS_UNUSED)
+{
+#ifdef DPDK_NETDEV
+ if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
+ *mark = p->mbuf.hash.fdir.hi;
+ return true;
+ }
+#endif
+ return false;
+}
+
enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
struct dp_packet_batch {
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 0f69d7d96..4748f70fa 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2118,6 +2118,23 @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
}
}
+/* Looks up the datapath flow associated with hardware offload mark 'mark'.
+ * Returns the flow, or NULL if no live flow with that mark was installed by
+ * this PMD thread.  The pmd_id check is needed because the same mark value
+ * may be associated with flows belonging to different PMD threads. */
+static struct dp_netdev_flow *
+mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd,
+ const uint32_t mark)
+{
+ struct dp_netdev_flow *flow;
+
+ CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0),
+ &flow_mark.mark_to_flow) {
+ if (flow->mark == mark && flow->pmd_id == pmd->core_id &&
+ flow->dead == false) {
+ return flow;
+ }
+ }
+
+ return NULL;
+}
+
static void
dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev_flow *flow)
@@ -5364,10 +5381,10 @@ struct packet_batch_per_flow {
static inline void
packet_batch_per_flow_update(struct packet_batch_per_flow *batch,
struct dp_packet *packet,
- const struct miniflow *mf)
+ uint16_t tcp_flags)
{
batch->byte_count += dp_packet_size(packet);
- batch->tcp_flags |= miniflow_get_tcp_flags(mf);
+ batch->tcp_flags |= tcp_flags;
batch->array.packets[batch->array.count++] = packet;
}
@@ -5401,7 +5418,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
static inline void
dp_netdev_queue_batches(struct dp_packet *pkt,
- struct dp_netdev_flow *flow, const struct miniflow *mf,
+ struct dp_netdev_flow *flow, uint16_t tcp_flags,
struct packet_batch_per_flow *batches,
size_t *n_batches)
{
@@ -5412,7 +5429,7 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
packet_batch_per_flow_init(batch, flow);
}
- packet_batch_per_flow_update(batch, pkt, mf);
+ packet_batch_per_flow_update(batch, pkt, tcp_flags);
}
/* Try to process all ('cnt') the 'packets' using only the exact match cache
@@ -5443,6 +5460,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
const size_t cnt = dp_packet_batch_size(packets_);
uint32_t cur_min;
int i;
+ uint16_t tcp_flags;
atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
pmd_perf_update_counter(&pmd->perf_stats,
@@ -5451,6 +5469,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) {
struct dp_netdev_flow *flow;
+ uint32_t mark;
if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
dp_packet_delete(packet);
@@ -5458,6 +5477,16 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
continue;
}
+ if (dp_packet_has_flow_mark(packet, &mark)) {
+ flow = mark_to_flow_find(pmd, mark);
+ if (flow) {
+ tcp_flags = parse_tcp_flags(packet);
+ dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
+ n_batches);
+ continue;
+ }
+ }
+
if (i != cnt - 1) {
struct dp_packet **packets = packets_->packets;
/* Prefetch next packet data and metadata. */
@@ -5483,7 +5512,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
flow = NULL;
}
if (OVS_LIKELY(flow)) {
- dp_netdev_queue_batches(packet, flow, &key->mf, batches,
+ tcp_flags = miniflow_get_tcp_flags(&key->mf);
+ dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
n_batches);
} else {
/* Exact match cache missed. Group missed packets together at
@@ -5670,7 +5700,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
flow = dp_netdev_flow_cast(rules[i]);
emc_probabilistic_insert(pmd, &keys[i], flow);
- dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
+ dp_netdev_queue_batches(packet, flow,
+ miniflow_get_tcp_flags(&keys[i].mf),
+ batches, n_batches);
}
pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
diff --git a/lib/flow.c b/lib/flow.c
index 2c99a5ff4..5413a4ceb 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1019,6 +1019,60 @@ parse_dl_type(const struct eth_header *data_, size_t size)
return parse_ethertype(&data, &size);
}
+/* Parses 'packet' just deep enough to extract its TCP flags, skipping the
+ * full miniflow_extract().  Returns the flags in the network-order form
+ * produced by TCP_FLAGS_BE16(), or 0 if the packet is not an unfragmented
+ * Ethernet/IPv4 or Ethernet/IPv6 TCP packet (or fails sanity checks). */
+uint16_t
+parse_tcp_flags(struct dp_packet *packet)
+{
+    const void *data = dp_packet_data(packet);
+    size_t size = dp_packet_size(packet);
+    ovs_be16 dl_type;
+    uint8_t nw_frag = 0, nw_proto = 0;
+
+    if (packet->packet_type != htonl(PT_ETH)) {
+        return 0;
+    }
+
+    data_pull(&data, &size, ETH_ADDR_LEN * 2);
+    dl_type = parse_ethertype(&data, &size);
+    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
+        const struct ip_header *nh = data;
+        int ip_len;
+        uint16_t tot_len;
+
+        if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) {
+            return 0;
+        }
+        nw_proto = nh->ip_proto;
+        nw_frag = ipv4_get_nw_frag(nh);
+
+        size = tot_len;   /* Never pull padding. */
+        data_pull(&data, &size, ip_len);
+    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
+        const struct ovs_16aligned_ip6_hdr *nh = data;
+
+        if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
+            return 0;
+        }
+        data_pull(&data, &size, sizeof *nh);
+
+        size = ntohs(nh->ip6_plen); /* Never pull padding. */
+
+        /* Seed 'nw_proto' from the fixed header *before* walking extension
+         * headers: parse_ipv6_ext_hdrs__() reads it to identify the first
+         * next-header and leaves the upper-layer protocol in it on return.
+         * Assigning it afterwards would clobber that result and, with
+         * 'nw_proto' still 0 (== IPPROTO_HOPOPTS), misparse the first
+         * header. */
+        nw_proto = nh->ip6_nxt;
+        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) {
+            return 0;
+        }
+    } else {
+        return 0;
+    }
+
+    if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == IPPROTO_TCP &&
+        size >= TCP_HEADER_LEN) {
+        const struct tcp_header *tcp = data;
+
+        return TCP_FLAGS_BE16(tcp->tcp_ctl);
+    }
+
+    return 0;
+}
+
/* For every bit of a field that is wildcarded in 'wildcards', sets the
* corresponding bit in 'flow' to zero. */
void
diff --git a/lib/flow.h b/lib/flow.h
index af829319e..888f8cc9b 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -132,6 +132,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
uint8_t *nw_frag);
ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size);
bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key);
+uint16_t parse_tcp_flags(struct dp_packet *packet);
static inline uint64_t
flow_get_xreg(const struct flow *flow, int idx)
--
2.12.0
More information about the dev
mailing list