[ovs-dev] [PATCH v2 2/2] lib/classifier: Optimize megaflows for single rule case.
Jarno Rajahalme
jrajahalme at nicira.com
Mon May 19 19:35:46 UTC 2014
When, during a classifier lookup, we narrow down to a single potential
rule, it is enough to match on ("unwildcard") one bit that differs
between the packet and the rule.
This is a special case of the more general algorithm, where it is
sufficient to match on enough bits that separates the packet from all
higher priority rules than the matched rule. For a miss that would be
all the rules. Implementing this is expensive for a more than a few
rules. This patch starts by doing this for a single rule when we
already have it, also reducing the lookup cost by finishing the lookup
earlier than before.
Signed-off-by: Jarno Rajahalme <jrajahalme at nicira.com>
---
lib/classifier.c | 61 +++++++++++++++++++++++--------------------------
lib/flow.h | 18 +++++++++++++--
tests/classifier.at | 10 ++++----
tests/ofproto-dpif.at | 10 ++++----
4 files changed, 55 insertions(+), 44 deletions(-)
diff --git a/lib/classifier.c b/lib/classifier.c
index 2004266..ae34019 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -1638,14 +1638,24 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
static inline bool
miniflow_and_mask_matches_flow(const struct miniflow *flow,
const struct minimask *mask,
- const struct flow *target)
-{
- const uint32_t *flowp = miniflow_get_u32_values(flow);
- const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
- uint32_t target_u32;
-
- FLOW_FOR_EACH_IN_MAP(target_u32, target, mask->masks.map) {
- if ((*flowp++ ^ target_u32) & *maskp++) {
+ const struct flow *target,
+ struct flow_wildcards *wc)
+{
+ const ovs_be32 *flowp = miniflow_get_be32_values(flow);
+ const ovs_be32 *maskp = miniflow_get_be32_values(&mask->masks);
+ uint32_t idx;
+
+ MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
+ ovs_be32 diff = (*flowp++ ^ FLOW_BE32_VALUE(target, idx)) & *maskp++;
+
+ if (diff) {
+ /* Only unwildcard if none of the differing bits is already
+ * exact-matched. */
+ if (wc && !(((ovs_be32 *)&wc->masks)[idx] & diff)) {
+ /* Keep the highest bit of the difference. */
+ diff = htonl(1 << (log_2_floor(ntohl(diff))));
+ ((ovs_be32 *)&wc->masks)[idx] |= diff;
+ }
return false;
}
}
@@ -1661,7 +1671,7 @@ find_match(const struct cls_subtable *subtable, const struct flow *flow,
HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) {
if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask,
- flow)) {
+ flow, NULL)) {
return rule;
}
}
@@ -1675,7 +1685,7 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
struct flow_wildcards *wc)
{
uint32_t basis = 0, hash;
- struct cls_match *rule = NULL;
+ struct cls_match *rule;
int i;
struct range ofs;
@@ -1705,23 +1715,17 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
}
/* If we have narrowed down to a single rule already, check whether
- * that rule matches. If it does match, then we're done. If it does
- * not match, then we know that we will never get a match, but we do
- * not yet know how many wildcards we need to fold into 'wc' so we
- * continue iterating through indices to find that out. (We won't
- * waste time calling miniflow_and_mask_matches_flow() again because
- * we've set 'rule' nonnull.)
- *
- * This check shows a measurable benefit with non-trivial flow tables.
+ * that rule matches. Either way, we're done.
*
* (Rare) hash collisions may cause us to miss the opportunity for this
* optimization. */
- if (!inode->s && !rule) {
+ if (!inode->s) {
ASSIGN_CONTAINER(rule, inode - i, index_nodes);
if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask,
- flow)) {
+ flow, wc)) {
goto out;
}
+ goto range_out;
}
}
ofs.end = FLOW_U32S;
@@ -1729,16 +1733,9 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) {
goto range_out;
}
- if (!rule) {
- /* Multiple potential matches exist, look for one. */
- hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
- ofs.end, &basis);
- rule = find_match(subtable, flow, hash);
- } else {
- /* We already narrowed the matching candidates down to just 'rule',
- * but it didn't match. */
- rule = NULL;
- }
+ hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
+ ofs.end, &basis);
+ rule = find_match(subtable, flow, hash);
if (!rule && subtable->ports_mask_len) {
/* Ports are always part of the final range, if any.
* No match was found for the ports. Use the ports trie to figure out
@@ -1756,12 +1753,12 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
ofs.start = TP_PORTS_OFS32;
goto range_out;
}
- out:
+out:
/* Must unwildcard all the fields, as they were looked at. */
flow_wildcards_fold_minimask(wc, &subtable->mask);
return rule;
- range_out:
+range_out:
/* Must unwildcard the fields looked up so far, if any. */
if (ofs.start) {
flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, ofs.start);
diff --git a/lib/flow.h b/lib/flow.h
index 0f3ffde..bc81d89 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -411,24 +411,38 @@ void miniflow_destroy(struct miniflow *);
void miniflow_expand(const struct miniflow *, struct flow *);
+#define FLOW_U32_VALUE(FLOW, INDEX) ((const uint32_t *)(FLOW))[INDEX]
+#define FLOW_BE32_VALUE(FLOW, INDEX) ((const ovs_be32 *)(FLOW))[INDEX]
+
static inline uint32_t
flow_get_next_in_map(const struct flow *flow, uint64_t map, uint32_t *value)
{
if (map) {
- *value = ((const uint32_t *)flow)[raw_ctz(map)];
+ *value = FLOW_U32_VALUE(flow, raw_ctz(map));
return true;
}
return false;
}
/* Iterate through all flow u32 values specified by 'MAP'.
- * This works as the first statement in a block.*/
+ *
+ * This works as the first statement in a block.
+ * It would be nice to use C99 declaration in the iteration statement
+ * instead, but that is still prohibited by CodingStyle. */
#define FLOW_FOR_EACH_IN_MAP(VALUE, FLOW, MAP) \
uint64_t map_; \
for (map_ = (MAP); \
flow_get_next_in_map(FLOW, map_, &(VALUE)); \
map_ = zero_rightmost_1bit(map_))
+/* Iterate through all struct flow u32 indices specified by 'MAP'.
+ * This works as the first statement in a block.*/
+#define MAP_FOR_EACH_INDEX(U32IDX, MAP) \
+ uint64_t map_; \
+ for (map_ = (MAP); \
+ ((U32IDX) = ctz64(map_)) < FLOW_U32S; \
+ map_ = zero_rightmost_1bit(map_))
+
#define FLOW_U32_SIZE(FIELD) \
DIV_ROUND_UP(sizeof(((struct flow *)0)->FIELD), sizeof(uint32_t))
diff --git a/tests/classifier.at b/tests/classifier.at
index 7bd0493..61b5024 100644
--- a/tests/classifier.at
+++ b/tests/classifier.at
@@ -45,7 +45,7 @@ Datapath actions: 1
])
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
AT_CHECK([tail -2 stdout], [0],
- [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.2,nw_frag=no
+ [Megaflow: recirc_id=0,skb_priority=0,ip,in_port=1,nw_dst=128.0.0.0/1,nw_frag=no
Datapath actions: drop
])
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
@@ -55,7 +55,7 @@ Datapath actions: drop
])
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
AT_CHECK([tail -2 stdout], [0],
- [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x40/0xfff0
+ [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x0/0x10
Datapath actions: 2
])
OVS_VSWITCHD_STOP
@@ -83,7 +83,7 @@ AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
# nw_dst and nw_src should be on by default
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
AT_CHECK([tail -2 stdout], [0],
- [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
+ [Megaflow: recirc_id=0,skb_priority=0,ip,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
Datapath actions: drop
])
@@ -98,7 +98,7 @@ AT_CHECK([ovs-vsctl set Flow_Table t0 prefixes=nw_dst,nw_dst], [1], [],
AT_CHECK([ovs-vsctl set Flow_Table t0 prefixes=nw_dst], [0])
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
AT_CHECK([tail -2 stdout], [0],
- [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
+ [Megaflow: recirc_id=0,skb_priority=0,ip,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
Datapath actions: drop
])
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=2,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
@@ -113,7 +113,7 @@ Datapath actions: drop
])
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
AT_CHECK([tail -2 stdout], [0],
- [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0xffc0,tp_dst=0x40/0xfff0
+ [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0x40,tp_dst=0x40/0xfff0
Datapath actions: 3
])
AT_CHECK([ovs-vsctl set Flow_Table t0 prefixes=none], [0])
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index fad8f2c..22db682 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -3874,7 +3874,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
sleep 1
AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
-skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
+skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:02,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
])
OVS_VSWITCHD_STOP
AT_CLEANUP
@@ -3892,7 +3892,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
sleep 1
AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
-skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.252,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
+skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.4,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:00,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/255.255.255.255,dst=10.0.0.3/0.0.0.0,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
])
OVS_VSWITCHD_STOP
@@ -3912,7 +3912,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
sleep 1
AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:07/00:00:00:00:00:00,dst=50:54:00:00:00:05/00:00:00:00:00:00),eth_type(0x86dd),ipv6(src=2001:db8:3c4d:1:2:3:4:5/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff,dst=fe80::2/::,label=0/0,proto=10/0,tclass=0x70/0,hlimit=128/0,frag=no/0xff), actions: <del>
-skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x86dd),ipv6(src=2001:db8:3c4d:5:4:3:2:1/ffff:ffff:ffff:fffc::,dst=2001:db8:3c4d:1:2:3:4:1/::,label=0/0,proto=99/0,tclass=0x70/0,hlimit=64/0,frag=no/0xff), actions: <del>
+skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x86dd),ipv6(src=2001:db8:3c4d:5:4:3:2:1/0:0:0:4::,dst=2001:db8:3c4d:1:2:3:4:1/::,label=0/0,proto=99/0,tclass=0x70/0,hlimit=64/0,frag=no/0xff), actions: <del>
])
OVS_VSWITCHD_STOP
AT_CLEANUP
@@ -4096,7 +4096,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
sleep 1
AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
-skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
+skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:02,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
])
OVS_VSWITCHD_STOP
AT_CLEANUP
@@ -4297,7 +4297,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
sleep 1
AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
-skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.252,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
+skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.4,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:00,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8/0,code=0/0), actions: <del>
])
OVS_VSWITCHD_STOP
--
1.7.10.4
More information about the dev
mailing list