[ovs-dev] [PATCH 5/8] lib/classifier: Optimize megaflows for single rule case.

Jarno Rajahalme jrajahalme at nicira.com
Mon Jun 9 18:53:52 UTC 2014


When, during a classifier lookup, we narrow down to a single potential
rule, it is enough to match on ("unwildcard") one bit that differs
between the packet and the rule.

This is a special case of the more general algorithm, where it is
sufficient to match on enough bits that separates the packet from all
higher priority rules than the matched rule.  For a miss that would be
all the rules.  Implementing this is expensive for a more than a few
rules.  This patch starts by doing this for a single rule when we
already have it, also reducing the lookup cost by finishing the lookup
earlier than before.

Signed-off-by: Jarno Rajahalme <jrajahalme at nicira.com>
---
 lib/classifier.c      |   57 +++++++++++++++++++++++--------------------------
 lib/flow.h            |   12 ++++++++++-
 tests/classifier.at   |   10 ++++-----
 tests/ofproto-dpif.at |   10 ++++-----
 4 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index f448ded..b5416c6 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -1379,19 +1379,29 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
  * value has the correct value in 'target'.
  *
  * This function is equivalent to miniflow_equal_flow_in_minimask(flow,
- * target, mask) but it is faster because of the invariant that
- * flow->map and mask->masks.map are the same. */
+ * target, mask) but this is faster because of the invariant that
+ * flow->map and mask->masks.map are the same, and that this version
+ * takes the 'wc'. */
 static inline bool
 miniflow_and_mask_matches_flow(const struct miniflow *flow,
                                const struct minimask *mask,
-                               const struct flow *target)
+                               const struct flow *target,
+                               struct flow_wildcards *wc)
 {
     const uint32_t *flowp = miniflow_get_u32_values(flow);
     const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
-    uint32_t target_u32;
+    uint32_t idx;
 
-    FLOW_FOR_EACH_IN_MAP(target_u32, target, mask->masks.map) {
-        if ((*flowp++ ^ target_u32) & *maskp++) {
+    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
+        uint32_t diff = (*flowp++ ^ FLOW_U32_VALUE(target, idx)) & *maskp++;
+
+        if (diff) {
+            /* Only unwildcard if none of the differing bits is already
+             * exact-matched. */
+            if (wc && !(FLOW_U32_VALUE(&wc->masks, idx) & diff)) {
+                /* Keep one bit of the difference. */
+                FLOW_U32_VALUE(&wc->masks, idx) |= rightmost_1bit(diff);
+            }
             return false;
         }
     }
@@ -1407,7 +1417,7 @@ find_match(const struct cls_subtable *subtable, const struct flow *flow,
 
     HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) {
         if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask,
-                                           flow)) {
+                                           flow, NULL)) {
             return rule;
         }
     }
@@ -1421,7 +1431,7 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
               struct flow_wildcards *wc)
 {
     uint32_t basis = 0, hash;
-    struct cls_match *rule = NULL;
+    struct cls_match *rule;
     int i;
     struct range ofs;
 
@@ -1451,23 +1461,17 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
         }
 
         /* If we have narrowed down to a single rule already, check whether
-         * that rule matches.  If it does match, then we're done.  If it does
-         * not match, then we know that we will never get a match, but we do
-         * not yet know how many wildcards we need to fold into 'wc' so we
-         * continue iterating through indices to find that out.  (We won't
-         * waste time calling miniflow_and_mask_matches_flow() again because
-         * we've set 'rule' nonnull.)
-         *
-         * This check shows a measurable benefit with non-trivial flow tables.
+         * that rule matches.  Either way, we're done.
          *
          * (Rare) hash collisions may cause us to miss the opportunity for this
          * optimization. */
-        if (!inode->s && !rule) {
+        if (!inode->s) {
             ASSIGN_CONTAINER(rule, inode - i, index_nodes);
             if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask,
-                                               flow)) {
+                                               flow, wc)) {
                 goto out;
             }
+            goto range_out;
         }
     }
     ofs.end = FLOW_U32S;
@@ -1475,16 +1479,9 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
     if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) {
         goto range_out;
     }
-    if (!rule) {
-        /* Multiple potential matches exist, look for one. */
-        hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
-                                           ofs.end, &basis);
-        rule = find_match(subtable, flow, hash);
-    } else {
-        /* We already narrowed the matching candidates down to just 'rule',
-         * but it didn't match. */
-        rule = NULL;
-    }
+    hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
+                                       ofs.end, &basis);
+    rule = find_match(subtable, flow, hash);
     if (!rule && subtable->ports_mask_len) {
         /* Ports are always part of the final range, if any.
          * No match was found for the ports.  Use the ports trie to figure out
@@ -1502,12 +1499,12 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
         ofs.start = TP_PORTS_OFS32;
         goto range_out;
     }
- out:
+out:
     /* Must unwildcard all the fields, as they were looked at. */
     flow_wildcards_fold_minimask(wc, &subtable->mask);
     return rule;
 
- range_out:
+range_out:
     /* Must unwildcard the fields looked up so far, if any. */
     if (ofs.start) {
         flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, ofs.start);
diff --git a/lib/flow.h b/lib/flow.h
index 139e7f6..60501a4 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -411,11 +411,15 @@ void miniflow_destroy(struct miniflow *);
 
 void miniflow_expand(const struct miniflow *, struct flow *);
 
+/* These work as lvalues, too. */
+#define FLOW_U32_VALUE(FLOW, INDEX) ((uint32_t *)(FLOW))[INDEX]
+#define FLOW_BE32_VALUE(FLOW, INDEX) ((ovs_be32 *)(FLOW))[INDEX]
+
 static inline bool
 flow_get_next_in_map(const struct flow *flow, uint64_t map, uint32_t *value)
 {
     if (map) {
-        *value = ((const uint32_t *)flow)[raw_ctz(map)];
+        *value = FLOW_U32_VALUE(flow, raw_ctz(map));
         return true;
     }
     return false;
@@ -427,6 +431,12 @@ flow_get_next_in_map(const struct flow *flow, uint64_t map, uint32_t *value)
          flow_get_next_in_map(FLOW, map__, &(VALUE));  \
          map__ = zero_rightmost_1bit(map__))
 
+/* Iterate through all struct flow u32 indices specified by 'MAP'. */
+#define MAP_FOR_EACH_INDEX(U32IDX, MAP)         \
+    for (uint64_t map__ = (MAP);                \
+         ((U32IDX) = ctz64(map__)) < FLOW_U32S; \
+         map__ = zero_rightmost_1bit(map__))
+
 #define FLOW_U32_SIZE(FIELD)                                            \
     DIV_ROUND_UP(sizeof(((struct flow *)0)->FIELD), sizeof(uint32_t))
 
diff --git a/tests/classifier.at b/tests/classifier.at
index 7bd0493..710060f 100644
--- a/tests/classifier.at
+++ b/tests/classifier.at
@@ -45,7 +45,7 @@ Datapath actions: 1
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.2,nw_frag=no
+  [Megaflow: recirc_id=0,skb_priority=0,ip,in_port=1,nw_dst=0.0.0.0/2.0.0.0,nw_frag=no
 Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
@@ -55,7 +55,7 @@ Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x40/0xfff0
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x1/0x1
 Datapath actions: 2
 ])
 OVS_VSWITCHD_STOP
@@ -83,7 +83,7 @@ AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 # nw_dst and nw_src should be on by default
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
+  [Megaflow: recirc_id=0,skb_priority=0,ip,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
 Datapath actions: drop
 ])
 
@@ -98,7 +98,7 @@ AT_CHECK([ovs-vsctl set Flow_Table t0 prefixes=nw_dst,nw_dst], [1], [],
 AT_CHECK([ovs-vsctl set Flow_Table t0 prefixes=nw_dst], [0])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
+  [Megaflow: recirc_id=0,skb_priority=0,ip,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
 Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=2,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
@@ -113,7 +113,7 @@ Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0xffc0,tp_dst=0x40/0xfff0
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0x1,tp_dst=0x40/0xfff0
 Datapath actions: 3
 ])
 AT_CHECK([ovs-vsctl set Flow_Table t0 prefixes=none], [0])
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index e587d0d..8875c89 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -3915,7 +3915,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
 sleep 1
 AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
 skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
-skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
+skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:02,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
 ])
 OVS_VSWITCHD_STOP
 AT_CLEANUP
@@ -3933,7 +3933,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
 AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
 sleep 1
 AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
-skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.252,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
+skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.2,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
 skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:00,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/255.255.255.255,dst=10.0.0.3/0.0.0.0,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
 ])
 OVS_VSWITCHD_STOP
@@ -3953,7 +3953,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
 sleep 1
 AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
 skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:07/00:00:00:00:00:00,dst=50:54:00:00:00:05/00:00:00:00:00:00),eth_type(0x86dd),ipv6(src=2001:db8:3c4d:1:2:3:4:5/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff,dst=fe80::2/::,label=0/0,proto=10/0,tclass=0x70/0,hlimit=128/0,frag=no/0xff), actions: <del>
-skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x86dd),ipv6(src=2001:db8:3c4d:5:4:3:2:1/ffff:ffff:ffff:fffc::,dst=2001:db8:3c4d:1:2:3:4:1/::,label=0/0,proto=99/0,tclass=0x70/0,hlimit=64/0,frag=no/0xff), actions: <del>
+skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x86dd),ipv6(src=2001:db8:3c4d:5:4:3:2:1/0:0:0:4::,dst=2001:db8:3c4d:1:2:3:4:1/::,label=0/0,proto=99/0,tclass=0x70/0,hlimit=64/0,frag=no/0xff), actions: <del>
 ])
 OVS_VSWITCHD_STOP
 AT_CLEANUP
@@ -4137,7 +4137,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
 sleep 1
 AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
 skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
-skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
+skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:02,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
 ])
 OVS_VSWITCHD_STOP
 AT_CLEANUP
@@ -4338,7 +4338,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
 AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
 sleep 1
 AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
-skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.252,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
+skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.2,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: <del>
 skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:00,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8/0,code=0/0), actions: <del>
 ])
 OVS_VSWITCHD_STOP
-- 
1.7.10.4




More information about the dev mailing list