[ovs-dev] [PATCH v2] lib/classifier: Use a prefix tree to optimize ports wildcarding.

Jarno Rajahalme jrajahalme at nicira.com
Wed Apr 30 21:12:15 UTC 2014


Pushed to master,

  Jarno

On Apr 16, 2014, at 2:40 PM, Jarno Rajahalme <jrajahalme at nicira.com> wrote:

> Thanks for your review! I'll hold on to this until we have figured out when to enable this feature.
> 
>  Jarno
> 
>> On Apr 16, 2014, at 2:28 PM, Ethan Jackson <ethan at nicira.com> wrote:
>> 
>> Acked-by: Ethan Jackson <ethan at nicira.com>
>> 
>> I suppose my only worry with this is if we should hold off until the
>> exact match cache is added to the linux kernel datapath as well . . .
>> 
>> Ethan
>> 
>>> On Mon, Apr 14, 2014 at 1:14 PM, Jarno Rajahalme <jrajahalme at nicira.com> wrote:
>>> This should optimize port masks for megaflows for typical port usage
>>> in matches.  Each subtable has it's own trie if the subtable matches
>>> any of the ports bits.  This trie is consulted only after failing
>>> lookup to determine the number of bits that needs to be unwildcarded
>>> to guarantee that any packet that should match on any of the other
>>> rules will NOT match this megaflow.
>>> 
>>> This version is not configurable and is always on.
>>> 
>>> Signed-off-by: Jarno Rajahalme <jrajahalme at nicira.com>
>>> ---
>>> v2: Rebase to master.
>>> 
>>> lib/classifier.c    |   90 +++++++++++++++++++++++++++++++++++++++++++++++----
>>> lib/classifier.h    |    2 ++
>>> tests/classifier.at |    6 ++--
>>> 3 files changed, 89 insertions(+), 9 deletions(-)
>>> 
>>> diff --git a/lib/classifier.c b/lib/classifier.c
>>> index 55ca5ea..ef20bd1 100644
>>> --- a/lib/classifier.c
>>> +++ b/lib/classifier.c
>>> @@ -30,6 +30,10 @@
>>> 
>>> VLOG_DEFINE_THIS_MODULE(classifier);
>>> 
>>> +/* Ports trie depends on both ports sharing the same ovs_be32. */
>>> +#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
>>> +BUILD_ASSERT_DECL((offsetof(struct flow, tp_dst) / 4) == TP_PORTS_OFS32);
>>> +
>>> struct trie_ctx;
>>> static struct cls_subtable *find_subtable(const struct classifier *,
>>>                                          const struct minimask *);
>>> @@ -71,10 +75,16 @@ static void trie_init(struct classifier *, int trie_idx,
>>>                      const struct mf_field *);
>>> static unsigned int trie_lookup(const struct cls_trie *, const struct flow *,
>>>                                unsigned int *checkbits);
>>> -
>>> +static unsigned int trie_lookup_value(const struct trie_node *,
>>> +                                      const ovs_be32 value[],
>>> +                                      unsigned int *checkbits);
>>> static void trie_destroy(struct trie_node *);
>>> static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen);
>>> +static void trie_insert_prefix(struct trie_node **, const ovs_be32 *prefix,
>>> +                               int mlen);
>>> static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen);
>>> +static void trie_remove_prefix(struct trie_node **, const ovs_be32 *prefix,
>>> +                               int mlen);
>>> static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs,
>>>                                 unsigned int nbits);
>>> static bool mask_prefix_bits_set(const struct flow_wildcards *,
>>> @@ -389,6 +399,23 @@ classifier_replace(struct classifier *cls, struct cls_rule *rule)
>>>                trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
>>>            }
>>>        }
>>> +
>>> +        /* Ports trie. */
>>> +        if (subtable->ports_mask_len) {
>>> +            ovs_be32 ports, mask, masked_ports;
>>> +
>>> +            /* We mask the value to be inserted to always have the wildcarded
>>> +             * bits in known (zero) state, so we can include them in comparison
>>> +             * and they will always match (== their original value does not
>>> +             * matter). */
>>> +            ports = (OVS_FORCE ovs_be32)miniflow_get(&rule->match.flow,
>>> +                                                     TP_PORTS_OFS32);
>>> +            mask = minimask_get(&subtable->mask, TP_PORTS_OFS32);
>>> +            masked_ports = ports & mask;
>>> +
>>> +            trie_insert_prefix(&subtable->ports_trie, &masked_ports,
>>> +                               subtable->ports_mask_len);
>>> +        }
>>>    } else {
>>>        rule->partition = old_rule->partition;
>>>    }
>>> @@ -421,6 +448,17 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule)
>>> 
>>>    subtable = find_subtable(cls, &rule->match.mask);
>>> 
>>> +    if (subtable->ports_mask_len) {
>>> +        ovs_be32 ports, mask, masked_ports;
>>> +
>>> +        ports = (OVS_FORCE ovs_be32)miniflow_get(&rule->match.flow,
>>> +                                                 TP_PORTS_OFS32);
>>> +        mask = minimask_get(&subtable->mask, TP_PORTS_OFS32);
>>> +        masked_ports = ports & mask;
>>> +
>>> +        trie_remove_prefix(&subtable->ports_trie,
>>> +                           &masked_ports, subtable->ports_mask_len);
>>> +    }
>>>    for (i = 0; i < cls->n_tries; i++) {
>>>        if (subtable->trie_plen[i]) {
>>>            trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]);
>>> @@ -859,6 +897,11 @@ insert_subtable(struct classifier *cls, const struct minimask *mask)
>>>                                                         cls->tries[i].field);
>>>    }
>>> 
>>> +    /* Ports trie. */
>>> +    subtable->ports_trie = NULL;
>>> +    subtable->ports_mask_len = 32
>>> +        - ctz32(ntohl((OVS_FORCE ovs_be32)minimask_get(mask, TP_PORTS_OFS32)));
>>> +
>>>    return subtable;
>>> }
>>> 
>>> @@ -867,6 +910,9 @@ destroy_subtable(struct classifier *cls, struct cls_subtable *subtable)
>>> {
>>>    int i;
>>> 
>>> +    /* Port tries. */
>>> +    trie_destroy(subtable->ports_trie);
>>> +
>>>    for (i = 0; i < subtable->n_indices; i++) {
>>>        hindex_destroy(&subtable->indices[i]);
>>>    }
>>> @@ -1121,6 +1167,23 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
>>>         * but it didn't match. */
>>>        rule = NULL;
>>>    }
>>> +    if (!rule && subtable->ports_mask_len) {
>>> +        /* Ports are always part of the final range, if any.
>>> +         * No match was found for the ports.  Use the ports trie to figure out
>>> +         * which ports bits to unwildcard. */
>>> +        unsigned int mbits;
>>> +        ovs_be32 value, mask;
>>> +
>>> +        mask = minimask_get(&subtable->mask, TP_PORTS_OFS32);
>>> +        value = ((ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
>>> +        trie_lookup_value(subtable->ports_trie, &value, &mbits);
>>> +
>>> +        ((ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
>>> +            mask & htonl(~0 << (32 - mbits));
>>> +
>>> +        ofs.start = TP_PORTS_OFS32;
>>> +        goto range_out;
>>> +    }
>>> out:
>>>    /* Must unwildcard all the fields, as they were looked at. */
>>>    flow_wildcards_fold_minimask(wc, &subtable->mask);
>>> @@ -1513,14 +1576,18 @@ minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
>>> static void
>>> trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
>>> {
>>> -    const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
>>> +    trie_insert_prefix(&trie->root,
>>> +                       minimatch_get_prefix(&rule->match, trie->field), mlen);
>>> +}
>>> +
>>> +static void
>>> +trie_insert_prefix(struct trie_node **edge, const ovs_be32 *prefix, int mlen)
>>> +{
>>>    struct trie_node *node;
>>> -    struct trie_node **edge;
>>>    int ofs = 0;
>>> 
>>>    /* Walk the tree. */
>>> -    for (edge = &trie->root;
>>> -         (node = *edge) != NULL;
>>> +    for (; (node = *edge) != NULL;
>>>         edge = trie_next_edge(node, prefix, ofs)) {
>>>        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
>>>        ofs += eqbits;
>>> @@ -1561,16 +1628,25 @@ trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
>>> static void
>>> trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
>>> {
>>> -    const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
>>> +    trie_remove_prefix(&trie->root,
>>> +                       minimatch_get_prefix(&rule->match, trie->field), mlen);
>>> +}
>>> +
>>> +/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
>>> + * in 'rule'. */
>>> +static void
>>> +trie_remove_prefix(struct trie_node **root, const ovs_be32 *prefix, int mlen)
>>> +{
>>>    struct trie_node *node;
>>>    struct trie_node **edges[sizeof(union mf_value) * 8];
>>>    int depth = 0, ofs = 0;
>>> 
>>>    /* Walk the tree. */
>>> -    for (edges[depth] = &trie->root;
>>> +    for (edges[0] = root;
>>>         (node = *edges[depth]) != NULL;
>>>         edges[++depth] = trie_next_edge(node, prefix, ofs)) {
>>>        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
>>> +
>>>        if (eqbits < node->nbits) {
>>>            /* Mismatch, nothing to be removed.  This should never happen, as
>>>             * only rules in the classifier are ever removed. */
>>> diff --git a/lib/classifier.h b/lib/classifier.h
>>> index c3c1c3b..a1f4e4a 100644
>>> --- a/lib/classifier.h
>>> +++ b/lib/classifier.h
>>> @@ -276,6 +276,8 @@ struct cls_subtable {
>>>    uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */
>>>    struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */
>>>    unsigned int trie_plen[CLS_MAX_TRIES];  /* Trie prefix length in 'mask'. */
>>> +    struct trie_node *ports_trie; /* NULL if none. */
>>> +    int ports_mask_len;
>>> };
>>> 
>>> /* Returns true if 'table' is a "catch-all" subtable that will match every
>>> diff --git a/tests/classifier.at b/tests/classifier.at
>>> index b6c9352..5338a11 100644
>>> --- a/tests/classifier.at
>>> +++ b/tests/classifier.at
>>> @@ -55,7 +55,7 @@ Datapath actions: drop
>>> ])
>>> AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
>>> AT_CHECK([tail -2 stdout], [0],
>>> -  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79
>>> +  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x40/0xfff0
>>> Datapath actions: 2
>>> ])
>>> OVS_VSWITCHD_STOP
>>> @@ -78,6 +78,8 @@ AT_DATA([flows.txt], [dnl
>>> table=0 in_port=1 priority=16,tcp,nw_dst=10.1.0.0/255.255.0.0,action=output(3)
>>> table=0 in_port=1 priority=32,tcp,nw_dst=10.1.2.0/255.255.255.0,tp_src=79,action=output(2)
>>> table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=80,action=drop
>>> +table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=8080,action=output(2)
>>> +table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=192,action=output(2)
>>> table=0 in_port=1 priority=0,ip,action=drop
>>> table=0 in_port=2 priority=16,tcp,nw_dst=192.168.0.0/255.255.0.0,action=output(1)
>>> table=0 in_port=2 priority=0,ip,action=drop
>>> @@ -102,7 +104,7 @@ Datapath actions: drop
>>> ])
>>> AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
>>> AT_CHECK([tail -2 stdout], [0],
>>> -  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79
>>> +  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0xffc0,tp_dst=0x40/0xfff0
>>> Datapath actions: 3
>>> ])
>>> OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"])
>>> --
>>> 1.7.10.4
>>> 
>>> _______________________________________________
>>> dev mailing list
>>> dev at openvswitch.org
>>> http://openvswitch.org/mailman/listinfo/dev




More information about the dev mailing list