[ovs-dev] [PATCH ovn v3 0/5] Implement matching expressions for OVN.

Russell Bryant rbryant at redhat.com
Thu Apr 9 18:55:45 UTC 2015


On 04/01/2015 12:52 AM, Ben Pfaff wrote:
>   expr: New module for Boolean expressions on fields, for use in OVN.

For some reason I didn't get patch 4/5, so I'm just replying here for
this patch.

> [PATCH ovn v3 4/5] expr: New module for Boolean expressions on fields, for use in OVN.
> 
> Known weaknesses:
> 
>     * String fields can't be converted to flows yet.
> 
>     * Flows aren't optimal in some situations likely to be common.
> 
> I don't think either of those is a reason not to commit this yet;
> this is a solid base to build on.
> 
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
>  ovn/TODO         |   25 -
>  ovn/automake.mk  |    2 +-
>  ovn/expr.c       | 2315 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  ovn/expr.h       |  367 +++++++++
>  ovn/ovn.xml      |  355 ++++++---
>  tests/ovn.at     |  253 ++++++
>  tests/test-ovn.c | 1175 +++++++++++++++++++++++++++
>  7 files changed, 4366 insertions(+), 126 deletions(-)
>  create mode 100644 ovn/expr.c
>  create mode 100644 ovn/expr.h
> 

Nice work!  This is really impressive stuff.  Thanks a bunch for the
extensive testing.  I'm relying on that pretty heavily for correctness
of most of the logic.  I was mostly reading through and leaving style
nits and looking for other subtle things.  Feel free to incorporate (or
not) whichever pieces you'd like.

Acked-by: Russell Bryant <rbryant at redhat.com>

> diff --git a/ovn/expr.c b/ovn/expr.c
> new file mode 100644
> index 0000000..cd6f729
> --- /dev/null
> +++ b/ovn/expr.c
> @@ -0,0 +1,2315 @@
> +/*
> + * Copyright (c) 2015 Nicira, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +#include "expr.h"
> +#include "dynamic-string.h"
> +#include "json.h"
> +#include "lex.h"
> +#include "match.h"
> +#include "shash.h"
> +#include "openvswitch/vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(expr);
> +
> +/* Returns the name of measurement level 'level'. */
> +const char *
> +expr_level_to_string(enum expr_level level)

As a general comment, I probably would have used an 'ovn_' prefix for
all the new types and functions exposed by libovn.  If I were you, I'd
probably pass on making this change at this point because it would be a
pain.  :-)

> +{
> +    switch (level) {
> +    case EXPR_L_NOMINAL: return "nominal";
> +    case EXPR_L_BOOLEAN: return "Boolean";
> +    case EXPR_L_ORDINAL: return "ordinal";
> +    default: OVS_NOT_REACHED();

This is a case where I'd drop default so you get the compiler warning if
a new entry is added to the enum but not to this switch statement.

I won't repeat this same comment for any other cases in this patch, but
it generally applies to any of the default: OVS_NOT_REACHED() cases.

> +    }
> +}
> +
> +bool
> +expr_relop_from_token(enum lex_type type, enum expr_relop *relop)
> +{
> +    enum expr_relop r;
> +
> +    switch ((int) type) {

I'm curious why the cast was needed here.

> +    case LEX_T_EQ: r = EXPR_R_EQ; break;
> +    case LEX_T_NE: r = EXPR_R_NE; break;
> +    case LEX_T_LT: r = EXPR_R_LT; break;
> +    case LEX_T_LE: r = EXPR_R_LE; break;
> +    case LEX_T_GT: r = EXPR_R_GT; break;
> +    case LEX_T_GE: r = EXPR_R_GE; break;
> +    default: return false;
> +    }
> +
> +    if (relop) {
> +        *relop = r;
> +    }
> +    return true;
> +}

> +/* Constructing and manipulating expressions. */
> +
> +/* Creates and returns a logical AND or OR expression (according to 'type',
> + * which must be EXPR_T_AND or EXPR_T_OR) that initially has no
> + * sub-expressions.  (To satisfy the invariants for expressions, the caller
> + * must add at least two sub-expressions whose types are different from
> + * 'type'.) */
> +struct expr *
> +expr_create_andor(enum expr_type type)
> +{
> +    struct expr *e = xmalloc(sizeof *e);
> +    e->type = type;
> +    list_init(&e->andor);
> +    return e;

This leaves e->node uninitialized.  Maybe use xzalloc() instead?

> +}
> +
> +/* Returns a logical AND or OR expression (according to 'type', which must be
> + * EXPR_T_AND or EXPR_T_OR) whose sub-expressions are 'a' and 'b', with some
> + * flexibility:
> + *
> + *     - If 'a' or 'b' is NULL, just returns the other one (which means that if
> + *       that other one is not of the given 'type', then the returned
> + *       expression is not either).
> + *
> + *     - If 'a' or 'b', or both, have type 'type', then they are combined into
> + *       a single node that satisfies the invariants for expressions. */
> +struct expr *
> +expr_combine(enum expr_type type, struct expr *a, struct expr *b)
> +{
> +    if (!a) {
> +        return b;
> +    } else if (!b) {
> +        return a;
> +    } else if (a->type == type) {
> +        if (b->type == type) {
> +            list_splice(&a->andor, b->andor.next, &b->andor);
> +            free(b);
> +        } else {
> +            list_push_back(&a->andor, &b->node);
> +        }
> +        return a;
> +    } else if (b->type == type) {
> +        list_push_front(&b->andor, &a->node);
> +        return b;
> +    } else {
> +        struct expr *e = expr_create_andor(type);
> +        list_push_back(&e->andor, &a->node);
> +        list_push_back(&e->andor, &b->node);
> +        return e;
> +    }
> +}
> +
> +static void
> +expr_insert_andor(struct expr *andor, struct expr *before, struct expr *new)
> +{
> +    if (new->type == andor->type) {
> +        if (andor->type == EXPR_T_AND) {
> +            /* Conjunction junction, what's your function? */
> +        }
> +        list_splice(&before->node, new->andor.next, &new->andor);
> +        free(new);
> +    } else {
> +        list_insert(&before->node, &new->node);
> +    }
> +}
> +
> +/* Returns an EXPR_T_BOOLEAN expression with value 'b'. */
> +struct expr *
> +expr_create_boolean(bool b)
> +{
> +    struct expr *e = xmalloc(sizeof *e);

Maybe use xzalloc() here to avoid e->node being uninitialized?

> +    e->type = EXPR_T_BOOLEAN;
> +    e->boolean = b;
> +    return e;
> +}

> +static void
> +expr_format_string(const char *s, struct ds *ds)
> +{
> +    struct json json;
> +    json.type = JSON_STRING;
> +    json.u.string = CONST_CAST(char *, s);

This could be:

    struct json json = {
        .type = JSON_STRING,
        .u.string = CONST_CAST(char *, s),
    };

It doesn't make a difference now, but would ensure the whole struct is
initialized if more was ever added to struct json.

> +    json_to_ds(&json, 0, ds);
> +}
> +
> +static void
> +expr_format_cmp(const struct expr *e, struct ds *s)
> +{
> +    if (e->cmp.symbol->width) {

really minor style nit ... I'd be tempted to change this to:

    if (!e->cmp.symbol->width) {
        ds_put_format(s, "%s %s ", e->cmp.symbol->name,
                      expr_relop_to_string(e->cmp.relop));
        expr_format_string(e->cmp.string, s);
        return;
    }

and then bring the majority of the function back to not being nested.

> +        int ofs, n;
> +
> +        find_bitwise_range(&e->cmp.mask, e->cmp.symbol->width, &ofs, &n);
> +        if (n == 1
> +            && (e->cmp.relop == EXPR_R_EQ || e->cmp.relop == EXPR_R_NE)) {
> +            bool positive;
> +
> +            positive = bitwise_get_bit(&e->cmp.value, sizeof e->cmp.value,
> +                                       ofs);
> +            positive ^= e->cmp.relop == EXPR_R_NE;
> +            if (!positive) {
> +                ds_put_char(s, '!');
> +            }
> +            ds_put_cstr(s, e->cmp.symbol->name);
> +            if (e->cmp.symbol->width > 1) {
> +                ds_put_format(s, "[%d]", ofs);
> +            }
> +            return;
> +        }
> +
> +        ds_put_cstr(s, e->cmp.symbol->name);
> +        if (n > 0 && n < e->cmp.symbol->width) {
> +            if (n > 1) {
> +                ds_put_format(s, "[%d..%d]", ofs, ofs + n - 1);
> +            } else {
> +                ds_put_format(s, "[%d]", ofs);
> +            }
> +        }
> +
> +        ds_put_format(s, " %s ", expr_relop_to_string(e->cmp.relop));
> +
> +        if (n) {
> +            union mf_subvalue value;
> +
> +            memset(&value, 0, sizeof value);

I prefer something like the following:

    union mf_subvalue value = {
        .integer = 0,
    };

It results in the same thing as the memset(), but gives the opportunity
for the memset to be optimized out if possible.  There are some other
cases where the same change could be made if you like it.

> +            bitwise_copy(&e->cmp.value, sizeof e->cmp.value, ofs,
> +                         &value, sizeof value, 0,
> +                         n);
> +            mf_format_subvalue(&value, s);
> +        } else {
> +            mf_format_subvalue(&e->cmp.value, s);
> +            ds_put_char(s, '/');
> +            mf_format_subvalue(&e->cmp.mask, s);
> +        }
> +    } else {
> +        ds_put_format(s, "%s %s ", e->cmp.symbol->name,
> +                      expr_relop_to_string(e->cmp.relop));
> +        expr_format_string(e->cmp.string, s);
> +    }
> +}

> +static void OVS_PRINTF_FORMAT(2, 3)
> +expr_error(struct expr_context *ctx, const char *message, ...)
> +{
> +    if (!ctx->error) {
> +        if (ctx->lexer->token.type == LEX_T_ERROR) {
> +            ctx->error = xstrdup(ctx->lexer->token.s);
> +        } else {
> +            va_list args;
> +            va_start(args, message);
> +            ctx->error = xvasprintf(message, args);
> +            va_end(args);
> +        }
> +    }
> +}

This will silently drop the error message if expr_error() is called
twice.  It avoids a mem leak, which is great, but I wonder if a
VLOG_WARN would be good here.

I suppose another alternative would be to make it stack error messages.
 It could append something like: "(next error: %s)".

> +
> +static void OVS_PRINTF_FORMAT(2, 3)
> +expr_syntax_error(struct expr_context *ctx, const char *message, ...)
> +{
> +    if (!ctx->error) {
> +        if (ctx->lexer->token.type == LEX_T_ERROR) {
> +            ctx->error = xstrdup(ctx->lexer->token.s);
> +        } else {
> +            struct ds s;
> +
> +            ds_init(&s);
> +            ds_put_cstr(&s, "Syntax error ");
> +            if (ctx->lexer->token.type == LEX_T_END) {
> +                ds_put_cstr(&s, "at end of input ");
> +            } else if (ctx->lexer->start) {
> +                ds_put_format(&s, "at `%.*s' ",
> +                              (int) (ctx->lexer->input - ctx->lexer->start),
> +                              ctx->lexer->start);
> +            }
> +
> +            va_list args;
> +            va_start(args, message);
> +            ds_put_format_valist(&s, message, args);
> +            va_end(args);
> +
> +            ctx->error = ds_steal_cstr(&s);
> +        }
> +    }
> +}

You could reduce nesting in the 2 functions above with:

    if (ctx->error) {
        return;
    }

> +static bool
> +parse_constant_set(struct expr_context *ctx, struct expr_constant_set *cs)
> +{
> +    size_t allocated_values = 0;
> +    bool ok;
> +
> +    memset(cs, 0, sizeof *cs);

I went looking around the code seeing if there was any possibility of
leaking anything here.  Perhaps add a comment above the function that
clarifies the precondition that cs is uninitialized (or at least empty),
as this function will initialize it.

An expr_constant_set_init() might be nice for more consistency, bit it
may be overkill to do that for everything.

> +    if (lexer_match(ctx->lexer, LEX_T_LCURLY)) {
> +        ok = true;
> +        cs->in_curlies = true;
> +        do {
> +            if (!parse_constant(ctx, cs, &allocated_values)) {
> +                ok = false;
> +                break;
> +            }
> +            lexer_match(ctx->lexer, LEX_T_COMMA);
> +        } while (!lexer_match(ctx->lexer, LEX_T_RCURLY));
> +    } else {
> +        ok = parse_constant(ctx, cs, &allocated_values);
> +    }
> +    if (!ok) {
> +        expr_constant_set_destroy(cs);
> +    }
> +    return ok;
> +}
> +

> +static struct expr *
> +crush_and(struct expr *expr, const struct expr_symbol *symbol)
> +{
> +    ovs_assert(!list_is_short(&expr->andor));
> +
> +    union mf_subvalue value, mask;
> +    memset(&value, 0, sizeof value);
> +    memset(&mask, 0, sizeof mask);
> +
> +    struct expr *sub, *next = NULL;
> +    LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
> +        list_remove(&sub->node);
> +        struct expr *new = crush_cmps(sub, symbol);
> +        switch (new->type) {
> +        case EXPR_T_CMP:
> +            if (!mf_subvalue_intersect(&value, &mask,
> +                                       &new->cmp.value, &new->cmp.mask,
> +                                       &value, &mask)) {
> +                expr_destroy(new);
> +                expr_destroy(expr);
> +                return expr_create_boolean(false);
> +            }
> +            expr_destroy(new);
> +            break;
> +        case EXPR_T_AND:
> +            OVS_NOT_REACHED();
> +        case EXPR_T_OR:
> +            list_insert(&next->node, &new->node);
> +            break;
> +        case EXPR_T_BOOLEAN:
> +            if (!new->boolean) {
> +                expr_destroy(expr);
> +                return expr_create_boolean(false);
> +            }
> +            free(new);
> +            break;
> +        }
> +    }
> +    if (list_is_empty(&expr->andor)) {
> +        if (is_all_zeros(&mask, sizeof mask)) {
> +            expr_destroy(expr);
> +            return expr_create_boolean(true);
> +        } else {
> +            struct expr *cmp;
> +            cmp = xmalloc(sizeof *cmp);
> +            cmp->type = EXPR_T_CMP;
> +            cmp->cmp.symbol = symbol;
> +            cmp->cmp.relop = EXPR_R_EQ;
> +            cmp->cmp.value = value;
> +            cmp->cmp.mask = mask;
> +            expr_destroy(expr);
> +            return cmp;

This code leaves part of cmp unitialized (cmp->node).

> +        }
> +    } else if (list_is_short(&expr->andor)) {
> +        /* Transform "a && (b || c || d)" into "ab || ac || ad" where "ab" is
> +         * computed as "a && b", etc. */
> +        struct expr *disjuncts = expr_from_node(list_pop_front(&expr->andor));
> +        struct expr *or;
> +
> +        or = xmalloc(sizeof *or);
> +        or->type = EXPR_T_OR;
> +        list_init(&or->andor);

or->node is left uninitialized here.

Maybe an expr_init() would be good to add?

> +
> +        ovs_assert(disjuncts->type == EXPR_T_OR);
> +        LIST_FOR_EACH_SAFE (sub, next, node, &disjuncts->andor) {
> +            ovs_assert(sub->type == EXPR_T_CMP);
> +            list_remove(&sub->node);
> +            if (mf_subvalue_intersect(&value, &mask,
> +                                      &sub->cmp.value, &sub->cmp.mask,
> +                                      &sub->cmp.value, &sub->cmp.mask)) {
> +                list_push_back(&or->andor, &sub->node);
> +            } else {
> +                free(sub);
> +            }
> +        }
> +        free(disjuncts);
> +        free(expr);
> +        if (list_is_empty(&or->andor)) {
> +            free(or);
> +            return expr_create_boolean(false);
> +        } else if (list_is_short(&or->andor)) {
> +            struct expr *cmp = expr_from_node(list_pop_front(&or->andor));
> +            free(or);
> +            return cmp;
> +        } else {
> +            return or;
> +        }
> +    } else {
> +        /* Transform "x && (a0 || a1) && (b0 || b1) && ..." into
> +         *           "(xa0b0 || xa0b1 || xa1b0 || xa1b1) && ...". */
> +        struct expr *as = expr_from_node(list_pop_front(&expr->andor));
> +        struct expr *bs = expr_from_node(list_pop_front(&expr->andor));
> +        struct expr *new = NULL;
> +        struct expr *or;
> +
> +        or = xmalloc(sizeof *or);
> +        or->type = EXPR_T_OR;
> +        list_init(&or->andor);
> +
> +        struct expr *a;
> +        LIST_FOR_EACH (a, node, &as->andor) {
> +            union mf_subvalue a_value, a_mask;
> +
> +            ovs_assert(a->type == EXPR_T_CMP);
> +            if (!mf_subvalue_intersect(&value, &mask,
> +                                       &a->cmp.value, &a->cmp.mask,
> +                                       &a_value, &a_mask)) {
> +                continue;
> +            }
> +
> +            struct expr *b;
> +            LIST_FOR_EACH (b, node, &bs->andor) {
> +                ovs_assert(b->type == EXPR_T_CMP);
> +                if (!new) {
> +                    new = xmalloc(sizeof *new);
> +                    new->type = EXPR_T_CMP;
> +                    new->cmp.symbol = symbol;
> +                    new->cmp.relop = EXPR_R_EQ;
> +                }
> +                if (mf_subvalue_intersect(&a_value, &a_mask,
> +                                          &b->cmp.value, &b->cmp.mask,
> +                                          &new->cmp.value, &new->cmp.mask)) {
> +                    list_push_back(&or->andor, &new->node);
> +                    new = NULL;
> +                }
> +            }
> +        }
> +        expr_destroy(as);
> +        expr_destroy(bs);
> +        free(new);
> +
> +        if (list_is_empty(&or->andor)) {
> +            expr_destroy(expr);
> +            free(or);
> +            return expr_create_boolean(false);
> +        } else if (list_is_short(&or->andor)) {
> +            struct expr *cmp = expr_from_node(list_pop_front(&or->andor));
> +            free(or);
> +            if (list_is_empty(&expr->andor)) {
> +                expr_destroy(expr);
> +                return crush_cmps(cmp, symbol);
> +            } else {
> +                return crush_cmps(expr_combine(EXPR_T_AND, cmp, expr), symbol);
> +            }
> +        } else if (!list_is_empty(&expr->andor)) {
> +            struct expr *e = expr_combine(EXPR_T_AND, or, expr);
> +            ovs_assert(!list_is_short(&e->andor));
> +            return crush_cmps(e, symbol);
> +        } else {
> +            expr_destroy(expr);
> +            return crush_cmps(or, symbol);
> +        }
> +    }
> +}

> +static struct expr_match *
> +expr_match_new(const struct match *m, uint8_t clause, uint8_t n_clauses,
> +               uint32_t conj_id)
> +{
> +    struct expr_match *match = xmalloc(sizeof *match);

How about xzalloc() here?

if m is NULL, match->match is uninitialized.

All cases leave match->hmap_node uninitialized.

> +    if (m) {
> +        match->match = *m;
> +    }
> +    if (conj_id) {
> +        match->conjunctions = xmalloc(sizeof *match->conjunctions);
> +        match->conjunctions[0].id = conj_id;
> +        match->conjunctions[0].clause = clause;
> +        match->conjunctions[0].n_clauses = n_clauses;
> +        match->n = 1;
> +        match->allocated = 1;
> +    } else {
> +        match->conjunctions = NULL;
> +        match->n = 0;
> +        match->allocated = 0;
> +    }
> +    return match;
> +}
> +


-- 
Russell Bryant



More information about the dev mailing list