[ovs-dev] [self-check 3/3] ofproto-dpif: Implement self-check of flow translations.

Ethan Jackson ethan at nicira.com
Mon Jan 16 19:32:10 UTC 2012


> The code looks OK to me.  Can you point out the problem more
> specifically?

Looking over it again, I misread the code.  The existing code is correct.

In ofproto_dpif_self_check() you say 'argc' is OVS_UNUSED even though
you use it.

Otherwise looks good,
Ethan


>
>> It may be worth printing the other possible values of
>> subfacet->key_fitness in case it's not OK but not TOO_LITTLE.
>
> OK, I added a function odp_key_fitness_to_string().
>
>> In ofproto_dpif_self_check():
>> I've found it useful to do appctl commands for all instances when one
>> isn't specified in the lacp, bond, and cfm module.  It may be worth
>> doing it here as well, though I don't feel strongly about it.
>
> Good idea.  Done.
>
> Here's an incremental followed by a full respin.
>
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index ffb486e..12e8daf 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
>  *
>  * Licensed under the Apache License, Version 2.0 (the "License");
>  * you may not use this file except in compliance with the License.
> @@ -1687,6 +1687,24 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
>                            expected_attrs, flow, key, key_len);
>  }
>
> +/* Returns 'fitness' as a string, for use in debug messages. */
> +const char *
> +odp_key_fitness_to_string(enum odp_key_fitness fitness)
> +{
> +    switch (fitness) {
> +    case ODP_FIT_PERFECT:
> +        return "OK";
> +    case ODP_FIT_TOO_MUCH:
> +        return "too_much";
> +    case ODP_FIT_TOO_LITTLE:
> +        return "too_little";
> +    case ODP_FIT_ERROR:
> +        return "error";
> +    default:
> +        return "<unknown>";
> +    }
> +}
> +
>  /* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that specifies
>  * Netlink PID 'pid'.  If 'cookie' is nonnull, adds a userdata attribute whose
>  * contents contains 'cookie' and returns the offset within 'odp_actions' of
> diff --git a/lib/odp-util.h b/lib/odp-util.h
> index a6f8a30..0028499 100644
> --- a/lib/odp-util.h
> +++ b/lib/odp-util.h
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
>  *
>  * Licensed under the Apache License, Version 2.0 (the "License");
>  * you may not use this file except in compliance with the License.
> @@ -114,6 +114,7 @@ enum odp_key_fitness {
>  };
>  enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *, size_t,
>                                           struct flow *);
> +const char *odp_key_fitness_to_string(enum odp_key_fitness);
>
>  enum user_action_cookie_type {
>     USER_ACTION_COOKIE_UNSPEC,
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 6e73e0d..64e9073 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -3366,8 +3366,6 @@ facet_check_consistency(struct facet *facet)
>     struct subfacet *subfacet;
>     bool ok;
>
> -    COVERAGE_INC(facet_revalidate);
> -
>     /* Check the rule for consistency. */
>     rule = rule_dpif_lookup(ofproto, &facet->flow, 0);
>     if (!rule) {
> @@ -3380,6 +3378,7 @@ facet_check_consistency(struct facet *facet)
>     } else if (rule != facet->rule) {
>         struct ds s;
>
> +        ds_init(&s);
>         flow_format(&s, &facet->flow);
>         ds_put_format(&s, ": facet associated with wrong rule (was "
>                       "table=%"PRIu8",", facet->rule->up.table_id);
> @@ -3434,12 +3433,13 @@ facet_check_consistency(struct facet *facet)
>
>             ds_put_cstr(&s, ": inconsistency in subfacet");
>             if (should_install != subfacet->installed) {
> +                enum odp_key_fitness fitness = subfacet->key_fitness;
> +
>                 ds_put_format(&s, " (should%s have been installed)",
>                               should_install ? "" : " not");
>                 ds_put_format(&s, " (may_set_up_flow=%s, fitness=%s)",
>                               ctx.may_set_up_flow ? "true" : "false",
> -                              subfacet->key_fitness == ODP_FIT_TOO_LITTLE
> -                              ? "too_little" : "OK");
> +                              odp_key_fitness_to_string(fitness));
>             }
>             if (actions_changed) {
>                 ds_put_cstr(&s, " (actions were: ");
> @@ -6048,24 +6048,14 @@ ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
>     unixctl_command_reply(conn, 200, NULL);
>  }
>
> +/* Runs a self-check of flow translations in 'ofproto'.  Appends a message to
> + * 'reply' describing the results. */
>  static void
> -ofproto_dpif_self_check(struct unixctl_conn *conn,
> -                        int argc OVS_UNUSED, const char *argv[],
> -                        void *aux OVS_UNUSED)
> +ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply)
>  {
> -    const char *dpname = argv[1];
> -    struct ofproto_dpif *ofproto;
>     struct facet *facet;
> -    char *reply;
>     int errors;
>
> -    ofproto = ofproto_dpif_lookup(dpname);
> -    if (!ofproto) {
> -        unixctl_command_reply(conn, 501, "Unknown ofproto (use ofproto/list "
> -                              "for help)");
> -        return;
> -    }
> -
>     errors = 0;
>     HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) {
>         if (!facet_check_consistency(facet)) {
> @@ -6076,11 +6066,38 @@ ofproto_dpif_self_check(struct unixctl_conn *conn,
>         ofproto->need_revalidate = true;
>     }
>
> -    reply = (errors
> -             ? xasprintf("%s: self-check failed (%d errors)", dpname, errors)
> -             : xasprintf("%s: self-check passed", dpname));
> -    unixctl_command_reply(conn, 200, reply);
> -    free(reply);
> +    if (errors) {
> +        ds_put_format(reply, "%s: self-check failed (%d errors)\n",
> +                      ofproto->up.name, errors);
> +    } else {
> +        ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name);
> +    }
> +}
> +
> +static void
> +ofproto_dpif_self_check(struct unixctl_conn *conn,
> +                        int argc OVS_UNUSED, const char *argv[],
> +                        void *aux OVS_UNUSED)
> +{
> +    struct ds reply = DS_EMPTY_INITIALIZER;
> +    struct ofproto_dpif *ofproto;
> +
> +    if (argc > 1) {
> +        ofproto = ofproto_dpif_lookup(argv[1]);
> +        if (!ofproto) {
> +            unixctl_command_reply(conn, 501, "Unknown ofproto (use "
> +                                  "ofproto/list for help)");
> +            return;
> +        }
> +        ofproto_dpif_self_check__(ofproto, &reply);
> +    } else {
> +        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
> +            ofproto_dpif_self_check__(ofproto, &reply);
> +        }
> +    }
> +
> +    unixctl_command_reply(conn, 200, ds_cstr(&reply));
> +    ds_destroy(&reply);
>  }
>
>  static void
> @@ -6104,7 +6121,7 @@ ofproto_dpif_unixctl_init(void)
>                              ofproto_dpif_clog, NULL);
>     unixctl_command_register("ofproto/unclog", "", 0, 0,
>                              ofproto_dpif_unclog, NULL);
> -    unixctl_command_register("ofproto/self-check", "bridge", 1, 1,
> +    unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1,
>                              ofproto_dpif_self_check, NULL);
>  }
>
> diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man
> index 5ae90f2..ad9e02d 100644
> --- a/ofproto/ofproto-unixctl.man
> +++ b/ofproto/ofproto-unixctl.man
> @@ -64,3 +64,10 @@ This form of \fBofproto/trace\fR cannot determine the complete set of
>  datapath actions in some corner cases.  If the results say that this
>  is the case, rerun \fBofproto/trace\fR supplying a packet in the flow
>  to get complete results.
> +.IP "\fBofproto/self\-check\fR [\fIswitch\fR]"
> +Runs an internal consistency check on \fIswitch\fR, if specified,
> +otherwise on all ofproto instances, and responds with a brief summary
> +of the results.  If the summary reports any errors, then the Open
> +vSwitch logs should contain more detailed information.  Please pass
> +along errors reported by this command to the Open vSwitch developers
> +as bugs.
>
> --8<--------------------------cut here-------------------------->8--
>
> From: Ben Pfaff <blp at nicira.com>
> Date: Mon, 16 Jan 2012 11:15:25 -0800
> Subject: [PATCH] ofproto-dpif: Implement self-check of flow translations.
>
> One of the major tasks of ofproto-dpif is to translate OpenFlow
> actions into "ODP" datapath actions.  These translations are essentially
> a cache that requires revalidation when certain state changes occur.  For
> best performance it's important to revalidate flows only when necessary,
> so from time to time Open vSwitch has gotten this wrong, which meant that
> stale flows could persist in the kernel and cause surprising behavior.
>
> This commit implements a simple "self check": every trip through the
> Open vSwitch main loop randomly chooses one flow entry and checks that
> its actions have been correctly translated.  If not, Open vSwitch logs
> the details of the problem.  This should help find problems more
> quickly in the future.
>
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
>  lib/odp-util.c              |   20 +++++-
>  lib/odp-util.h              |    3 +-
>  ofproto/ofproto-dpif.c      |  181 +++++++++++++++++++++++++++++++++++++++++++
>  ofproto/ofproto-unixctl.man |    7 ++
>  4 files changed, 209 insertions(+), 2 deletions(-)
>
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index ffb486e..12e8daf 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
>  *
>  * Licensed under the Apache License, Version 2.0 (the "License");
>  * you may not use this file except in compliance with the License.
> @@ -1687,6 +1687,24 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
>                            expected_attrs, flow, key, key_len);
>  }
>
> +/* Returns 'fitness' as a string, for use in debug messages. */
> +const char *
> +odp_key_fitness_to_string(enum odp_key_fitness fitness)
> +{
> +    switch (fitness) {
> +    case ODP_FIT_PERFECT:
> +        return "OK";
> +    case ODP_FIT_TOO_MUCH:
> +        return "too_much";
> +    case ODP_FIT_TOO_LITTLE:
> +        return "too_little";
> +    case ODP_FIT_ERROR:
> +        return "error";
> +    default:
> +        return "<unknown>";
> +    }
> +}
> +
>  /* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that specifies
>  * Netlink PID 'pid'.  If 'cookie' is nonnull, adds a userdata attribute whose
>  * contents contains 'cookie' and returns the offset within 'odp_actions' of
> diff --git a/lib/odp-util.h b/lib/odp-util.h
> index a6f8a30..0028499 100644
> --- a/lib/odp-util.h
> +++ b/lib/odp-util.h
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
>  *
>  * Licensed under the Apache License, Version 2.0 (the "License");
>  * you may not use this file except in compliance with the License.
> @@ -114,6 +114,7 @@ enum odp_key_fitness {
>  };
>  enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *, size_t,
>                                           struct flow *);
> +const char *odp_key_fitness_to_string(enum odp_key_fitness);
>
>  enum user_action_cookie_type {
>     USER_ACTION_COOKIE_UNSPEC,
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index e3cb51b..64e9073 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -327,6 +327,7 @@ static struct facet *facet_find(struct ofproto_dpif *, const struct flow *);
>  static struct facet *facet_lookup_valid(struct ofproto_dpif *,
>                                         const struct flow *);
>  static bool facet_revalidate(struct facet *);
> +static bool facet_check_consistency(struct facet *);
>
>  static void facet_flush_stats(struct facet *);
>
> @@ -382,6 +383,8 @@ static struct subfacet *subfacet_find(struct ofproto_dpif *,
>                                       const struct nlattr *key, size_t key_len);
>  static void subfacet_destroy(struct subfacet *);
>  static void subfacet_destroy__(struct subfacet *);
> +static void subfacet_get_key(struct subfacet *, struct odputil_keybuf *,
> +                             struct ofpbuf *key);
>  static void subfacet_reset_dp_stats(struct subfacet *,
>                                     struct dpif_flow_stats *);
>  static void subfacet_update_time(struct subfacet *, long long int used);
> @@ -821,6 +824,19 @@ run(struct ofproto *ofproto_)
>         }
>     }
>
> +    /* Check the consistency of a random facet, to aid debugging. */
> +    if (!hmap_is_empty(&ofproto->facets) && !ofproto->need_revalidate) {
> +        struct facet *facet;
> +
> +        facet = CONTAINER_OF(hmap_random_node(&ofproto->facets),
> +                             struct facet, hmap_node);
> +        if (!tag_set_intersects(&ofproto->revalidate_set, facet->tags)) {
> +            if (!facet_check_consistency(facet)) {
> +                ofproto->need_revalidate = true;
> +            }
> +        }
> +    }
> +
>     return 0;
>  }
>
> @@ -3339,6 +3355,117 @@ facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow)
>     return facet;
>  }
>
> +static bool
> +facet_check_consistency(struct facet *facet)
> +{
> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
> +
> +    struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
> +
> +    struct rule_dpif *rule;
> +    struct subfacet *subfacet;
> +    bool ok;
> +
> +    /* Check the rule for consistency. */
> +    rule = rule_dpif_lookup(ofproto, &facet->flow, 0);
> +    if (!rule) {
> +        if (!VLOG_DROP_WARN(&rl)) {
> +            char *s = flow_to_string(&facet->flow);
> +            VLOG_WARN("%s: facet should not exist", s);
> +            free(s);
> +        }
> +        return false;
> +    } else if (rule != facet->rule) {
> +        struct ds s;
> +
> +        ds_init(&s);
> +        flow_format(&s, &facet->flow);
> +        ds_put_format(&s, ": facet associated with wrong rule (was "
> +                      "table=%"PRIu8",", facet->rule->up.table_id);
> +        cls_rule_format(&facet->rule->up.cr, &s);
> +        ds_put_format(&s, ") (should have been table=%"PRIu8",",
> +                      rule->up.table_id);
> +        cls_rule_format(&rule->up.cr, &s);
> +        ds_put_char(&s, ')');
> +
> +        VLOG_WARN("%s", ds_cstr(&s));
> +        ds_destroy(&s);
> +
> +        ok = false;
> +    } else {
> +        ok = true;
> +    }
> +
> +    /* Check the datapath actions for consistency. */
> +    LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
> +        struct action_xlate_ctx ctx;
> +        struct ofpbuf *odp_actions;
> +        bool actions_changed;
> +        bool should_install;
> +
> +        action_xlate_ctx_init(&ctx, ofproto, &facet->flow,
> +                              subfacet->initial_tci, rule->up.flow_cookie,
> +                              NULL);
> +        odp_actions = xlate_actions(&ctx, rule->up.actions,
> +                                    rule->up.n_actions);
> +
> +        should_install = (ctx.may_set_up_flow
> +                          && subfacet->key_fitness != ODP_FIT_TOO_LITTLE);
> +        if (!should_install && !subfacet->installed) {
> +            /* The actions for uninstallable flows may vary from one packet to
> +             * the next, so don't compare the actions. */
> +            goto next;
> +        }
> +
> +        actions_changed = (subfacet->actions_len != odp_actions->size
> +                           || memcmp(subfacet->actions, odp_actions->data,
> +                                     subfacet->actions_len));
> +        if (should_install != subfacet->installed || actions_changed) {
> +            struct odputil_keybuf keybuf;
> +            struct ofpbuf key;
> +            struct ds s;
> +
> +            ok = false;
> +
> +            ds_init(&s);
> +            subfacet_get_key(subfacet, &keybuf, &key);
> +            odp_flow_key_format(key.data, key.size, &s);
> +
> +            ds_put_cstr(&s, ": inconsistency in subfacet");
> +            if (should_install != subfacet->installed) {
> +                enum odp_key_fitness fitness = subfacet->key_fitness;
> +
> +                ds_put_format(&s, " (should%s have been installed)",
> +                              should_install ? "" : " not");
> +                ds_put_format(&s, " (may_set_up_flow=%s, fitness=%s)",
> +                              ctx.may_set_up_flow ? "true" : "false",
> +                              odp_key_fitness_to_string(fitness));
> +            }
> +            if (actions_changed) {
> +                ds_put_cstr(&s, " (actions were: ");
> +                format_odp_actions(&s, subfacet->actions,
> +                                   subfacet->actions_len);
> +                ds_put_cstr(&s, ") (correct actions: ");
> +                format_odp_actions(&s, odp_actions->data,
> +                                   odp_actions->size);
> +                ds_put_char(&s, ')');
> +            } else {
> +                ds_put_cstr(&s, " (actions: ");
> +                format_odp_actions(&s, subfacet->actions,
> +                                   subfacet->actions_len);
> +                ds_put_char(&s, ')');
> +            }
> +            VLOG_WARN("%s", ds_cstr(&s));
> +            ds_destroy(&s);
> +        }
> +
> +    next:
> +        ofpbuf_delete(odp_actions);
> +    }
> +
> +    return ok;
> +}
> +
>  /* Re-searches the classifier for 'facet':
>  *
>  *   - If the rule found is different from 'facet''s current rule, moves
> @@ -5921,6 +6048,58 @@ ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
>     unixctl_command_reply(conn, 200, NULL);
>  }
>
> +/* Runs a self-check of flow translations in 'ofproto'.  Appends a message to
> + * 'reply' describing the results. */
> +static void
> +ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply)
> +{
> +    struct facet *facet;
> +    int errors;
> +
> +    errors = 0;
> +    HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) {
> +        if (!facet_check_consistency(facet)) {
> +            errors++;
> +        }
> +    }
> +    if (errors) {
> +        ofproto->need_revalidate = true;
> +    }
> +
> +    if (errors) {
> +        ds_put_format(reply, "%s: self-check failed (%d errors)\n",
> +                      ofproto->up.name, errors);
> +    } else {
> +        ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name);
> +    }
> +}
> +
> +static void
> +ofproto_dpif_self_check(struct unixctl_conn *conn,
> +                        int argc OVS_UNUSED, const char *argv[],
> +                        void *aux OVS_UNUSED)
> +{
> +    struct ds reply = DS_EMPTY_INITIALIZER;
> +    struct ofproto_dpif *ofproto;
> +
> +    if (argc > 1) {
> +        ofproto = ofproto_dpif_lookup(argv[1]);
> +        if (!ofproto) {
> +            unixctl_command_reply(conn, 501, "Unknown ofproto (use "
> +                                  "ofproto/list for help)");
> +            return;
> +        }
> +        ofproto_dpif_self_check__(ofproto, &reply);
> +    } else {
> +        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
> +            ofproto_dpif_self_check__(ofproto, &reply);
> +        }
> +    }
> +
> +    unixctl_command_reply(conn, 200, ds_cstr(&reply));
> +    ds_destroy(&reply);
> +}
> +
>  static void
>  ofproto_dpif_unixctl_init(void)
>  {
> @@ -5942,6 +6121,8 @@ ofproto_dpif_unixctl_init(void)
>                              ofproto_dpif_clog, NULL);
>     unixctl_command_register("ofproto/unclog", "", 0, 0,
>                              ofproto_dpif_unclog, NULL);
> +    unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1,
> +                             ofproto_dpif_self_check, NULL);
>  }
>
>  /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
> diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man
> index 5ae90f2..ad9e02d 100644
> --- a/ofproto/ofproto-unixctl.man
> +++ b/ofproto/ofproto-unixctl.man
> @@ -64,3 +64,10 @@ This form of \fBofproto/trace\fR cannot determine the complete set of
>  datapath actions in some corner cases.  If the results say that this
>  is the case, rerun \fBofproto/trace\fR supplying a packet in the flow
>  to get complete results.
> +.IP "\fBofproto/self\-check\fR [\fIswitch\fR]"
> +Runs an internal consistency check on \fIswitch\fR, if specified,
> +otherwise on all ofproto instances, and responds with a brief summary
> +of the results.  If the summary reports any errors, then the Open
> +vSwitch logs should contain more detailed information.  Please pass
> +along errors reported by this command to the Open vSwitch developers
> +as bugs.
> --
> 1.7.2.5
>



More information about the dev mailing list