[ovs-dev] [self-check 3/3] ofproto-dpif: Implement self-check of flow translations.
Ethan Jackson
ethan at nicira.com
Mon Jan 16 19:32:10 UTC 2012
> The code looks OK to me. Can you point out the problem more
> specifically?
Looking over it again, I misread the code. The existing code is correct.
In ofproto_dpif_self_check() you say 'argc' is OVS_UNUSED even though
you use it.
Otherwise looks good,
Ethan
>
>> It may be worth printing the other possible values of
>> subfacet->key_fitness in case it's not OK but not TOO_LITTLE.
>
> OK, I added a function odp_key_fitness_to_string().
>
>> In ofproto_dpif_self_check():
>> I've found it useful to do appctl commands for all instances when one
>> isn't specified in the lacp, bond, and cfm module. It may be worth
>> doing it here as well, though I don't feel strongly about it.
>
> Good idea. Done.
>
> Here's an incremental followed by a full respin.
>
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index ffb486e..12e8daf 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -1687,6 +1687,24 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
> expected_attrs, flow, key, key_len);
> }
>
> +/* Returns 'fitness' as a string, for use in debug messages. */
> +const char *
> +odp_key_fitness_to_string(enum odp_key_fitness fitness)
> +{
> + switch (fitness) {
> + case ODP_FIT_PERFECT:
> + return "OK";
> + case ODP_FIT_TOO_MUCH:
> + return "too_much";
> + case ODP_FIT_TOO_LITTLE:
> + return "too_little";
> + case ODP_FIT_ERROR:
> + return "error";
> + default:
> + return "<unknown>";
> + }
> +}
> +
> /* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that specifies
> * Netlink PID 'pid'. If 'cookie' is nonnull, adds a userdata attribute whose
> * contents contains 'cookie' and returns the offset within 'odp_actions' of
> diff --git a/lib/odp-util.h b/lib/odp-util.h
> index a6f8a30..0028499 100644
> --- a/lib/odp-util.h
> +++ b/lib/odp-util.h
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -114,6 +114,7 @@ enum odp_key_fitness {
> };
> enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *, size_t,
> struct flow *);
> +const char *odp_key_fitness_to_string(enum odp_key_fitness);
>
> enum user_action_cookie_type {
> USER_ACTION_COOKIE_UNSPEC,
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 6e73e0d..64e9073 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -3366,8 +3366,6 @@ facet_check_consistency(struct facet *facet)
> struct subfacet *subfacet;
> bool ok;
>
> - COVERAGE_INC(facet_revalidate);
> -
> /* Check the rule for consistency. */
> rule = rule_dpif_lookup(ofproto, &facet->flow, 0);
> if (!rule) {
> @@ -3380,6 +3378,7 @@ facet_check_consistency(struct facet *facet)
> } else if (rule != facet->rule) {
> struct ds s;
>
> + ds_init(&s);
> flow_format(&s, &facet->flow);
> ds_put_format(&s, ": facet associated with wrong rule (was "
> "table=%"PRIu8",", facet->rule->up.table_id);
> @@ -3434,12 +3433,13 @@ facet_check_consistency(struct facet *facet)
>
> ds_put_cstr(&s, ": inconsistency in subfacet");
> if (should_install != subfacet->installed) {
> + enum odp_key_fitness fitness = subfacet->key_fitness;
> +
> ds_put_format(&s, " (should%s have been installed)",
> should_install ? "" : " not");
> ds_put_format(&s, " (may_set_up_flow=%s, fitness=%s)",
> ctx.may_set_up_flow ? "true" : "false",
> - subfacet->key_fitness == ODP_FIT_TOO_LITTLE
> - ? "too_little" : "OK");
> + odp_key_fitness_to_string(fitness));
> }
> if (actions_changed) {
> ds_put_cstr(&s, " (actions were: ");
> @@ -6048,24 +6048,14 @@ ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
> unixctl_command_reply(conn, 200, NULL);
> }
>
> +/* Runs a self-check of flow translations in 'ofproto'. Appends a message to
> + * 'reply' describing the results. */
> static void
> -ofproto_dpif_self_check(struct unixctl_conn *conn,
> - int argc OVS_UNUSED, const char *argv[],
> - void *aux OVS_UNUSED)
> +ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply)
> {
> - const char *dpname = argv[1];
> - struct ofproto_dpif *ofproto;
> struct facet *facet;
> - char *reply;
> int errors;
>
> - ofproto = ofproto_dpif_lookup(dpname);
> - if (!ofproto) {
> - unixctl_command_reply(conn, 501, "Unknown ofproto (use ofproto/list "
> - "for help)");
> - return;
> - }
> -
> errors = 0;
> HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) {
> if (!facet_check_consistency(facet)) {
> @@ -6076,11 +6066,38 @@ ofproto_dpif_self_check(struct unixctl_conn *conn,
> ofproto->need_revalidate = true;
> }
>
> - reply = (errors
> - ? xasprintf("%s: self-check failed (%d errors)", dpname, errors)
> - : xasprintf("%s: self-check passed", dpname));
> - unixctl_command_reply(conn, 200, reply);
> - free(reply);
> + if (errors) {
> + ds_put_format(reply, "%s: self-check failed (%d errors)\n",
> + ofproto->up.name, errors);
> + } else {
> + ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name);
> + }
> +}
> +
> +static void
> +ofproto_dpif_self_check(struct unixctl_conn *conn,
> + int argc OVS_UNUSED, const char *argv[],
> + void *aux OVS_UNUSED)
> +{
> + struct ds reply = DS_EMPTY_INITIALIZER;
> + struct ofproto_dpif *ofproto;
> +
> + if (argc > 1) {
> + ofproto = ofproto_dpif_lookup(argv[1]);
> + if (!ofproto) {
> + unixctl_command_reply(conn, 501, "Unknown ofproto (use "
> + "ofproto/list for help)");
> + return;
> + }
> + ofproto_dpif_self_check__(ofproto, &reply);
> + } else {
> + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
> + ofproto_dpif_self_check__(ofproto, &reply);
> + }
> + }
> +
> + unixctl_command_reply(conn, 200, ds_cstr(&reply));
> + ds_destroy(&reply);
> }
>
> static void
> @@ -6104,7 +6121,7 @@ ofproto_dpif_unixctl_init(void)
> ofproto_dpif_clog, NULL);
> unixctl_command_register("ofproto/unclog", "", 0, 0,
> ofproto_dpif_unclog, NULL);
> - unixctl_command_register("ofproto/self-check", "bridge", 1, 1,
> + unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1,
> ofproto_dpif_self_check, NULL);
> }
>
> diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man
> index 5ae90f2..ad9e02d 100644
> --- a/ofproto/ofproto-unixctl.man
> +++ b/ofproto/ofproto-unixctl.man
> @@ -64,3 +64,10 @@ This form of \fBofproto/trace\fR cannot determine the complete set of
> datapath actions in some corner cases. If the results say that this
> is the case, rerun \fBofproto/trace\fR supplying a packet in the flow
> to get complete results.
> +.IP "\fBofproto/self\-check\fR [\fIswitch\fR]"
> +Runs an internal consistency check on \fIswitch\fR, if specified,
> +otherwise on all ofproto instances, and responds with a brief summary
> +of the results. If the summary reports any errors, then the Open
> +vSwitch logs should contain more detailed information. Please pass
> +along errors reported by this command to the Open vSwitch developers
> +as bugs.
>
> --8<--------------------------cut here-------------------------->8--
>
> From: Ben Pfaff <blp at nicira.com>
> Date: Mon, 16 Jan 2012 11:15:25 -0800
> Subject: [PATCH] ofproto-dpif: Implement self-check of flow translations.
>
> One of the major tasks of ofproto-dpif is to translate OpenFlow
> actions into "ODP" datapath actions. These translations are essentially
> a cache that requires revalidation when certain state changes occur. For
> best performance it's important to revalidate flows only when necessary,
> so from time to time Open vSwitch has gotten this wrong, which meant that
> stale flows could persist in the kernel and cause surprising behavior.
>
> This commit implements a simple "self check": every trip through the
> Open vSwitch main loop randomly chooses one flow entry and checks that
> its actions have been correctly translated. If not, Open vSwitch logs
> the details of the problem. This should help find problems more
> quickly in the future.
>
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
> lib/odp-util.c | 20 +++++-
> lib/odp-util.h | 3 +-
> ofproto/ofproto-dpif.c | 181 +++++++++++++++++++++++++++++++++++++++++++
> ofproto/ofproto-unixctl.man | 7 ++
> 4 files changed, 209 insertions(+), 2 deletions(-)
>
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index ffb486e..12e8daf 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -1687,6 +1687,24 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
> expected_attrs, flow, key, key_len);
> }
>
> +/* Returns 'fitness' as a string, for use in debug messages. */
> +const char *
> +odp_key_fitness_to_string(enum odp_key_fitness fitness)
> +{
> + switch (fitness) {
> + case ODP_FIT_PERFECT:
> + return "OK";
> + case ODP_FIT_TOO_MUCH:
> + return "too_much";
> + case ODP_FIT_TOO_LITTLE:
> + return "too_little";
> + case ODP_FIT_ERROR:
> + return "error";
> + default:
> + return "<unknown>";
> + }
> +}
> +
> /* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that specifies
> * Netlink PID 'pid'. If 'cookie' is nonnull, adds a userdata attribute whose
> * contents contains 'cookie' and returns the offset within 'odp_actions' of
> diff --git a/lib/odp-util.h b/lib/odp-util.h
> index a6f8a30..0028499 100644
> --- a/lib/odp-util.h
> +++ b/lib/odp-util.h
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -114,6 +114,7 @@ enum odp_key_fitness {
> };
> enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *, size_t,
> struct flow *);
> +const char *odp_key_fitness_to_string(enum odp_key_fitness);
>
> enum user_action_cookie_type {
> USER_ACTION_COOKIE_UNSPEC,
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index e3cb51b..64e9073 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -327,6 +327,7 @@ static struct facet *facet_find(struct ofproto_dpif *, const struct flow *);
> static struct facet *facet_lookup_valid(struct ofproto_dpif *,
> const struct flow *);
> static bool facet_revalidate(struct facet *);
> +static bool facet_check_consistency(struct facet *);
>
> static void facet_flush_stats(struct facet *);
>
> @@ -382,6 +383,8 @@ static struct subfacet *subfacet_find(struct ofproto_dpif *,
> const struct nlattr *key, size_t key_len);
> static void subfacet_destroy(struct subfacet *);
> static void subfacet_destroy__(struct subfacet *);
> +static void subfacet_get_key(struct subfacet *, struct odputil_keybuf *,
> + struct ofpbuf *key);
> static void subfacet_reset_dp_stats(struct subfacet *,
> struct dpif_flow_stats *);
> static void subfacet_update_time(struct subfacet *, long long int used);
> @@ -821,6 +824,19 @@ run(struct ofproto *ofproto_)
> }
> }
>
> + /* Check the consistency of a random facet, to aid debugging. */
> + if (!hmap_is_empty(&ofproto->facets) && !ofproto->need_revalidate) {
> + struct facet *facet;
> +
> + facet = CONTAINER_OF(hmap_random_node(&ofproto->facets),
> + struct facet, hmap_node);
> + if (!tag_set_intersects(&ofproto->revalidate_set, facet->tags)) {
> + if (!facet_check_consistency(facet)) {
> + ofproto->need_revalidate = true;
> + }
> + }
> + }
> +
> return 0;
> }
>
> @@ -3339,6 +3355,117 @@ facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow)
> return facet;
> }
>
> +static bool
> +facet_check_consistency(struct facet *facet)
> +{
> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
> +
> + struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
> +
> + struct rule_dpif *rule;
> + struct subfacet *subfacet;
> + bool ok;
> +
> + /* Check the rule for consistency. */
> + rule = rule_dpif_lookup(ofproto, &facet->flow, 0);
> + if (!rule) {
> + if (!VLOG_DROP_WARN(&rl)) {
> + char *s = flow_to_string(&facet->flow);
> + VLOG_WARN("%s: facet should not exist", s);
> + free(s);
> + }
> + return false;
> + } else if (rule != facet->rule) {
> + struct ds s;
> +
> + ds_init(&s);
> + flow_format(&s, &facet->flow);
> + ds_put_format(&s, ": facet associated with wrong rule (was "
> + "table=%"PRIu8",", facet->rule->up.table_id);
> + cls_rule_format(&facet->rule->up.cr, &s);
> + ds_put_format(&s, ") (should have been table=%"PRIu8",",
> + rule->up.table_id);
> + cls_rule_format(&rule->up.cr, &s);
> + ds_put_char(&s, ')');
> +
> + VLOG_WARN("%s", ds_cstr(&s));
> + ds_destroy(&s);
> +
> + ok = false;
> + } else {
> + ok = true;
> + }
> +
> + /* Check the datapath actions for consistency. */
> + LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
> + struct action_xlate_ctx ctx;
> + struct ofpbuf *odp_actions;
> + bool actions_changed;
> + bool should_install;
> +
> + action_xlate_ctx_init(&ctx, ofproto, &facet->flow,
> + subfacet->initial_tci, rule->up.flow_cookie,
> + NULL);
> + odp_actions = xlate_actions(&ctx, rule->up.actions,
> + rule->up.n_actions);
> +
> + should_install = (ctx.may_set_up_flow
> + && subfacet->key_fitness != ODP_FIT_TOO_LITTLE);
> + if (!should_install && !subfacet->installed) {
> + /* The actions for uninstallable flows may vary from one packet to
> + * the next, so don't compare the actions. */
> + goto next;
> + }
> +
> + actions_changed = (subfacet->actions_len != odp_actions->size
> + || memcmp(subfacet->actions, odp_actions->data,
> + subfacet->actions_len));
> + if (should_install != subfacet->installed || actions_changed) {
> + struct odputil_keybuf keybuf;
> + struct ofpbuf key;
> + struct ds s;
> +
> + ok = false;
> +
> + ds_init(&s);
> + subfacet_get_key(subfacet, &keybuf, &key);
> + odp_flow_key_format(key.data, key.size, &s);
> +
> + ds_put_cstr(&s, ": inconsistency in subfacet");
> + if (should_install != subfacet->installed) {
> + enum odp_key_fitness fitness = subfacet->key_fitness;
> +
> + ds_put_format(&s, " (should%s have been installed)",
> + should_install ? "" : " not");
> + ds_put_format(&s, " (may_set_up_flow=%s, fitness=%s)",
> + ctx.may_set_up_flow ? "true" : "false",
> + odp_key_fitness_to_string(fitness));
> + }
> + if (actions_changed) {
> + ds_put_cstr(&s, " (actions were: ");
> + format_odp_actions(&s, subfacet->actions,
> + subfacet->actions_len);
> + ds_put_cstr(&s, ") (correct actions: ");
> + format_odp_actions(&s, odp_actions->data,
> + odp_actions->size);
> + ds_put_char(&s, ')');
> + } else {
> + ds_put_cstr(&s, " (actions: ");
> + format_odp_actions(&s, subfacet->actions,
> + subfacet->actions_len);
> + ds_put_char(&s, ')');
> + }
> + VLOG_WARN("%s", ds_cstr(&s));
> + ds_destroy(&s);
> + }
> +
> + next:
> + ofpbuf_delete(odp_actions);
> + }
> +
> + return ok;
> +}
> +
> /* Re-searches the classifier for 'facet':
> *
> * - If the rule found is different from 'facet''s current rule, moves
> @@ -5921,6 +6048,58 @@ ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
> unixctl_command_reply(conn, 200, NULL);
> }
>
> +/* Runs a self-check of flow translations in 'ofproto'. Appends a message to
> + * 'reply' describing the results. */
> +static void
> +ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply)
> +{
> + struct facet *facet;
> + int errors;
> +
> + errors = 0;
> + HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) {
> + if (!facet_check_consistency(facet)) {
> + errors++;
> + }
> + }
> + if (errors) {
> + ofproto->need_revalidate = true;
> + }
> +
> + if (errors) {
> + ds_put_format(reply, "%s: self-check failed (%d errors)\n",
> + ofproto->up.name, errors);
> + } else {
> + ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name);
> + }
> +}
> +
> +static void
> +ofproto_dpif_self_check(struct unixctl_conn *conn,
> + int argc OVS_UNUSED, const char *argv[],
> + void *aux OVS_UNUSED)
> +{
> + struct ds reply = DS_EMPTY_INITIALIZER;
> + struct ofproto_dpif *ofproto;
> +
> + if (argc > 1) {
> + ofproto = ofproto_dpif_lookup(argv[1]);
> + if (!ofproto) {
> + unixctl_command_reply(conn, 501, "Unknown ofproto (use "
> + "ofproto/list for help)");
> + return;
> + }
> + ofproto_dpif_self_check__(ofproto, &reply);
> + } else {
> + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
> + ofproto_dpif_self_check__(ofproto, &reply);
> + }
> + }
> +
> + unixctl_command_reply(conn, 200, ds_cstr(&reply));
> + ds_destroy(&reply);
> +}
> +
> static void
> ofproto_dpif_unixctl_init(void)
> {
> @@ -5942,6 +6121,8 @@ ofproto_dpif_unixctl_init(void)
> ofproto_dpif_clog, NULL);
> unixctl_command_register("ofproto/unclog", "", 0, 0,
> ofproto_dpif_unclog, NULL);
> + unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1,
> + ofproto_dpif_self_check, NULL);
> }
>
> /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
> diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man
> index 5ae90f2..ad9e02d 100644
> --- a/ofproto/ofproto-unixctl.man
> +++ b/ofproto/ofproto-unixctl.man
> @@ -64,3 +64,10 @@ This form of \fBofproto/trace\fR cannot determine the complete set of
> datapath actions in some corner cases. If the results say that this
> is the case, rerun \fBofproto/trace\fR supplying a packet in the flow
> to get complete results.
> +.IP "\fBofproto/self\-check\fR [\fIswitch\fR]"
> +Runs an internal consistency check on \fIswitch\fR, if specified,
> +otherwise on all ofproto instances, and responds with a brief summary
> +of the results. If the summary reports any errors, then the Open
> +vSwitch logs should contain more detailed information. Please pass
> +along errors reported by this command to the Open vSwitch developers
> +as bugs.
> --
> 1.7.2.5
>
More information about the dev
mailing list