[ovs-dev] [PATCH 3/3] Limit the number of generated conjunctions.

Dumitru Ceara dceara at redhat.com
Mon Oct 28 16:42:24 UTC 2019


On Mon, Oct 28, 2019 at 5:29 PM Dumitru Ceara <dceara at redhat.com> wrote:
>
> On Mon, Oct 28, 2019 at 5:07 PM Mark Michelson <mmichels at redhat.com> wrote:
> >
> > On 10/28/19 10:37 AM, Dumitru Ceara wrote:
> > > On Mon, Oct 28, 2019 at 1:46 PM Mark Michelson <mmichels at redhat.com> wrote:
> > >>
> > >> On 10/28/19 7:46 AM, Dumitru Ceara wrote:
> > >>> On Fri, Oct 25, 2019 at 11:07 PM Mark Michelson <mmichels at redhat.com> wrote:
> > >>>>
> > >>>> There is a maximum number of resubmits that can occur during packet
> > >>>> processing. Deliberately creating a flow table that might cross this
> > >>>> threshold is irresponsible.
> > >>>>
> > >>>> This commit causes the ofctrl code to track the maximum width and depth
> > >>>> of conjunctions in the desired flow table. By multiplying them, we have
> > >>>> a possible worst case for number of resubmits required. If this number
> > >>>> exceeds a quarter of the maximum resubmits allowed, then we fall back to
> > >>>> forcing crossproducts.
> > >>>>
> > >>>> The resulting flow table can end up being a mixture of conjunction and
> > >>>> non-conjunction flows if the limit is exceeded.
> > >>>>
> > >>>> Signed-off-by: Mark Michelson <mmichels at redhat.com>
> > >>>
> > >>> Hi Mark,
> > >>>
> > >>> I've been testing the series on a setup with:
> > >>> - 100 logical routers connected to a logical gateway router
> > >>> - 100 logical switches (each connected to a logical router)
> > >>> - 200 VIFs (2 per logical switch)
> > >>> - 2 NAT rules on each router
> > >>>
> > >>> I bound all VIFs to OVS internal interfaces on the machine when
> > >>> ovn-northd is running.
> > >>>
> > >>> If I start ovn-controller on the same node, without your series I see
> > >>> ovn-controller taking more than 100s for a single flow computation
> > >>> run.
> > >>>
> > >>> If I apply your series, the maximum duration of a flow computation run
> > >>> in the same scenario drops to ~4s.
> > >>>
> > >>> This is really nice, however, I see some issues with flow removal (see below).
> > >>
> > >> Thanks for the test, Dumitru.
> > >>
> > >>>
> > >>>> ---
> > >>>>    controller/lflow.c  | 11 +++++++++
> > >>>>    controller/ofctrl.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >>>>    controller/ofctrl.h |  6 +++++
> > >>>>    include/ovn/expr.h  |  2 ++
> > >>>>    lib/expr.c          |  6 +++++
> > >>>>    5 files changed, 94 insertions(+)
> > >>>>
> > >>>> diff --git a/controller/lflow.c b/controller/lflow.c
> > >>>> index 34b7c36a6..318066465 100644
> > >>>> --- a/controller/lflow.c
> > >>>> +++ b/controller/lflow.c
> > >>>> @@ -37,6 +37,13 @@
> > >>>>    VLOG_DEFINE_THIS_MODULE(lflow);
> > >>>>
> > >>>>    COVERAGE_DEFINE(lflow_run);
> > >>>> +
> > >>>> +/* Limit maximum conjunctions to a quarter of the max
> > >>>> + * resubmits. Unfortunatley, max resubmits is not publicly
> > >>>> + * defined in a header file, so if max resubmits is changed
> > >>>> + * from 4096, we have to make sure to update this as well
> > >>>> + */
> > >>>> +#define MAX_CONJUNCTIONS (4096 / 4)
> > >>>>
> > >>>>    /* Symbol table. */
> > >>>>
> > >>>> @@ -602,6 +609,10 @@ consider_logical_flow(
> > >>>>        struct expr *prereqs;
> > >>>>        char *error;
> > >>>>
> > >>>> +    uint32_t conj_worst_case;
> > >>>> +    conj_worst_case = flow_table->max_conj_width * flow_table->max_conj_depth;
> > >>>> +    expr_set_force_crossproduct(conj_worst_case >= MAX_CONJUNCTIONS);
> > >>>> +
> > >>>>        error = ovnacts_parse_string(lflow->actions, &pp, &ovnacts, &prereqs);
> > >>>>        if (error) {
> > >>>>            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> > >>>> diff --git a/controller/ofctrl.c b/controller/ofctrl.c
> > >>>> index afb0036f1..2b2fde3c9 100644
> > >>>> --- a/controller/ofctrl.c
> > >>>> +++ b/controller/ofctrl.c
> > >>>> @@ -686,6 +686,64 @@ ofctrl_check_and_add_flow(struct ovn_desired_flow_table *flow_table,
> > >>>>                      f->uuid_hindex_node.hash);
> > >>>>    }
> > >>>>
> > >>>> +static void
> > >>>> +get_conjunction_dimensions(const struct ovn_flow *f, uint32_t *conj_width_p,
> > >>>> +                           uint32_t *conj_depth_p)
> > >>>> +{
> > >>>> +    struct ofpact *ofpact;
> > >>>> +    uint32_t conj_width = 0;
> > >>>> +    uint32_t conj_depth = 0;
> > >>>> +    OFPACT_FOR_EACH (ofpact, f->ofpacts, f->ofpacts_len) {
> > >>>> +        if (ofpact->type == OFPACT_CONJUNCTION) {
> > >>>> +            struct ofpact_conjunction *conj = ofpact_get_CONJUNCTION(ofpact);
> > >>>> +            if (conj->n_clauses > conj_depth) {
> > >>>> +                conj_depth = conj->n_clauses;
> > >>>> +            }
> > >>>> +            conj_width++;
> > >>>> +        }
> > >>>> +    }
> > >>>> +
> > >>>> +    *conj_width_p = conj_width;
> > >>>> +    *conj_depth_p = conj_depth;
> > >>>> +}
> > >>>> +
> > >>>> +static void
> > >>>> +adjust_conjunction_count(struct ovn_desired_flow_table *desired_flows,
> > >>>> +                 const struct ovn_flow *f, bool add)
> > >>>> +{
> > >>>> +    uint32_t conj_width;
> > >>>> +    uint32_t conj_depth;
> > >>>> +
> > >>>> +    get_conjunction_dimensions(f, &conj_width, &conj_depth);
> > >>>> +
> > >>>> +    if (add) {
> > >>>> +        if (conj_width > desired_flows->max_conj_width) {
> > >>>> +            desired_flows->max_conj_width = conj_width;
> > >>>> +        }
> > >>>> +        if (conj_depth > desired_flows->max_conj_depth) {
> > >>>> +            desired_flows->max_conj_depth = conj_depth;
> > >>>> +        }
> > >>>> +    } else if (desired_flows->max_conj_width <= conj_width ||
> > >>>> +               desired_flows->max_conj_depth <= conj_depth) {
> > >>>> +        /* Removing either the widest or deepest conjunction.
> > >>>> +         * Walk the table and recalculate maximums
> > >>>> +         */
> > >>>> +        struct ovn_flow *iter;
> > >>>> +        desired_flows->max_conj_width = 0;
> > >>>> +        desired_flows->max_conj_depth = 0;
> > >>>> +        HMAP_FOR_EACH (iter, match_hmap_node,
> > >>>> +                       &desired_flows->match_flow_table) {
> > >>>> +            get_conjunction_dimensions(iter, &conj_width, &conj_depth);
> > >>>> +            if (conj_width > desired_flows->max_conj_width) {
> > >>>> +                desired_flows->max_conj_width = conj_width;
> > >>>> +            }
> > >>>> +            if (conj_depth > desired_flows->max_conj_depth) {
> > >>>> +                desired_flows->max_conj_depth = conj_depth;
> > >>>> +            }
> > >>>> +        }
> > >>>
> > >>> I think this is quite expensive because we change single flow removal
> > >>> complexity from amortized O(1) to O(n).
> > >>>
> > >>> On the same setup as above if I continue testing and remove all
> > >>> logical switches from the NB database it will trigger all flows to be
> > >>> removed.
> > >>>
> > >>> In this scenario, with your series I see that two flow computation
> > >>> runs take up to 17s.
> > >>> On the other hand, if I do the deletion test without applying your
> > >>> third patch, flow computation runs never take longer than 5s.
> > >>>
> > >>> I agree that it's good to limit the number of conjunctive flows but I
> > >>> think we need to find a better way to track the max_conj_width/depth.
> > >>> I guess we could track max flow conj_width/depth by storing ovn_flows
> > >>> in a max-heap but that would change both flow add and remove
> > >>> operations' complexities to O(log(n)).
> > >>>
> > >>> Or do you think we can have a different heuristic for disabling
> > >>> conjunctive flows?
> > >>
> > >> Short version: we probably should just remove the limitation.
> > >>
> > >> Long version:
> > >>
> > >> My calculation method could stand to be improved.
> > >>
> > >> 1) The max_width * max_depth calculation assumes that the flow with the
> > >> max_width correlates with the conjunction with max depth. This may not
> > >> be the case.
> > >> 2) The max_width * max_depth calculation only accounts for one possible
> > >> conjunction's contribution to the resubmit count. If there are multiple
> > >> conjunctive match flows, we don't account for that.
> > >> 3) As you found, removing a flow requires an expensive traversal of the
> > >> desired flows table. There could be some ways to mitigate this, but no
> > >> matter what, the O(1) algorithm is toast.
> > >> 4) I more-or-less guessed what the upper bound should be before setting
> > >> force_crossproduct.
> > >>
> > >> So given all that, the proper things to do would be to either remove the
> > >> limitation checks or come up with a different calculation.
> > >>
> > >> When it comes to calculations, we can either perform a running
> > >> calculation (like I've done), or we can do a once-per-loop estimation
> > >> based on configuration.
> > >
> > > An alternative would be to do the current check periodically instead
> > > of at every flow addition/deletion. On the long run that would keep
> > > complexity to O(1) as we just walk the whole hashtable one more time
> > > on a timer. The disadvantage is that we might need to reinstall the
> > > existing conjunctive flows once we determine that the max_depth/width
> > > was reached.
> >
> > My main concern with doing a periodic recalculation is similar to what I
> > brought up with doing an all-at-once calculation on each loop. It's
> > difficult to create a hybrid flow table that has both conjunctions and
> > crossproducts. If you find that there are too many conjunctions during
> > the calculation, I'm not sure how you can install only a certain number
> > of conjunctions. I think it becomes all or nothing.
>
> Actually, what I meant was that if on the periodic htable run we
> figure out that the conjunction limit was reached in either direction
> we could call expr_set_force_crossproduct(true/false), then trigger a
> full recompute of the incremental processing engine
> (engine_set_force_recompute(true)) and poll_immediate_wake().
>
> We might need some hysteresis to avoid rapid changes between
> crossproduct and conjunction modes when we're close to the limit.
>
> Also some logs to inform the user that we switched to/from the
> "crossproduct mode" might be useful.
>
> Wouldn't that work?
>
> >
> > One idea I had is that we could only re-calculate on flow removal if the
> > current worst case for conjunctions is above our upper bound. If we
> > don't need to install crossproducts, and our worst case just got better,
> > then we don't need to spend the time recalculating. We obviously can
> > just use our previously calculated upper bound. The issue here is that
> > we might end up with a false positive for our worst case if we go a long
> > time without a full recalculation. So one way to avoid this might be to
> > recalculate on every, say, 10th removal no matter what. This counter
> > would reset each time we recalculate the worst case for conjunctions,
> > and it would reset each time the incremental engine does a full
> > recompute of the flow table.
> >
> > What do you think?
> >
> >
> > >
> > >>
> > >> The running calculation has the advantage of potentially being more
> > >> accurate. It also allows for us to enable force_crossproduct partway
> > >> through an ovn-controller loop. We can have some conjunctions, and some
> > >> crossproducts in the resulting flow table. The problem is that this
> > >> requires extra CPU time in order to keep track of the current
> > >> conjunction count. The amount can be better than what I've put forth,
> > >> but it's still going to be a downgrade from what it currently is.
> > >>
> > >> The one-time calculation might work by scanning the configured ACLs,
> > >> Address_Sets, and Port_Groups to make an educated guess at the worst
> > >> case for flow traversal. Unless we're willing to do some early parsing
> > >> of ACLs, this likely would be only a rough estimate for the worst case.
> > >> If we calculate that the number of potential conjunctions would exceed
> > >> our upper bound, then we likely would need to completely disable
> > >> conjunctions when calculating flows.
> > >>
> > >> The problem of the resubmit count is hypothetical. It's hard to know how
> > >> likely it is for real systems to exceed the max resubmit count. It's
> > >> also hard to estimate what the proper upper bound for conjunctions
> > >> should be. I think that it would take an extreme network hitting the
> > >> absolute worst cases to hit the resubmit limit. I think it's safe to
> > >> continue without the limitations. If we ever get reports about max
> > >> resubmit issues due to conjunctions, we can examine the individual
> > >> scenarios and try to formulate some good heuristics.
> > >
> > > This is also an option. And it's basically what OVN had before
> > > conjunctions were disabled.
> >
> > It's slightly different, though, since we risk more resubmits by having
> > multiple conjunction actions in the same flow.

Ack, my bad.

> >
> > I'm leaning towards doing this for this patch series. It may be worth
> > submitting the limitation calculation as a separate patch series since
> > it'll be super nice to get conjunctions reintroduced.

I agree that we should reenable conjunctions. I guess a safety check
for too many conjunctions can be added later if we can't find an easy
way to do it now.

> >
> > >
> > > Thanks,
> > > Dumitru
> > >
> > >>
> > >>>
> > >>> Thanks,
> > >>> Dumitru
> > >>>
> > >>
> > >>
> > >>
> > >>>> +    }
> > >>>> +}
> > >>>> +
> > >>>>    /* Removes a bundles of flows from the flow table. */
> > >>>>    void
> > >>>>    ofctrl_remove_flows(struct ovn_desired_flow_table *flow_table,
> > >>>> @@ -697,6 +755,7 @@ ofctrl_remove_flows(struct ovn_desired_flow_table *flow_table,
> > >>>>                                        &flow_table->uuid_flow_table) {
> > >>>>            if (uuid_equals(&f->sb_uuid, sb_uuid)) {
> > >>>>                ovn_flow_log(f, "ofctrl_remove_flow");
> > >>>> +            adjust_conjunction_count(flow_table, f, false);
> > >>>>                hmap_remove(&flow_table->match_flow_table,
> > >>>>                            &f->match_hmap_node);
> > >>>>                hindex_remove(&flow_table->uuid_flow_table, &f->uuid_hindex_node);
> > >>>> @@ -747,6 +806,12 @@ ofctrl_add_or_append_flow(struct ovn_desired_flow_table *desired_flows,
> > >>>>            existing->ofpacts = xmemdup(compound.data, compound.size);
> > >>>>            existing->ofpacts_len = compound.size;
> > >>>>
> > >>>> +        /* It's a bit of a cheat that we only call adjust_conjunction_count in
> > >>>> +         * this function and not in ofctrl_add(). However, we know that
> > >>>> +         * conjunctions are only ever added with this function
> > >>>> +         */
> > >>>> +        adjust_conjunction_count(desired_flows, existing, true);
> > >>>> +
> > >>>>            ovn_flow_destroy(f);
> > >>>>        } else {
> > >>>>            hmap_insert(&desired_flows->match_flow_table, &f->match_hmap_node,
> > >>>> @@ -862,6 +927,8 @@ ovn_desired_flow_table_init(struct ovn_desired_flow_table *flow_table)
> > >>>>    {
> > >>>>        hmap_init(&flow_table->match_flow_table);
> > >>>>        hindex_init(&flow_table->uuid_flow_table);
> > >>>> +    flow_table->max_conj_width = 0;
> > >>>> +    flow_table->max_conj_depth = 0;
> > >>>>    }
> > >>>>
> > >>>>    void
> > >>>> @@ -874,6 +941,8 @@ ovn_desired_flow_table_clear(struct ovn_desired_flow_table *flow_table)
> > >>>>            hindex_remove(&flow_table->uuid_flow_table, &f->uuid_hindex_node);
> > >>>>            ovn_flow_destroy(f);
> > >>>>        }
> > >>>> +    flow_table->max_conj_width = 0;
> > >>>> +    flow_table->max_conj_depth = 0;
> > >>>>    }
> > >>>>
> > >>>>    void
> > >>>> diff --git a/controller/ofctrl.h b/controller/ofctrl.h
> > >>>> index 21d2ce648..c295c04ed 100644
> > >>>> --- a/controller/ofctrl.h
> > >>>> +++ b/controller/ofctrl.h
> > >>>> @@ -37,6 +37,12 @@ struct ovn_desired_flow_table {
> > >>>>
> > >>>>        /* SB uuid index for the nodes in match_flow_table.*/
> > >>>>        struct hindex uuid_flow_table;
> > >>>> +
> > >>>> +    /* Highest known number of conjunction() actions in a single flow */
> > >>>> +    uint32_t max_conj_width;
> > >>>> +
> > >>>> +    /* Largest number of clauses for a conjunction in the table */
> > >>>> +    uint32_t max_conj_depth;
> > >>>>    };
> > >>>>
> > >>>>    /* Interface for OVN main loop. */
> > >>>> diff --git a/include/ovn/expr.h b/include/ovn/expr.h
> > >>>> index 22f633e57..714e21add 100644
> > >>>> --- a/include/ovn/expr.h
> > >>>> +++ b/include/ovn/expr.h
> > >>>> @@ -425,6 +425,8 @@ bool expr_evaluate(const struct expr *, const struct flow *uflow,
> > >>>>                       bool (*lookup_port)(const void *aux, const char *port_name,
> > >>>>                                           unsigned int *portp),
> > >>>>                       const void *aux);
> > >>>> +
> > >>>> +void expr_set_force_crossproduct(bool should_force);
> > >>>>
> > >>>>    /* Converting expressions to OpenFlow flows. */
> > >>>>
> > >>>> diff --git a/lib/expr.c b/lib/expr.c
> > >>>> index 71de61543..4c164caed 100644
> > >>>> --- a/lib/expr.c
> > >>>> +++ b/lib/expr.c
> > >>>> @@ -3278,6 +3278,12 @@ expr_evaluate(const struct expr *e, const struct flow *uflow,
> > >>>>            OVS_NOT_REACHED();
> > >>>>        }
> > >>>>    }
> > >>>> +
> > >>>> +void
> > >>>> +expr_set_force_crossproduct(bool should_force)
> > >>>> +{
> > >>>> +    force_crossproduct = should_force;
> > >>>> +}
> > >>>>
> > >>>>    /* Action parsing helper. */
> > >>>>
> > >>>> --
> > >>>> 2.14.5
> > >>>>
> > >>>> _______________________________________________
> > >>>> dev mailing list
> > >>>> dev at openvswitch.org
> > >>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >>
> >


More information about the dev mailing list