[ovs-dev] [PATCH 3/3] Limit the number of generated conjunctions.

Mark Michelson mmichels at redhat.com
Mon Oct 28 16:07:35 UTC 2019


On 10/28/19 10:37 AM, Dumitru Ceara wrote:
> On Mon, Oct 28, 2019 at 1:46 PM Mark Michelson <mmichels at redhat.com> wrote:
>>
>> On 10/28/19 7:46 AM, Dumitru Ceara wrote:
>>> On Fri, Oct 25, 2019 at 11:07 PM Mark Michelson <mmichels at redhat.com> wrote:
>>>>
>>>> There is a maximum number of resubmits that can occur during packet
>>>> processing. Deliberately creating a flow table that might cross this
>>>> threshold is irresponsible.
>>>>
>>>> This commit causes the ofctrl code to track the maximum width and depth
>>>> of conjunctions in the desired flow table. By multiplying them, we have
>>>> a possible worst case for number of resubmits required. If this number
>>>> exceeds a quarter of the maximum resubmits allowed, then we fall back to
>>>> forcing crossproducts.
>>>>
>>>> The resulting flow table can end up being a mixture of conjunction and
>>>> non-conjunction flows if the limit is exceeded.
>>>>
>>>> Signed-off-by: Mark Michelson <mmichels at redhat.com>
>>>
>>> Hi Mark,
>>>
>>> I've been testing the series on a setup with:
>>> - 100 logical routers connected to a logical gateway router
>>> - 100 logical switches (each connected to a logical router)
>>> - 200 VIFs (2 per logical switch)
>>> - 2 NAT rules on each router
>>>
>>> I bound all VIFs to OVS internal interfaces on the machine when
>>> ovn-northd is running.
>>>
>>> If I start ovn-controller on the same node, without your series I see
>>> ovn-controller taking more than 100s for a single flow computation
>>> run.
>>>
>>> If I apply your series, the maximum duration of a flow computation run
>>> in the same scenario drops to ~4s.
>>>
>>> This is really nice, however, I see some issues with flow removal (see below).
>>
>> Thanks for the test, Dumitru.
>>
>>>
>>>> ---
>>>>    controller/lflow.c  | 11 +++++++++
>>>>    controller/ofctrl.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    controller/ofctrl.h |  6 +++++
>>>>    include/ovn/expr.h  |  2 ++
>>>>    lib/expr.c          |  6 +++++
>>>>    5 files changed, 94 insertions(+)
>>>>
>>>> diff --git a/controller/lflow.c b/controller/lflow.c
>>>> index 34b7c36a6..318066465 100644
>>>> --- a/controller/lflow.c
>>>> +++ b/controller/lflow.c
>>>> @@ -37,6 +37,13 @@
>>>>    VLOG_DEFINE_THIS_MODULE(lflow);
>>>>
>>>>    COVERAGE_DEFINE(lflow_run);
>>>> +
>>>> +/* Limit maximum conjunctions to a quarter of the max
>>>> + * resubmits. Unfortunatley, max resubmits is not publicly
>>>> + * defined in a header file, so if max resubmits is changed
>>>> + * from 4096, we have to make sure to update this as well
>>>> + */
>>>> +#define MAX_CONJUNCTIONS (4096 / 4)
>>>>
>>>>    /* Symbol table. */
>>>>
>>>> @@ -602,6 +609,10 @@ consider_logical_flow(
>>>>        struct expr *prereqs;
>>>>        char *error;
>>>>
>>>> +    uint32_t conj_worst_case;
>>>> +    conj_worst_case = flow_table->max_conj_width * flow_table->max_conj_depth;
>>>> +    expr_set_force_crossproduct(conj_worst_case >= MAX_CONJUNCTIONS);
>>>> +
>>>>        error = ovnacts_parse_string(lflow->actions, &pp, &ovnacts, &prereqs);
>>>>        if (error) {
>>>>            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>>> diff --git a/controller/ofctrl.c b/controller/ofctrl.c
>>>> index afb0036f1..2b2fde3c9 100644
>>>> --- a/controller/ofctrl.c
>>>> +++ b/controller/ofctrl.c
>>>> @@ -686,6 +686,64 @@ ofctrl_check_and_add_flow(struct ovn_desired_flow_table *flow_table,
>>>>                      f->uuid_hindex_node.hash);
>>>>    }
>>>>
>>>> +static void
>>>> +get_conjunction_dimensions(const struct ovn_flow *f, uint32_t *conj_width_p,
>>>> +                           uint32_t *conj_depth_p)
>>>> +{
>>>> +    struct ofpact *ofpact;
>>>> +    uint32_t conj_width = 0;
>>>> +    uint32_t conj_depth = 0;
>>>> +    OFPACT_FOR_EACH (ofpact, f->ofpacts, f->ofpacts_len) {
>>>> +        if (ofpact->type == OFPACT_CONJUNCTION) {
>>>> +            struct ofpact_conjunction *conj = ofpact_get_CONJUNCTION(ofpact);
>>>> +            if (conj->n_clauses > conj_depth) {
>>>> +                conj_depth = conj->n_clauses;
>>>> +            }
>>>> +            conj_width++;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    *conj_width_p = conj_width;
>>>> +    *conj_depth_p = conj_depth;
>>>> +}
>>>> +
>>>> +static void
>>>> +adjust_conjunction_count(struct ovn_desired_flow_table *desired_flows,
>>>> +                 const struct ovn_flow *f, bool add)
>>>> +{
>>>> +    uint32_t conj_width;
>>>> +    uint32_t conj_depth;
>>>> +
>>>> +    get_conjunction_dimensions(f, &conj_width, &conj_depth);
>>>> +
>>>> +    if (add) {
>>>> +        if (conj_width > desired_flows->max_conj_width) {
>>>> +            desired_flows->max_conj_width = conj_width;
>>>> +        }
>>>> +        if (conj_depth > desired_flows->max_conj_depth) {
>>>> +            desired_flows->max_conj_depth = conj_depth;
>>>> +        }
>>>> +    } else if (desired_flows->max_conj_width <= conj_width ||
>>>> +               desired_flows->max_conj_depth <= conj_depth) {
>>>> +        /* Removing either the widest or deepest conjunction.
>>>> +         * Walk the table and recalculate maximums
>>>> +         */
>>>> +        struct ovn_flow *iter;
>>>> +        desired_flows->max_conj_width = 0;
>>>> +        desired_flows->max_conj_depth = 0;
>>>> +        HMAP_FOR_EACH (iter, match_hmap_node,
>>>> +                       &desired_flows->match_flow_table) {
>>>> +            get_conjunction_dimensions(iter, &conj_width, &conj_depth);
>>>> +            if (conj_width > desired_flows->max_conj_width) {
>>>> +                desired_flows->max_conj_width = conj_width;
>>>> +            }
>>>> +            if (conj_depth > desired_flows->max_conj_depth) {
>>>> +                desired_flows->max_conj_depth = conj_depth;
>>>> +            }
>>>> +        }
>>>
>>> I think this is quite expensive because we change single flow removal
>>> complexity from amortized O(1) to O(n).
>>>
>>> On the same setup as above if I continue testing and remove all
>>> logical switches from the NB database it will trigger all flows to be
>>> removed.
>>>
>>> In this scenario, with your series I see that two flow computation
>>> runs take up to 17s.
>>> On the other hand, if I do the deletion test without applying your
>>> third patch, flow computation runs never take longer than 5s.
>>>
>>> I agree that it's good to limit the number of conjunctive flows but I
>>> think we need to find a better way to track the max_conj_width/depth.
>>> I guess we could track max flow conj_width/depth by storing ovn_flows
>>> in a max-heap but that would change both flow add and remove
>>> operations' complexities to O(log(n)).
>>>
>>> Or do you think we can have a different heuristic for disabling
>>> conjunctive flows?
>>
>> Short version: we probably should just remove the limitation.
>>
>> Long version:
>>
>> My calculation method could stand to be improved.
>>
>> 1) The max_width * max_depth calculation assumes that the flow with the
>> max_width correlates with the conjunction with max depth. This may not
>> be the case.
>> 2) The max_width * max_depth calculation only accounts for one possible
>> conjunction's contribution to the resubmit count. If there are multiple
>> conjunctive match flows, we don't account for that.
>> 3) As you found, removing a flow requires an expensive traversal of the
>> desired flows table. There could be some ways to mitigate this, but no
>> matter what, the O(1) algorithm is toast.
>> 4) I more-or-less guessed what the upper bound should be before setting
>> force_crossproduct.
>>
>> So given all that, the proper things to do would be to either remove the
>> limitation checks or come up with a different calculation.
>>
>> When it comes to calculations, we can either perform a running
>> calculation (like I've done), or we can do a once-per-loop estimation
>> based on configuration.
> 
> An alternative would be to do the current check periodically instead
> of at every flow addition/deletion. On the long run that would keep
> complexity to O(1) as we just walk the whole hashtable one more time
> on a timer. The disadvantage is that we might need to reinstall the
> existing conjunctive flows once we determine that the max_depth/width
> was reached.

My main concern with doing a periodic recalculation is similar to what I 
brought up with doing an all-at-once calculation on each loop. It's 
difficult to create a hybrid flow table that has both conjunctions and 
crossproducts. If you find that there are too many conjunctions during 
the calculation, I'm not sure how you can install only a certain number 
of conjunctions. I think it becomes all or nothing.

One idea I had is that we could only re-calculate on flow removal if the 
current worst case for conjunctions is above our upper bound. If we 
don't need to install crossproducts, and our worst case just got better, 
then we don't need to spend the time recalculating. We obviously can 
just use our previously calculated upper bound. The issue here is that 
we might end up with a false positive for our worst case if we go a long 
time without a full recalculation. So one way to avoid this might be to 
recalculate on every, say, 10th removal no matter what. This counter 
would reset each time we recalculate the worst case for conjunctions, 
and it would reset each time the incremental engine does a full 
recompute of the flow table.

What do you think?


> 
>>
>> The running calculation has the advantage of potentially being more
>> accurate. It also allows for us to enable force_crossproduct partway
>> through an ovn-controller loop. We can have some conjunctions, and some
>> crossproducts in the resulting flow table. The problem is that this
>> requires extra CPU time in order to keep track of the current
>> conjunction count. The amount can be better than what I've put forth,
>> but it's still going to be a downgrade from what it currently is.
>>
>> The one-time calculation might work by scanning the configured ACLs,
>> Address_Sets, and Port_Groups to make an educated guess at the worst
>> case for flow traversal. Unless we're willing to do some early parsing
>> of ACLs, this likely would be only a rough estimate for the worst case.
>> If we calculate that the number of potential conjunctions would exceed
>> our upper bound, then we likely would need to completely disable
>> conjunctions when calculating flows.
>>
>> The problem of the resubmit count is hypothetical. It's hard to know how
>> likely it is for real systems to exceed the max resubmit count. It's
>> also hard to estimate what the proper upper bound for conjunctions
>> should be. I think that it would take an extreme network hitting the
>> absolute worst cases to hit the resubmit limit. I think it's safe to
>> continue without the limitations. If we ever get reports about max
>> resubmit issues due to conjunctions, we can examine the individual
>> scenarios and try to formulate some good heuristics.
> 
> This is also an option. And it's basically what OVN had before
> conjunctions were disabled.

It's slightly different, though, since we risk more resubmits by having 
multiple conjunction actions in the same flow.

I'm leaning towards doing this for this patch series. It may be worth 
submitting the limitation calculation as a separate patch series since 
it'll be super nice to get conjunctions reintroduced.

> 
> Thanks,
> Dumitru
> 
>>
>>>
>>> Thanks,
>>> Dumitru
>>>
>>
>>
>>
>>>> +    }
>>>> +}
>>>> +
>>>>    /* Removes a bundles of flows from the flow table. */
>>>>    void
>>>>    ofctrl_remove_flows(struct ovn_desired_flow_table *flow_table,
>>>> @@ -697,6 +755,7 @@ ofctrl_remove_flows(struct ovn_desired_flow_table *flow_table,
>>>>                                        &flow_table->uuid_flow_table) {
>>>>            if (uuid_equals(&f->sb_uuid, sb_uuid)) {
>>>>                ovn_flow_log(f, "ofctrl_remove_flow");
>>>> +            adjust_conjunction_count(flow_table, f, false);
>>>>                hmap_remove(&flow_table->match_flow_table,
>>>>                            &f->match_hmap_node);
>>>>                hindex_remove(&flow_table->uuid_flow_table, &f->uuid_hindex_node);
>>>> @@ -747,6 +806,12 @@ ofctrl_add_or_append_flow(struct ovn_desired_flow_table *desired_flows,
>>>>            existing->ofpacts = xmemdup(compound.data, compound.size);
>>>>            existing->ofpacts_len = compound.size;
>>>>
>>>> +        /* It's a bit of a cheat that we only call adjust_conjunction_count in
>>>> +         * this function and not in ofctrl_add(). However, we know that
>>>> +         * conjunctions are only ever added with this function
>>>> +         */
>>>> +        adjust_conjunction_count(desired_flows, existing, true);
>>>> +
>>>>            ovn_flow_destroy(f);
>>>>        } else {
>>>>            hmap_insert(&desired_flows->match_flow_table, &f->match_hmap_node,
>>>> @@ -862,6 +927,8 @@ ovn_desired_flow_table_init(struct ovn_desired_flow_table *flow_table)
>>>>    {
>>>>        hmap_init(&flow_table->match_flow_table);
>>>>        hindex_init(&flow_table->uuid_flow_table);
>>>> +    flow_table->max_conj_width = 0;
>>>> +    flow_table->max_conj_depth = 0;
>>>>    }
>>>>
>>>>    void
>>>> @@ -874,6 +941,8 @@ ovn_desired_flow_table_clear(struct ovn_desired_flow_table *flow_table)
>>>>            hindex_remove(&flow_table->uuid_flow_table, &f->uuid_hindex_node);
>>>>            ovn_flow_destroy(f);
>>>>        }
>>>> +    flow_table->max_conj_width = 0;
>>>> +    flow_table->max_conj_depth = 0;
>>>>    }
>>>>
>>>>    void
>>>> diff --git a/controller/ofctrl.h b/controller/ofctrl.h
>>>> index 21d2ce648..c295c04ed 100644
>>>> --- a/controller/ofctrl.h
>>>> +++ b/controller/ofctrl.h
>>>> @@ -37,6 +37,12 @@ struct ovn_desired_flow_table {
>>>>
>>>>        /* SB uuid index for the nodes in match_flow_table.*/
>>>>        struct hindex uuid_flow_table;
>>>> +
>>>> +    /* Highest known number of conjunction() actions in a single flow */
>>>> +    uint32_t max_conj_width;
>>>> +
>>>> +    /* Largest number of clauses for a conjunction in the table */
>>>> +    uint32_t max_conj_depth;
>>>>    };
>>>>
>>>>    /* Interface for OVN main loop. */
>>>> diff --git a/include/ovn/expr.h b/include/ovn/expr.h
>>>> index 22f633e57..714e21add 100644
>>>> --- a/include/ovn/expr.h
>>>> +++ b/include/ovn/expr.h
>>>> @@ -425,6 +425,8 @@ bool expr_evaluate(const struct expr *, const struct flow *uflow,
>>>>                       bool (*lookup_port)(const void *aux, const char *port_name,
>>>>                                           unsigned int *portp),
>>>>                       const void *aux);
>>>> +
>>>> +void expr_set_force_crossproduct(bool should_force);
>>>>
>>>>    /* Converting expressions to OpenFlow flows. */
>>>>
>>>> diff --git a/lib/expr.c b/lib/expr.c
>>>> index 71de61543..4c164caed 100644
>>>> --- a/lib/expr.c
>>>> +++ b/lib/expr.c
>>>> @@ -3278,6 +3278,12 @@ expr_evaluate(const struct expr *e, const struct flow *uflow,
>>>>            OVS_NOT_REACHED();
>>>>        }
>>>>    }
>>>> +
>>>> +void
>>>> +expr_set_force_crossproduct(bool should_force)
>>>> +{
>>>> +    force_crossproduct = should_force;
>>>> +}
>>>>
>>>>    /* Action parsing helper. */
>>>>
>>>> --
>>>> 2.14.5
>>>>
>>>> _______________________________________________
>>>> dev mailing list
>>>> dev at openvswitch.org
>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>



More information about the dev mailing list