[ovs-dev] [PATCH ovn 2/2] ovn-controller: Cache logical flow expr tree for each lflow.
Numan Siddique
numans at ovn.org
Tue Jan 21 14:02:32 UTC 2020
On Tue, Jan 21, 2020 at 2:03 PM Han Zhou <zhouhan at gmail.com> wrote:
>
> On Thu, Jan 9, 2020 at 9:37 AM <numans at ovn.org> wrote:
> >
> > From: Numan Siddique <numans at ovn.org>
> >
> > This patch caches the logical flow expr tree for each logical flow. This
> > cache is stored as a hashmap in the output_flow engine data. When a
> logical
> > flow is deleted, the expr tree for that lflow is deleted in the
> > flow_output_sb_logical_flow_handler().
> >
> > Below are the details of the OVN resource in my setup
> >
> > No of logical switches - 49
> > No of logical ports - 1191
> > No of logical routers - 7
> > No of logical router ports - 51
> > No of ACLs - 1221
> > No of Logical flows - 664736
> >
> > On a chassis hosting 7 distributed router ports and around 1090
> > port bindings, the no of OVS rules was 921162.
> >
> > Without this patch, the function add_logical_flows() took ~15 seconds.
> > And with this patch it took ~10 seconds. There is a good 34% improvement
> > in logical flow processing.
>
> Great improvement! Thanks Numan. Please see my comments below.
>
Thanks for the review. v3 is on its way. PSB for a few comments.
> >
> > Signed-off-by: Numan Siddique <numans at ovn.org>
> > ---
> > controller/lflow.c | 182 ++++++++++++++++++++++++++----------
> > controller/lflow.h | 9 +-
> > controller/ovn-controller.c | 17 +++-
> > 3 files changed, 154 insertions(+), 54 deletions(-)
> >
> > diff --git a/controller/lflow.c b/controller/lflow.c
> > index 93ec53a1c..be46f0661 100644
> > --- a/controller/lflow.c
> > +++ b/controller/lflow.c
> > @@ -79,7 +79,9 @@ static bool consider_logical_flow(
> > struct ovn_extend_table *group_table,
> > struct ovn_extend_table *meter_table,
> > struct lflow_resource_ref *lfrr,
> > - uint32_t *conj_id_ofs);
> > + uint32_t *conj_id_ofs,
> > + struct hmap *lflow_expr_cache,
> > + bool delete_expr_from_cache);
>
> It was a convention that output args are at the end of the list (Ben took a
> lot of effort to make it this way when we were implementing incremental
> processing), so it would be better to move the last one
> "delete_expr_from_cache" to the position before ovn_desired_flow_table.
Ack.
>
> >
> > static bool
> > lookup_port_cb(const void *aux_, const char *port_name, unsigned int
> *portp)
> > @@ -255,6 +257,59 @@ lflow_resource_destroy_lflow(struct
> lflow_resource_ref *lfrr,
> > free(lfrn);
> > }
> >
> > +struct lflow_expr {
> > + struct hmap_node node;
> > + struct uuid lflow_uuid; /* key */
> > + struct expr *expr;
> > + char *lflow_match;
> > +};
> > +
> > +static void
> > +lflow_expr_add(struct hmap *lflow_expr_cache,
> > + const struct sbrec_logical_flow *lflow,
> > + struct expr *lflow_expr)
> > +{
> > + struct lflow_expr *le = xmalloc(sizeof *le);
> > + le->lflow_uuid = lflow->header_.uuid;
> > + le->expr = lflow_expr;
> > + le->lflow_match = xstrdup(lflow->match);
> > + hmap_insert(lflow_expr_cache, &le->node, uuid_hash(&le->lflow_uuid));
> > +}
> > +
> > +static struct lflow_expr *
> > +lflow_expr_get(struct hmap *lflow_expr_cache,
> > + const struct sbrec_logical_flow *lflow)
> > +{
> > + struct lflow_expr *le;
> > + size_t hash = uuid_hash(&lflow->header_.uuid);
> > + HMAP_FOR_EACH_WITH_HASH (le, node, hash, lflow_expr_cache) {
> > + if (!strcmp(lflow->match, le->lflow_match)) {
>
> I think here we should compare lflow uuid first to make sure we found the
> original lflow, and then compare lflow_match to make sure the match didn't
> change. If the lflow uuid matched but match string changed, it means the
> cache is no longer valid and we should delete it from the cache.
> Otherwise, if a lflow in SB is updated on the match field, it will end up
> with unused entries in the cache forever, unless the same lflow's match
> condition changed back.
> In fact, the case that a lflow's match condition changes may never happen
> at all considering northd's logic (correct me if I am wrong), and if this
> is true, then I don't think we need to compare the lflow->match at all, and
> then we don't need the lflow_match in the struct lflow_expr.
> In either case, it seems this code need some change.
I agree with you. In fact, I think matching the lflow uuid alone is
sufficient. I confirmed the
ovn-northd logic. When ovn-northd computes a flow and this flow is
changed a bit (like match condition)
from the flow in the SB DB logical_flow table, ovn-northd deletes the
existing flow in SB DB and adds
a new one.
https://github.com/ovn-org/ovn/blob/master/northd/ovn-northd.c#L9471
https://github.com/ovn-org/ovn/blob/master/northd/ovn-northd.c#L3718
>
> > + return le;
> > + }
> > + }
> > +
> > + return NULL;
> > +}
> > +
> > +static void
> > +lflow_expr_delete(struct hmap *lflow_expr_cache, struct lflow_expr *le)
> > +{
> > + hmap_remove(lflow_expr_cache, &le->node);
> > + free(le->lflow_match);
> > + expr_destroy(le->expr);
> > + free(le);
> > +}
> > +
> > +void
> > +lflow_expr_destroy(struct hmap *lflow_expr_cache)
> > +{
> > + struct lflow_expr *le;
> > + HMAP_FOR_EACH_POP (le, node, lflow_expr_cache) {
> > + free(le->lflow_match);
>
> I didn't see le->expr being destroyed. It seems memory leak here?
Nice catch. Thanks.
>
> > + free(le);
> > + }
>
> It may be better to destroy the hmap in this function instead of calling
> hmap_destroy separately by the caller.
Ack. Will do.
>
> > +}
> > +
> > /* Adds the logical flows from the Logical_Flow table to flow tables. */
> > static void
> > add_logical_flows(
> > @@ -273,7 +328,8 @@ add_logical_flows(
> > struct ovn_extend_table *group_table,
> > struct ovn_extend_table *meter_table,
> > struct lflow_resource_ref *lfrr,
> > - uint32_t *conj_id_ofs)
> > + uint32_t *conj_id_ofs,
> > + struct hmap *lflow_expr_cache)
> > {
> > const struct sbrec_logical_flow *lflow;
> >
> > @@ -308,7 +364,8 @@ add_logical_flows(
> > addr_sets, port_groups,
> > active_tunnels, local_lport_ids,
> > flow_table, group_table, meter_table,
> > - lfrr, conj_id_ofs)) {
> > + lfrr, conj_id_ofs, lflow_expr_cache,
> > + false)) {
> > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5,
> 5);
> > VLOG_ERR_RL(&rl, "Conjunction id overflow when processing
> lflow "
> > UUID_FMT, UUID_ARGS(&lflow->header_.uuid));
> > @@ -338,7 +395,8 @@ lflow_handle_changed_flows(
> > struct ovn_extend_table *group_table,
> > struct ovn_extend_table *meter_table,
> > struct lflow_resource_ref *lfrr,
> > - uint32_t *conj_id_ofs)
> > + uint32_t *conj_id_ofs,
> > + struct hmap *lflow_expr_cache)
> > {
> > bool ret = true;
> > const struct sbrec_logical_flow *lflow;
> > @@ -373,6 +431,12 @@ lflow_handle_changed_flows(
> > ofctrl_remove_flows(flow_table, &lflow->header_.uuid);
> > /* Delete entries from lflow resource reference. */
> > lflow_resource_destroy_lflow(lfrr, &lflow->header_.uuid);
> > +
> > + /* Delete the expr cache for this lflow. */
> > + struct lflow_expr *le = lflow_expr_get(lflow_expr_cache,
> lflow);
> > + if (le) {
> > + lflow_expr_delete(lflow_expr_cache, le);
> > + }
> > }
> > }
> >
> > @@ -401,7 +465,8 @@ lflow_handle_changed_flows(
> > addr_sets, port_groups,
> > active_tunnels, local_lport_ids,
> > flow_table, group_table,
> meter_table,
> > - lfrr, conj_id_ofs)) {
> > + lfrr, conj_id_ofs,
> lflow_expr_cache,
> > + false)) {
>
> Here I think "delete_expr_from_cache" should be true, because we know the
> lflow is either being update or new.
> If a lflow's match string is updated, this code would leave the old
> lflow_expr in the hash table forever, because lflow_expr_get() would not
> find it at all.
> Since we are handling only the changed lflows (incremental processing)
> here, the performance gain of reusing the cached the expr is not important
> in this case, so I think it is better just use "true".
Agree. Also, whenever the logical_flow table changes, a row is either
added or deleted — it is never updated — so an updated flow can't hit
the cache.
>
> > ret = false;
> > break;
> > }
> > @@ -434,6 +499,7 @@ lflow_handle_changed_ref(
> > struct ovn_extend_table *meter_table,
> > struct lflow_resource_ref *lfrr,
> > uint32_t *conj_id_ofs,
> > + struct hmap *lflow_expr_cache,
> > bool *changed)
> > {
> > struct ref_lflow_node *rlfn =
> ref_lflow_lookup(&lfrr->ref_lflow_table,
> > @@ -505,7 +571,8 @@ lflow_handle_changed_ref(
> > addr_sets, port_groups,
> > active_tunnels, local_lport_ids,
> > flow_table, group_table, meter_table,
> > - lfrr, conj_id_ofs)) {
> > + lfrr, conj_id_ofs, lflow_expr_cache,
> > + true)) {
> > ret = false;
> > break;
> > }
> > @@ -555,7 +622,9 @@ consider_logical_flow(
> > struct ovn_extend_table *group_table,
> > struct ovn_extend_table *meter_table,
> > struct lflow_resource_ref *lfrr,
> > - uint32_t *conj_id_ofs)
> > + uint32_t *conj_id_ofs,
> > + struct hmap *lflow_expr_cache,
> > + bool delete_expr_from_cache)
> > {
> > /* Determine translation of logical table IDs to physical table IDs.
> */
> > bool ingress = !strcmp(lflow->pipeline, "ingress");
> > @@ -613,59 +682,77 @@ consider_logical_flow(
> >
> > /* Translate OVN match into table of OpenFlow matches. */
> > struct hmap matches;
> > - struct expr *expr;
> > + struct expr *expr = NULL;
> >
> > struct sset addr_sets_ref = SSET_INITIALIZER(&addr_sets_ref);
> > struct sset port_groups_ref = SSET_INITIALIZER(&port_groups_ref);
> > - expr = expr_parse_string(lflow->match, &symtab, addr_sets,
> port_groups,
> > - &addr_sets_ref, &port_groups_ref, &error);
> > - const char *addr_set_name;
> > - SSET_FOR_EACH (addr_set_name, &addr_sets_ref) {
> > - lflow_resource_add(lfrr, REF_TYPE_ADDRSET, addr_set_name,
> > - &lflow->header_.uuid);
> > - }
> > - const char *port_group_name;
> > - SSET_FOR_EACH (port_group_name, &port_groups_ref) {
> > - lflow_resource_add(lfrr, REF_TYPE_PORTGROUP, port_group_name,
> > - &lflow->header_.uuid);
> > - }
> > - sset_destroy(&addr_sets_ref);
> > - sset_destroy(&port_groups_ref);
> > -
> > - if (!error) {
> > - if (prereqs) {
> > - expr = expr_combine(EXPR_T_AND, expr, prereqs);
> > - prereqs = NULL;
> > + struct lflow_expr *le = lflow_expr_get(lflow_expr_cache, lflow);
> > + if (le) {
> > + if (delete_expr_from_cache) {
> > + lflow_expr_delete(lflow_expr_cache, le);
> > + } else {
> > + expr = le->expr;
> > }
> > - expr = expr_annotate(expr, &symtab, &error);
> > }
> > - if (error) {
> > - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> > - VLOG_WARN_RL(&rl, "error parsing match \"%s\": %s",
> > - lflow->match, error);
> > - expr_destroy(prereqs);
> > - free(error);
> > - ovnacts_free(ovnacts.data, ovnacts.size);
> > - ofpbuf_uninit(&ovnacts);
> > - return true;
> > +
> > + if (!expr) {
> > + expr = expr_parse_string(lflow->match, &symtab, addr_sets,
> port_groups,
> > + &addr_sets_ref, &port_groups_ref,
> &error);
> > + const char *addr_set_name;
> > + SSET_FOR_EACH (addr_set_name, &addr_sets_ref) {
> > + lflow_resource_add(lfrr, REF_TYPE_ADDRSET, addr_set_name,
> > + &lflow->header_.uuid);
> > + }
> > + const char *port_group_name;
> > + SSET_FOR_EACH (port_group_name, &port_groups_ref) {
> > + lflow_resource_add(lfrr, REF_TYPE_PORTGROUP, port_group_name,
> > + &lflow->header_.uuid);
> > + }
> > + sset_destroy(&addr_sets_ref);
> > + sset_destroy(&port_groups_ref);
> > +
> > + if (!error) {
> > + if (prereqs) {
> > + expr = expr_combine(EXPR_T_AND, expr, prereqs);
> > + prereqs = NULL;
> > + }
> > + expr = expr_annotate(expr, &symtab, &error);
> > + }
> > + if (error) {
> > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1,
> 1);
> > + VLOG_WARN_RL(&rl, "error parsing match \"%s\": %s",
> > + lflow->match, error);
> > + expr_destroy(prereqs);
> > + free(error);
> > + ovnacts_free(ovnacts.data, ovnacts.size);
> > + ofpbuf_uninit(&ovnacts);
> > + return true;
> > + }
> > +
> > + expr = expr_simplify(expr);
> > + expr = expr_normalize(expr);
> > +
> > + lflow_expr_add(lflow_expr_cache, lflow, expr);
> > }
> >
> > - struct lookup_port_aux aux = {
> > - .sbrec_multicast_group_by_name_datapath
> > - = sbrec_multicast_group_by_name_datapath,
> > - .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
> > - .dp = lflow->logical_datapath
> > - };
> > struct condition_aux cond_aux = {
> > .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
> > .chassis = chassis,
> > .active_tunnels = active_tunnels,
> > };
> > - expr = expr_simplify(expr);
> > - expr = expr_normalize(expr);
> > +
> > + expr = expr_clone(expr);
> > expr = expr_evaluate_condition(expr, is_chassis_resident_cb,
> &cond_aux);
> > +
> > + struct lookup_port_aux aux = {
> > + .sbrec_multicast_group_by_name_datapath
> > + = sbrec_multicast_group_by_name_datapath,
> > + .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
> > + .dp = lflow->logical_datapath
> > + };
> > uint32_t n_conjs = expr_to_matches(expr, lookup_port_cb, &aux,
> > &matches);
> > +
> > expr_destroy(expr);
> >
> > if (hmap_is_empty(&matches)) {
> > @@ -907,7 +994,8 @@ lflow_run(struct ovsdb_idl_index
> *sbrec_multicast_group_by_name_datapath,
> > struct ovn_extend_table *group_table,
> > struct ovn_extend_table *meter_table,
> > struct lflow_resource_ref *lfrr,
> > - uint32_t *conj_id_ofs)
> > + uint32_t *conj_id_ofs,
> > + struct hmap *lflow_expr_cache)
> > {
> > COVERAGE_INC(lflow_run);
> >
> > @@ -916,7 +1004,7 @@ lflow_run(struct ovsdb_idl_index
> *sbrec_multicast_group_by_name_datapath,
> > dhcpv6_options_table, logical_flow_table,
> > local_datapaths, chassis, addr_sets, port_groups,
> > active_tunnels, local_lport_ids, flow_table,
> group_table,
> > - meter_table, lfrr, conj_id_ofs);
> > + meter_table, lfrr, conj_id_ofs, lflow_expr_cache);
> > add_neighbor_flows(sbrec_port_binding_by_name, mac_binding_table,
> > flow_table);
> > }
> > diff --git a/controller/lflow.h b/controller/lflow.h
> > index abdc55e96..a2fa087f8 100644
> > --- a/controller/lflow.h
> > +++ b/controller/lflow.h
> > @@ -132,7 +132,8 @@ void lflow_run(struct ovsdb_idl_index
> *sbrec_multicast_group_by_name_datapath,
> > struct ovn_extend_table *group_table,
> > struct ovn_extend_table *meter_table,
> > struct lflow_resource_ref *,
> > - uint32_t *conj_id_ofs);
> > + uint32_t *conj_id_ofs,
> > + struct hmap *lflow_expr_cache);
> >
> > bool lflow_handle_changed_flows(
> > struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath,
> > @@ -150,7 +151,8 @@ bool lflow_handle_changed_flows(
> > struct ovn_extend_table *group_table,
> > struct ovn_extend_table *meter_table,
> > struct lflow_resource_ref *,
> > - uint32_t *conj_id_ofs);
> > + uint32_t *conj_id_ofs,
> > + struct hmap *lflow_expr_cache);
> >
> > bool lflow_handle_changed_ref(
> > enum ref_type,
> > @@ -171,6 +173,7 @@ bool lflow_handle_changed_ref(
> > struct ovn_extend_table *meter_table,
> > struct lflow_resource_ref *,
> > uint32_t *conj_id_ofs,
> > + struct hmap *lflow_expr_cache,
> > bool *changed);
> >
> > void lflow_handle_changed_neighbors(
> > @@ -180,4 +183,6 @@ void lflow_handle_changed_neighbors(
> >
> > void lflow_destroy(void);
> >
> > +void lflow_expr_destroy(struct hmap *lflow_expr_cache);
> > +
> > #endif /* controller/lflow.h */
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 17744d416..ca073438f 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -1211,6 +1211,9 @@ struct ed_type_flow_output {
> > uint32_t conj_id_ofs;
> > /* lflow resource cross reference */
> > struct lflow_resource_ref lflow_resource_ref;
> > +
> > + /* Cache of lflow expr tree. */
> > + struct hmap lflow_expr_cache;
> > };
> >
> > static void *
> > @@ -1224,6 +1227,7 @@ en_flow_output_init(struct engine_node *node
> OVS_UNUSED,
> > ovn_extend_table_init(&data->meter_table);
> > data->conj_id_ofs = 1;
> > lflow_resource_init(&data->lflow_resource_ref);
> > + hmap_init(&data->lflow_expr_cache);
> > return data;
> > }
> >
> > @@ -1235,6 +1239,8 @@ en_flow_output_cleanup(void *data)
> > ovn_extend_table_destroy(&flow_output_data->group_table);
> > ovn_extend_table_destroy(&flow_output_data->meter_table);
> > lflow_resource_destroy(&flow_output_data->lflow_resource_ref);
> > + lflow_expr_destroy(&flow_output_data->lflow_expr_cache);
> > + hmap_destroy(&flow_output_data->lflow_expr_cache);
> > }
> >
> > static void
> > @@ -1335,7 +1341,8 @@ en_flow_output_run(struct engine_node *node, void
> *data)
> > chassis, local_datapaths, addr_sets,
> > port_groups, active_tunnels, local_lport_ids,
> > flow_table, group_table, meter_table, lfrr,
> > - conj_id_ofs);
> > + conj_id_ofs,
> > + &fo->lflow_expr_cache);
> >
> > struct sbrec_multicast_group_table *multicast_group_table =
> > (struct sbrec_multicast_group_table *)EN_OVSDB_GET(
> > @@ -1436,7 +1443,7 @@ flow_output_sb_logical_flow_handler(struct
> engine_node *node, void *data)
> > local_datapaths, chassis, addr_sets,
> > port_groups, active_tunnels, local_lport_ids,
> > flow_table, group_table, meter_table, lfrr,
> > - conj_id_ofs);
> > + conj_id_ofs, &fo->lflow_expr_cache);
> >
> > engine_set_node_state(node, EN_UPDATED);
> > return handled;
> > @@ -1721,7 +1728,7 @@ _flow_output_resource_ref_handler(struct
> engine_node *node, void *data,
> > local_datapaths, chassis, addr_sets,
> > port_groups, active_tunnels, local_lport_ids,
> > flow_table, group_table, meter_table, lfrr,
> > - conj_id_ofs, &changed)) {
> > + conj_id_ofs, &fo->lflow_expr_cache, &changed)) {
> > return false;
> > }
> > if (changed) {
> > @@ -1736,7 +1743,7 @@ _flow_output_resource_ref_handler(struct
> engine_node *node, void *data,
> > local_datapaths, chassis, addr_sets,
> > port_groups, active_tunnels, local_lport_ids,
> > flow_table, group_table, meter_table, lfrr,
> > - conj_id_ofs, &changed)) {
> > + conj_id_ofs, &fo->lflow_expr_cache, &changed)) {
> > return false;
> > }
> > if (changed) {
> > @@ -1751,7 +1758,7 @@ _flow_output_resource_ref_handler(struct
> engine_node *node, void *data,
> > local_datapaths, chassis, addr_sets,
> > port_groups, active_tunnels, local_lport_ids,
> > flow_table, group_table, meter_table, lfrr,
> > - conj_id_ofs, &changed)) {
> > + conj_id_ofs, &fo->lflow_expr_cache, &changed)) {
> > return false;
> > }
> > if (changed) {
> > --
> > 2.24.1
> >
> > _______________________________________________
> > dev mailing list
> > dev at openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
More information about the dev
mailing list