[ovs-dev] [PATCH ovn 2/2] ovn-controller: Cache logical flow expr tree for each lflow.

Numan Siddique numans at ovn.org
Tue Jan 21 14:02:32 UTC 2020


On Tue, Jan 21, 2020 at 2:03 PM Han Zhou <zhouhan at gmail.com> wrote:
>
> On Thu, Jan 9, 2020 at 9:37 AM <numans at ovn.org> wrote:
> >
> > From: Numan Siddique <numans at ovn.org>
> >
> > This patch caches the logical flow expr tree for each logical flow. This
> > cache is stored as an hashmap in the output_flow engine data. When a
> logical
> > flow is deleted, the expr tree for that lflow is deleted in the
> > flow_output_sb_logical_flow_handler().
> >
> > Below are the details of the OVN resource in my setup
> >
> > No of logical switches - 49
> > No of logical ports    - 1191
> > No of logical routers  - 7
> > No of logical ports    - 51
> > No of ACLs             - 1221
> > No of Logical flows    - 664736
> >
> > On a chassis hosting 7 distributed router ports and around 1090
> > port bindings, the no of OVS rules was 921162.
> >
> > Without this patch, the function add_logical_flows() took ~15 seconds.
> > And with this patch it took ~10 seconds. There is a good 34% improvement
> > in logical flow processing.
>
> Great improvement! Thanks Numan. Please see my comments below.
>

Thanks for the review. v3 is on its way. PSB for few comments.

> >
> > Signed-off-by: Numan Siddique <numans at ovn.org>
> > ---
> >  controller/lflow.c          | 182 ++++++++++++++++++++++++++----------
> >  controller/lflow.h          |   9 +-
> >  controller/ovn-controller.c |  17 +++-
> >  3 files changed, 154 insertions(+), 54 deletions(-)
> >
> > diff --git a/controller/lflow.c b/controller/lflow.c
> > index 93ec53a1c..be46f0661 100644
> > --- a/controller/lflow.c
> > +++ b/controller/lflow.c
> > @@ -79,7 +79,9 @@ static bool consider_logical_flow(
> >      struct ovn_extend_table *group_table,
> >      struct ovn_extend_table *meter_table,
> >      struct lflow_resource_ref *lfrr,
> > -    uint32_t *conj_id_ofs);
> > +    uint32_t *conj_id_ofs,
> > +    struct hmap *lflow_expr_cache,
> > +    bool delete_expr_from_cache);
>
> It was a convention that output args are at the end of the list (Ben took a
> lot of effort to make it this way when we were implementing incremental
> processing), so it would be better to move the last one
> "delete_expr_from_cache" to the position before ovn_desired_flow_table.

Ack.


>
> >
> >  static bool
> >  lookup_port_cb(const void *aux_, const char *port_name, unsigned int
> *portp)
> > @@ -255,6 +257,59 @@ lflow_resource_destroy_lflow(struct
> lflow_resource_ref *lfrr,
> >      free(lfrn);
> >  }
> >
> > +struct lflow_expr {
> > +    struct hmap_node node;
> > +    struct uuid lflow_uuid; /* key */
> > +    struct expr *expr;
> > +    char *lflow_match;
> > +};
> > +
> > +static void
> > +lflow_expr_add(struct hmap *lflow_expr_cache,
> > +               const struct sbrec_logical_flow *lflow,
> > +               struct expr *lflow_expr)
> > +{
> > +    struct lflow_expr *le = xmalloc(sizeof *le);
> > +    le->lflow_uuid = lflow->header_.uuid;
> > +    le->expr = lflow_expr;
> > +    le->lflow_match = xstrdup(lflow->match);
> > +    hmap_insert(lflow_expr_cache, &le->node, uuid_hash(&le->lflow_uuid));
> > +}
> > +
> > +static struct lflow_expr *
> > +lflow_expr_get(struct hmap *lflow_expr_cache,
> > +               const struct sbrec_logical_flow *lflow)
> > +{
> > +    struct lflow_expr *le;
> > +    size_t hash = uuid_hash(&lflow->header_.uuid);
> > +    HMAP_FOR_EACH_WITH_HASH (le, node, hash, lflow_expr_cache) {
> > +        if (!strcmp(lflow->match, le->lflow_match)) {
>
> I think here we should compare lflow uuid first to make sure we found the
> original lflow, and then compare lflow_match to make sure the match didn't
> change. If the lflow uuid matched but match string changed, it means the
> cache is no longer valid and we should delete it from the cache.
> Otherwise, if a lflow in SB is updated on the match field, it will end up
> with unused entries in the cache forever, unless the same lflow's match
> condition changed back.
> In fact, the case that a lflow's match condition changes may never happen
> at all considering northd's logic (correct me if I am wrong), and if this
> is true, then I don't think we need to compare the lflow->match at all, and
> then we don't need the lflow_match in the struct lflow_expr.
> In either case, it seems this code need some change.

I agree with you. Infact, I think matching the lflow uuid is just
sufficient. I confirmed the
ovn-northd logic. When ovn-northd computes a flow and this flow is
changed a bit (like match condition)
from the  flow in the SB DB logical_flow table, ovn-northd deletes the
existing flow in SB DB and adds
a new one.

https://github.com/ovn-org/ovn/blob/master/northd/ovn-northd.c#L9471
https://github.com/ovn-org/ovn/blob/master/northd/ovn-northd.c#L3718


>
> > +            return le;
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +static void
> > +lflow_expr_delete(struct hmap *lflow_expr_cache, struct lflow_expr *le)
> > +{
> > +    hmap_remove(lflow_expr_cache, &le->node);
> > +    free(le->lflow_match);
> > +    expr_destroy(le->expr);
> > +    free(le);
> > +}
> > +
> > +void
> > +lflow_expr_destroy(struct hmap *lflow_expr_cache)
> > +{
> > +    struct lflow_expr *le;
> > +    HMAP_FOR_EACH_POP (le, node, lflow_expr_cache) {
> > +        free(le->lflow_match);
>
> I didn't see le->expr being destroyed. It seems memory leak here?

Nice catch. Thanks.
>
> > +        free(le);
> > +    }
>
> It may be better to destroy the hmap in this function instead of calling
> hmap_destroy separately by the caller.

Ack. Will do.
>
> > +}
> > +
> >  /* Adds the logical flows from the Logical_Flow table to flow tables. */
> >  static void
> >  add_logical_flows(
> > @@ -273,7 +328,8 @@ add_logical_flows(
> >      struct ovn_extend_table *group_table,
> >      struct ovn_extend_table *meter_table,
> >      struct lflow_resource_ref *lfrr,
> > -    uint32_t *conj_id_ofs)
> > +    uint32_t *conj_id_ofs,
> > +    struct hmap *lflow_expr_cache)
> >  {
> >      const struct sbrec_logical_flow *lflow;
> >
> > @@ -308,7 +364,8 @@ add_logical_flows(
> >                                     addr_sets, port_groups,
> >                                     active_tunnels, local_lport_ids,
> >                                     flow_table, group_table, meter_table,
> > -                                   lfrr, conj_id_ofs)) {
> > +                                   lfrr, conj_id_ofs, lflow_expr_cache,
> > +                                   false)) {
> >              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5,
> 5);
> >              VLOG_ERR_RL(&rl, "Conjunction id overflow when processing
> lflow "
> >                          UUID_FMT, UUID_ARGS(&lflow->header_.uuid));
> > @@ -338,7 +395,8 @@ lflow_handle_changed_flows(
> >      struct ovn_extend_table *group_table,
> >      struct ovn_extend_table *meter_table,
> >      struct lflow_resource_ref *lfrr,
> > -    uint32_t *conj_id_ofs)
> > +    uint32_t *conj_id_ofs,
> > +    struct hmap *lflow_expr_cache)
> >  {
> >      bool ret = true;
> >      const struct sbrec_logical_flow *lflow;
> > @@ -373,6 +431,12 @@ lflow_handle_changed_flows(
> >              ofctrl_remove_flows(flow_table, &lflow->header_.uuid);
> >              /* Delete entries from lflow resource reference. */
> >              lflow_resource_destroy_lflow(lfrr, &lflow->header_.uuid);
> > +
> > +            /* Delete the expr cache for this lflow. */
> > +            struct lflow_expr *le = lflow_expr_get(lflow_expr_cache,
> lflow);
> > +            if (le) {
> > +                lflow_expr_delete(lflow_expr_cache, le);
> > +            }
> >          }
> >      }
> >
> > @@ -401,7 +465,8 @@ lflow_handle_changed_flows(
> >                                         addr_sets, port_groups,
> >                                         active_tunnels, local_lport_ids,
> >                                         flow_table, group_table,
> meter_table,
> > -                                       lfrr, conj_id_ofs)) {
> > +                                       lfrr, conj_id_ofs,
> lflow_expr_cache,
> > +                                       false)) {
>
> Here I think "delete_expr_from_cache" should be true, because we know the
> lflow is either being update or new.
> If a lflow's match string is updated, this code would leave the old
> lflow_expr in the hash table forever, because lflow_expr_get() would not
> find it at all.
> Since we are handling only the changed lflows (incremental processing)
> here, the performance gain of reusing the cached the expr is not important
> in this case, so I think it is better just use "true".

Agree. Also whenever a logical_flow table change happens, either a row
is added or deleted, it is
never updated, so it can't hit the cache.


>
> >                  ret = false;
> >                  break;
> >              }
> > @@ -434,6 +499,7 @@ lflow_handle_changed_ref(
> >      struct ovn_extend_table *meter_table,
> >      struct lflow_resource_ref *lfrr,
> >      uint32_t *conj_id_ofs,
> > +    struct hmap *lflow_expr_cache,
> >      bool *changed)
> >  {
> >      struct ref_lflow_node *rlfn =
> ref_lflow_lookup(&lfrr->ref_lflow_table,
> > @@ -505,7 +571,8 @@ lflow_handle_changed_ref(
> >                                     addr_sets, port_groups,
> >                                     active_tunnels, local_lport_ids,
> >                                     flow_table, group_table, meter_table,
> > -                                   lfrr, conj_id_ofs)) {
> > +                                   lfrr, conj_id_ofs, lflow_expr_cache,
> > +                                   true)) {
> >              ret = false;
> >              break;
> >          }
> > @@ -555,7 +622,9 @@ consider_logical_flow(
> >      struct ovn_extend_table *group_table,
> >      struct ovn_extend_table *meter_table,
> >      struct lflow_resource_ref *lfrr,
> > -    uint32_t *conj_id_ofs)
> > +    uint32_t *conj_id_ofs,
> > +    struct hmap *lflow_expr_cache,
> > +    bool delete_expr_from_cache)
> >  {
> >      /* Determine translation of logical table IDs to physical table IDs.
> */
> >      bool ingress = !strcmp(lflow->pipeline, "ingress");
> > @@ -613,59 +682,77 @@ consider_logical_flow(
> >
> >      /* Translate OVN match into table of OpenFlow matches. */
> >      struct hmap matches;
> > -    struct expr *expr;
> > +    struct expr *expr = NULL;
> >
> >      struct sset addr_sets_ref = SSET_INITIALIZER(&addr_sets_ref);
> >      struct sset port_groups_ref = SSET_INITIALIZER(&port_groups_ref);
> > -    expr = expr_parse_string(lflow->match, &symtab, addr_sets,
> port_groups,
> > -                             &addr_sets_ref, &port_groups_ref, &error);
> > -    const char *addr_set_name;
> > -    SSET_FOR_EACH (addr_set_name, &addr_sets_ref) {
> > -        lflow_resource_add(lfrr, REF_TYPE_ADDRSET, addr_set_name,
> > -                           &lflow->header_.uuid);
> > -    }
> > -    const char *port_group_name;
> > -    SSET_FOR_EACH (port_group_name, &port_groups_ref) {
> > -        lflow_resource_add(lfrr, REF_TYPE_PORTGROUP, port_group_name,
> > -                           &lflow->header_.uuid);
> > -    }
> > -    sset_destroy(&addr_sets_ref);
> > -    sset_destroy(&port_groups_ref);
> > -
> > -    if (!error) {
> > -        if (prereqs) {
> > -            expr = expr_combine(EXPR_T_AND, expr, prereqs);
> > -            prereqs = NULL;
> > +    struct lflow_expr *le = lflow_expr_get(lflow_expr_cache, lflow);
> > +    if (le) {
> > +        if (delete_expr_from_cache) {
> > +            lflow_expr_delete(lflow_expr_cache, le);
> > +        } else {
> > +            expr = le->expr;
> >          }
> > -        expr = expr_annotate(expr, &symtab, &error);
> >      }
> > -    if (error) {
> > -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> > -        VLOG_WARN_RL(&rl, "error parsing match \"%s\": %s",
> > -                     lflow->match, error);
> > -        expr_destroy(prereqs);
> > -        free(error);
> > -        ovnacts_free(ovnacts.data, ovnacts.size);
> > -        ofpbuf_uninit(&ovnacts);
> > -        return true;
> > +
> > +    if (!expr) {
> > +        expr = expr_parse_string(lflow->match, &symtab, addr_sets,
> port_groups,
> > +                                &addr_sets_ref, &port_groups_ref,
> &error);
> > +        const char *addr_set_name;
> > +        SSET_FOR_EACH (addr_set_name, &addr_sets_ref) {
> > +            lflow_resource_add(lfrr, REF_TYPE_ADDRSET, addr_set_name,
> > +                            &lflow->header_.uuid);
> > +        }
> > +        const char *port_group_name;
> > +        SSET_FOR_EACH (port_group_name, &port_groups_ref) {
> > +            lflow_resource_add(lfrr, REF_TYPE_PORTGROUP, port_group_name,
> > +                            &lflow->header_.uuid);
> > +        }
> > +        sset_destroy(&addr_sets_ref);
> > +        sset_destroy(&port_groups_ref);
> > +
> > +        if (!error) {
> > +            if (prereqs) {
> > +                expr = expr_combine(EXPR_T_AND, expr, prereqs);
> > +                prereqs = NULL;
> > +            }
> > +            expr = expr_annotate(expr, &symtab, &error);
> > +        }
> > +        if (error) {
> > +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1,
> 1);
> > +            VLOG_WARN_RL(&rl, "error parsing match \"%s\": %s",
> > +                        lflow->match, error);
> > +            expr_destroy(prereqs);
> > +            free(error);
> > +            ovnacts_free(ovnacts.data, ovnacts.size);
> > +            ofpbuf_uninit(&ovnacts);
> > +            return true;
> > +        }
> > +
> > +        expr = expr_simplify(expr);
> > +        expr = expr_normalize(expr);
> > +
> > +        lflow_expr_add(lflow_expr_cache, lflow, expr);
> >      }
> >
> > -    struct lookup_port_aux aux = {
> > -        .sbrec_multicast_group_by_name_datapath
> > -            = sbrec_multicast_group_by_name_datapath,
> > -        .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
> > -        .dp = lflow->logical_datapath
> > -    };
> >      struct condition_aux cond_aux = {
> >          .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
> >          .chassis = chassis,
> >          .active_tunnels = active_tunnels,
> >      };
> > -    expr = expr_simplify(expr);
> > -    expr = expr_normalize(expr);
> > +
> > +    expr = expr_clone(expr);
> >      expr = expr_evaluate_condition(expr, is_chassis_resident_cb,
> &cond_aux);
> > +
> > +    struct lookup_port_aux aux = {
> > +        .sbrec_multicast_group_by_name_datapath
> > +            = sbrec_multicast_group_by_name_datapath,
> > +        .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
> > +        .dp = lflow->logical_datapath
> > +    };
> >      uint32_t n_conjs = expr_to_matches(expr, lookup_port_cb, &aux,
> >                                         &matches);
> > +
> >      expr_destroy(expr);
> >
> >      if (hmap_is_empty(&matches)) {
> > @@ -907,7 +994,8 @@ lflow_run(struct ovsdb_idl_index
> *sbrec_multicast_group_by_name_datapath,
> >            struct ovn_extend_table *group_table,
> >            struct ovn_extend_table *meter_table,
> >            struct lflow_resource_ref *lfrr,
> > -          uint32_t *conj_id_ofs)
> > +          uint32_t *conj_id_ofs,
> > +          struct hmap *lflow_expr_cache)
> >  {
> >      COVERAGE_INC(lflow_run);
> >
> > @@ -916,7 +1004,7 @@ lflow_run(struct ovsdb_idl_index
> *sbrec_multicast_group_by_name_datapath,
> >                        dhcpv6_options_table, logical_flow_table,
> >                        local_datapaths, chassis, addr_sets, port_groups,
> >                        active_tunnels, local_lport_ids, flow_table,
> group_table,
> > -                      meter_table, lfrr, conj_id_ofs);
> > +                      meter_table, lfrr, conj_id_ofs, lflow_expr_cache);
> >      add_neighbor_flows(sbrec_port_binding_by_name, mac_binding_table,
> >                         flow_table);
> >  }
> > diff --git a/controller/lflow.h b/controller/lflow.h
> > index abdc55e96..a2fa087f8 100644
> > --- a/controller/lflow.h
> > +++ b/controller/lflow.h
> > @@ -132,7 +132,8 @@ void lflow_run(struct ovsdb_idl_index
> *sbrec_multicast_group_by_name_datapath,
> >                 struct ovn_extend_table *group_table,
> >                 struct ovn_extend_table *meter_table,
> >                 struct lflow_resource_ref *,
> > -               uint32_t *conj_id_ofs);
> > +               uint32_t *conj_id_ofs,
> > +               struct hmap *lflow_expr_cache);
> >
> >  bool lflow_handle_changed_flows(
> >      struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath,
> > @@ -150,7 +151,8 @@ bool lflow_handle_changed_flows(
> >      struct ovn_extend_table *group_table,
> >      struct ovn_extend_table *meter_table,
> >      struct lflow_resource_ref *,
> > -    uint32_t *conj_id_ofs);
> > +    uint32_t *conj_id_ofs,
> > +    struct hmap *lflow_expr_cache);
> >
> >  bool lflow_handle_changed_ref(
> >      enum ref_type,
> > @@ -171,6 +173,7 @@ bool lflow_handle_changed_ref(
> >      struct ovn_extend_table *meter_table,
> >      struct lflow_resource_ref *,
> >      uint32_t *conj_id_ofs,
> > +    struct hmap *lflow_expr_cache,
> >      bool *changed);
> >
> >  void lflow_handle_changed_neighbors(
> > @@ -180,4 +183,6 @@ void lflow_handle_changed_neighbors(
> >
> >  void lflow_destroy(void);
> >
> > +void lflow_expr_destroy(struct hmap *lflow_expr_cache);
> > +
> >  #endif /* controller/lflow.h */
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 17744d416..ca073438f 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -1211,6 +1211,9 @@ struct ed_type_flow_output {
> >      uint32_t conj_id_ofs;
> >      /* lflow resource cross reference */
> >      struct lflow_resource_ref lflow_resource_ref;
> > +
> > +    /* Cache of lflow expr tree. */
> > +    struct hmap lflow_expr_cache;
> >  };
> >
> >  static void *
> > @@ -1224,6 +1227,7 @@ en_flow_output_init(struct engine_node *node
> OVS_UNUSED,
> >      ovn_extend_table_init(&data->meter_table);
> >      data->conj_id_ofs = 1;
> >      lflow_resource_init(&data->lflow_resource_ref);
> > +    hmap_init(&data->lflow_expr_cache);
> >      return data;
> >  }
> >
> > @@ -1235,6 +1239,8 @@ en_flow_output_cleanup(void *data)
> >      ovn_extend_table_destroy(&flow_output_data->group_table);
> >      ovn_extend_table_destroy(&flow_output_data->meter_table);
> >      lflow_resource_destroy(&flow_output_data->lflow_resource_ref);
> > +    lflow_expr_destroy(&flow_output_data->lflow_expr_cache);
> > +    hmap_destroy(&flow_output_data->lflow_expr_cache);
> >  }
> >
> >  static void
> > @@ -1335,7 +1341,8 @@ en_flow_output_run(struct engine_node *node, void
> *data)
> >                chassis, local_datapaths, addr_sets,
> >                port_groups, active_tunnels, local_lport_ids,
> >                flow_table, group_table, meter_table, lfrr,
> > -              conj_id_ofs);
> > +              conj_id_ofs,
> > +              &fo->lflow_expr_cache);
> >
> >      struct sbrec_multicast_group_table *multicast_group_table =
> >          (struct sbrec_multicast_group_table *)EN_OVSDB_GET(
> > @@ -1436,7 +1443,7 @@ flow_output_sb_logical_flow_handler(struct
> engine_node *node, void *data)
> >                local_datapaths, chassis, addr_sets,
> >                port_groups, active_tunnels, local_lport_ids,
> >                flow_table, group_table, meter_table, lfrr,
> > -              conj_id_ofs);
> > +              conj_id_ofs, &fo->lflow_expr_cache);
> >
> >      engine_set_node_state(node, EN_UPDATED);
> >      return handled;
> > @@ -1721,7 +1728,7 @@ _flow_output_resource_ref_handler(struct
> engine_node *node, void *data,
> >                      local_datapaths, chassis, addr_sets,
> >                      port_groups, active_tunnels, local_lport_ids,
> >                      flow_table, group_table, meter_table, lfrr,
> > -                    conj_id_ofs, &changed)) {
> > +                    conj_id_ofs, &fo->lflow_expr_cache, &changed)) {
> >              return false;
> >          }
> >          if (changed) {
> > @@ -1736,7 +1743,7 @@ _flow_output_resource_ref_handler(struct
> engine_node *node, void *data,
> >                      local_datapaths, chassis, addr_sets,
> >                      port_groups, active_tunnels, local_lport_ids,
> >                      flow_table, group_table, meter_table, lfrr,
> > -                    conj_id_ofs, &changed)) {
> > +                    conj_id_ofs, &fo->lflow_expr_cache, &changed)) {
> >              return false;
> >          }
> >          if (changed) {
> > @@ -1751,7 +1758,7 @@ _flow_output_resource_ref_handler(struct
> engine_node *node, void *data,
> >                      local_datapaths, chassis, addr_sets,
> >                      port_groups, active_tunnels, local_lport_ids,
> >                      flow_table, group_table, meter_table, lfrr,
> > -                    conj_id_ofs, &changed)) {
> > +                    conj_id_ofs, &fo->lflow_expr_cache, &changed)) {
> >              return false;
> >          }
> >          if (changed) {
> > --
> > 2.24.1
> >
> > _______________________________________________
> > dev mailing list
> > dev at openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list