[ovs-dev] [PATCH ovn 2/2] ovn-controller: Cache logical flow expr tree for each lflow.

Han Zhou zhouhan at gmail.com
Tue Jan 21 08:32:53 UTC 2020


On Thu, Jan 9, 2020 at 9:37 AM <numans at ovn.org> wrote:
>
> From: Numan Siddique <numans at ovn.org>
>
> This patch caches the logical flow expr tree for each logical flow. This
> cache is stored as an hashmap in the output_flow engine data. When a
logical
> flow is deleted, the expr tree for that lflow is deleted in the
> flow_output_sb_logical_flow_handler().
>
> Below are the details of the OVN resource in my setup
>
> No of logical switches - 49
> No of logical ports    - 1191
> No of logical routers  - 7
> No of logical ports    - 51
> No of ACLs             - 1221
> No of Logical flows    - 664736
>
> On a chassis hosting 7 distributed router ports and around 1090
> port bindings, the no of OVS rules was 921162.
>
> Without this patch, the function add_logical_flows() took ~15 seconds.
> And with this patch it took ~10 seconds. There is a good 34% improvement
> in logical flow processing.

Great improvement! Thanks Numan. Please see my comments below.

>
> Signed-off-by: Numan Siddique <numans at ovn.org>
> ---
>  controller/lflow.c          | 182 ++++++++++++++++++++++++++----------
>  controller/lflow.h          |   9 +-
>  controller/ovn-controller.c |  17 +++-
>  3 files changed, 154 insertions(+), 54 deletions(-)
>
> diff --git a/controller/lflow.c b/controller/lflow.c
> index 93ec53a1c..be46f0661 100644
> --- a/controller/lflow.c
> +++ b/controller/lflow.c
> @@ -79,7 +79,9 @@ static bool consider_logical_flow(
>      struct ovn_extend_table *group_table,
>      struct ovn_extend_table *meter_table,
>      struct lflow_resource_ref *lfrr,
> -    uint32_t *conj_id_ofs);
> +    uint32_t *conj_id_ofs,
> +    struct hmap *lflow_expr_cache,
> +    bool delete_expr_from_cache);

It was a convention that output args are at the end of the list (Ben took a
lot of effort to make it this way when we were implementing incremental
processing), so it would be better to move the last one
"delete_expr_from_cache" to the position before ovn_desired_flow_table.

>
>  static bool
>  lookup_port_cb(const void *aux_, const char *port_name, unsigned int
*portp)
> @@ -255,6 +257,59 @@ lflow_resource_destroy_lflow(struct
lflow_resource_ref *lfrr,
>      free(lfrn);
>  }
>
> +struct lflow_expr {
> +    struct hmap_node node;
> +    struct uuid lflow_uuid; /* key */
> +    struct expr *expr;
> +    char *lflow_match;
> +};
> +
> +static void
> +lflow_expr_add(struct hmap *lflow_expr_cache,
> +               const struct sbrec_logical_flow *lflow,
> +               struct expr *lflow_expr)
> +{
> +    struct lflow_expr *le = xmalloc(sizeof *le);
> +    le->lflow_uuid = lflow->header_.uuid;
> +    le->expr = lflow_expr;
> +    le->lflow_match = xstrdup(lflow->match);
> +    hmap_insert(lflow_expr_cache, &le->node, uuid_hash(&le->lflow_uuid));
> +}
> +
> +static struct lflow_expr *
> +lflow_expr_get(struct hmap *lflow_expr_cache,
> +               const struct sbrec_logical_flow *lflow)
> +{
> +    struct lflow_expr *le;
> +    size_t hash = uuid_hash(&lflow->header_.uuid);
> +    HMAP_FOR_EACH_WITH_HASH (le, node, hash, lflow_expr_cache) {
> +        if (!strcmp(lflow->match, le->lflow_match)) {

I think here we should compare lflow uuid first to make sure we found the
original lflow, and then compare lflow_match to make sure the match didn't
change. If the lflow uuid matched but match string changed, it means the
cache is no longer valid and we should delete it from the cache.
Otherwise, if a lflow in SB is updated on the match field, it will end up
with unused entries in the cache forever, unless the same lflow's match
condition changed back.
In fact, the case that a lflow's match condition changes may never happen
at all considering northd's logic (correct me if I am wrong), and if this
is true, then I don't think we need to compare the lflow->match at all, and
then we don't need the lflow_match in the struct lflow_expr.
In either case, it seems this code need some change.

> +            return le;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +lflow_expr_delete(struct hmap *lflow_expr_cache, struct lflow_expr *le)
> +{
> +    hmap_remove(lflow_expr_cache, &le->node);
> +    free(le->lflow_match);
> +    expr_destroy(le->expr);
> +    free(le);
> +}
> +
> +void
> +lflow_expr_destroy(struct hmap *lflow_expr_cache)
> +{
> +    struct lflow_expr *le;
> +    HMAP_FOR_EACH_POP (le, node, lflow_expr_cache) {
> +        free(le->lflow_match);

I didn't see le->expr being destroyed. It seems memory leak here?

> +        free(le);
> +    }

It may be better to destroy the hmap in this function instead of calling
hmap_destroy separately by the caller.

> +}
> +
>  /* Adds the logical flows from the Logical_Flow table to flow tables. */
>  static void
>  add_logical_flows(
> @@ -273,7 +328,8 @@ add_logical_flows(
>      struct ovn_extend_table *group_table,
>      struct ovn_extend_table *meter_table,
>      struct lflow_resource_ref *lfrr,
> -    uint32_t *conj_id_ofs)
> +    uint32_t *conj_id_ofs,
> +    struct hmap *lflow_expr_cache)
>  {
>      const struct sbrec_logical_flow *lflow;
>
> @@ -308,7 +364,8 @@ add_logical_flows(
>                                     addr_sets, port_groups,
>                                     active_tunnels, local_lport_ids,
>                                     flow_table, group_table, meter_table,
> -                                   lfrr, conj_id_ofs)) {
> +                                   lfrr, conj_id_ofs, lflow_expr_cache,
> +                                   false)) {
>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5,
5);
>              VLOG_ERR_RL(&rl, "Conjunction id overflow when processing
lflow "
>                          UUID_FMT, UUID_ARGS(&lflow->header_.uuid));
> @@ -338,7 +395,8 @@ lflow_handle_changed_flows(
>      struct ovn_extend_table *group_table,
>      struct ovn_extend_table *meter_table,
>      struct lflow_resource_ref *lfrr,
> -    uint32_t *conj_id_ofs)
> +    uint32_t *conj_id_ofs,
> +    struct hmap *lflow_expr_cache)
>  {
>      bool ret = true;
>      const struct sbrec_logical_flow *lflow;
> @@ -373,6 +431,12 @@ lflow_handle_changed_flows(
>              ofctrl_remove_flows(flow_table, &lflow->header_.uuid);
>              /* Delete entries from lflow resource reference. */
>              lflow_resource_destroy_lflow(lfrr, &lflow->header_.uuid);
> +
> +            /* Delete the expr cache for this lflow. */
> +            struct lflow_expr *le = lflow_expr_get(lflow_expr_cache,
lflow);
> +            if (le) {
> +                lflow_expr_delete(lflow_expr_cache, le);
> +            }
>          }
>      }
>
> @@ -401,7 +465,8 @@ lflow_handle_changed_flows(
>                                         addr_sets, port_groups,
>                                         active_tunnels, local_lport_ids,
>                                         flow_table, group_table,
meter_table,
> -                                       lfrr, conj_id_ofs)) {
> +                                       lfrr, conj_id_ofs,
lflow_expr_cache,
> +                                       false)) {

Here I think "delete_expr_from_cache" should be true, because we know the
lflow is either being update or new.
If a lflow's match string is updated, this code would leave the old
lflow_expr in the hash table forever, because lflow_expr_get() would not
find it at all.
Since we are handling only the changed lflows (incremental processing)
here, the performance gain of reusing the cached the expr is not important
in this case, so I think it is better just use "true".

>                  ret = false;
>                  break;
>              }
> @@ -434,6 +499,7 @@ lflow_handle_changed_ref(
>      struct ovn_extend_table *meter_table,
>      struct lflow_resource_ref *lfrr,
>      uint32_t *conj_id_ofs,
> +    struct hmap *lflow_expr_cache,
>      bool *changed)
>  {
>      struct ref_lflow_node *rlfn =
ref_lflow_lookup(&lfrr->ref_lflow_table,
> @@ -505,7 +571,8 @@ lflow_handle_changed_ref(
>                                     addr_sets, port_groups,
>                                     active_tunnels, local_lport_ids,
>                                     flow_table, group_table, meter_table,
> -                                   lfrr, conj_id_ofs)) {
> +                                   lfrr, conj_id_ofs, lflow_expr_cache,
> +                                   true)) {
>              ret = false;
>              break;
>          }
> @@ -555,7 +622,9 @@ consider_logical_flow(
>      struct ovn_extend_table *group_table,
>      struct ovn_extend_table *meter_table,
>      struct lflow_resource_ref *lfrr,
> -    uint32_t *conj_id_ofs)
> +    uint32_t *conj_id_ofs,
> +    struct hmap *lflow_expr_cache,
> +    bool delete_expr_from_cache)
>  {
>      /* Determine translation of logical table IDs to physical table IDs.
*/
>      bool ingress = !strcmp(lflow->pipeline, "ingress");
> @@ -613,59 +682,77 @@ consider_logical_flow(
>
>      /* Translate OVN match into table of OpenFlow matches. */
>      struct hmap matches;
> -    struct expr *expr;
> +    struct expr *expr = NULL;
>
>      struct sset addr_sets_ref = SSET_INITIALIZER(&addr_sets_ref);
>      struct sset port_groups_ref = SSET_INITIALIZER(&port_groups_ref);
> -    expr = expr_parse_string(lflow->match, &symtab, addr_sets,
port_groups,
> -                             &addr_sets_ref, &port_groups_ref, &error);
> -    const char *addr_set_name;
> -    SSET_FOR_EACH (addr_set_name, &addr_sets_ref) {
> -        lflow_resource_add(lfrr, REF_TYPE_ADDRSET, addr_set_name,
> -                           &lflow->header_.uuid);
> -    }
> -    const char *port_group_name;
> -    SSET_FOR_EACH (port_group_name, &port_groups_ref) {
> -        lflow_resource_add(lfrr, REF_TYPE_PORTGROUP, port_group_name,
> -                           &lflow->header_.uuid);
> -    }
> -    sset_destroy(&addr_sets_ref);
> -    sset_destroy(&port_groups_ref);
> -
> -    if (!error) {
> -        if (prereqs) {
> -            expr = expr_combine(EXPR_T_AND, expr, prereqs);
> -            prereqs = NULL;
> +    struct lflow_expr *le = lflow_expr_get(lflow_expr_cache, lflow);
> +    if (le) {
> +        if (delete_expr_from_cache) {
> +            lflow_expr_delete(lflow_expr_cache, le);
> +        } else {
> +            expr = le->expr;
>          }
> -        expr = expr_annotate(expr, &symtab, &error);
>      }
> -    if (error) {
> -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> -        VLOG_WARN_RL(&rl, "error parsing match \"%s\": %s",
> -                     lflow->match, error);
> -        expr_destroy(prereqs);
> -        free(error);
> -        ovnacts_free(ovnacts.data, ovnacts.size);
> -        ofpbuf_uninit(&ovnacts);
> -        return true;
> +
> +    if (!expr) {
> +        expr = expr_parse_string(lflow->match, &symtab, addr_sets,
port_groups,
> +                                &addr_sets_ref, &port_groups_ref,
&error);
> +        const char *addr_set_name;
> +        SSET_FOR_EACH (addr_set_name, &addr_sets_ref) {
> +            lflow_resource_add(lfrr, REF_TYPE_ADDRSET, addr_set_name,
> +                            &lflow->header_.uuid);
> +        }
> +        const char *port_group_name;
> +        SSET_FOR_EACH (port_group_name, &port_groups_ref) {
> +            lflow_resource_add(lfrr, REF_TYPE_PORTGROUP, port_group_name,
> +                            &lflow->header_.uuid);
> +        }
> +        sset_destroy(&addr_sets_ref);
> +        sset_destroy(&port_groups_ref);
> +
> +        if (!error) {
> +            if (prereqs) {
> +                expr = expr_combine(EXPR_T_AND, expr, prereqs);
> +                prereqs = NULL;
> +            }
> +            expr = expr_annotate(expr, &symtab, &error);
> +        }
> +        if (error) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1,
1);
> +            VLOG_WARN_RL(&rl, "error parsing match \"%s\": %s",
> +                        lflow->match, error);
> +            expr_destroy(prereqs);
> +            free(error);
> +            ovnacts_free(ovnacts.data, ovnacts.size);
> +            ofpbuf_uninit(&ovnacts);
> +            return true;
> +        }
> +
> +        expr = expr_simplify(expr);
> +        expr = expr_normalize(expr);
> +
> +        lflow_expr_add(lflow_expr_cache, lflow, expr);
>      }
>
> -    struct lookup_port_aux aux = {
> -        .sbrec_multicast_group_by_name_datapath
> -            = sbrec_multicast_group_by_name_datapath,
> -        .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
> -        .dp = lflow->logical_datapath
> -    };
>      struct condition_aux cond_aux = {
>          .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
>          .chassis = chassis,
>          .active_tunnels = active_tunnels,
>      };
> -    expr = expr_simplify(expr);
> -    expr = expr_normalize(expr);
> +
> +    expr = expr_clone(expr);
>      expr = expr_evaluate_condition(expr, is_chassis_resident_cb,
&cond_aux);
> +
> +    struct lookup_port_aux aux = {
> +        .sbrec_multicast_group_by_name_datapath
> +            = sbrec_multicast_group_by_name_datapath,
> +        .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
> +        .dp = lflow->logical_datapath
> +    };
>      uint32_t n_conjs = expr_to_matches(expr, lookup_port_cb, &aux,
>                                         &matches);
> +
>      expr_destroy(expr);
>
>      if (hmap_is_empty(&matches)) {
> @@ -907,7 +994,8 @@ lflow_run(struct ovsdb_idl_index
*sbrec_multicast_group_by_name_datapath,
>            struct ovn_extend_table *group_table,
>            struct ovn_extend_table *meter_table,
>            struct lflow_resource_ref *lfrr,
> -          uint32_t *conj_id_ofs)
> +          uint32_t *conj_id_ofs,
> +          struct hmap *lflow_expr_cache)
>  {
>      COVERAGE_INC(lflow_run);
>
> @@ -916,7 +1004,7 @@ lflow_run(struct ovsdb_idl_index
*sbrec_multicast_group_by_name_datapath,
>                        dhcpv6_options_table, logical_flow_table,
>                        local_datapaths, chassis, addr_sets, port_groups,
>                        active_tunnels, local_lport_ids, flow_table,
group_table,
> -                      meter_table, lfrr, conj_id_ofs);
> +                      meter_table, lfrr, conj_id_ofs, lflow_expr_cache);
>      add_neighbor_flows(sbrec_port_binding_by_name, mac_binding_table,
>                         flow_table);
>  }
> diff --git a/controller/lflow.h b/controller/lflow.h
> index abdc55e96..a2fa087f8 100644
> --- a/controller/lflow.h
> +++ b/controller/lflow.h
> @@ -132,7 +132,8 @@ void lflow_run(struct ovsdb_idl_index
*sbrec_multicast_group_by_name_datapath,
>                 struct ovn_extend_table *group_table,
>                 struct ovn_extend_table *meter_table,
>                 struct lflow_resource_ref *,
> -               uint32_t *conj_id_ofs);
> +               uint32_t *conj_id_ofs,
> +               struct hmap *lflow_expr_cache);
>
>  bool lflow_handle_changed_flows(
>      struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath,
> @@ -150,7 +151,8 @@ bool lflow_handle_changed_flows(
>      struct ovn_extend_table *group_table,
>      struct ovn_extend_table *meter_table,
>      struct lflow_resource_ref *,
> -    uint32_t *conj_id_ofs);
> +    uint32_t *conj_id_ofs,
> +    struct hmap *lflow_expr_cache);
>
>  bool lflow_handle_changed_ref(
>      enum ref_type,
> @@ -171,6 +173,7 @@ bool lflow_handle_changed_ref(
>      struct ovn_extend_table *meter_table,
>      struct lflow_resource_ref *,
>      uint32_t *conj_id_ofs,
> +    struct hmap *lflow_expr_cache,
>      bool *changed);
>
>  void lflow_handle_changed_neighbors(
> @@ -180,4 +183,6 @@ void lflow_handle_changed_neighbors(
>
>  void lflow_destroy(void);
>
> +void lflow_expr_destroy(struct hmap *lflow_expr_cache);
> +
>  #endif /* controller/lflow.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 17744d416..ca073438f 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1211,6 +1211,9 @@ struct ed_type_flow_output {
>      uint32_t conj_id_ofs;
>      /* lflow resource cross reference */
>      struct lflow_resource_ref lflow_resource_ref;
> +
> +    /* Cache of lflow expr tree. */
> +    struct hmap lflow_expr_cache;
>  };
>
>  static void *
> @@ -1224,6 +1227,7 @@ en_flow_output_init(struct engine_node *node
OVS_UNUSED,
>      ovn_extend_table_init(&data->meter_table);
>      data->conj_id_ofs = 1;
>      lflow_resource_init(&data->lflow_resource_ref);
> +    hmap_init(&data->lflow_expr_cache);
>      return data;
>  }
>
> @@ -1235,6 +1239,8 @@ en_flow_output_cleanup(void *data)
>      ovn_extend_table_destroy(&flow_output_data->group_table);
>      ovn_extend_table_destroy(&flow_output_data->meter_table);
>      lflow_resource_destroy(&flow_output_data->lflow_resource_ref);
> +    lflow_expr_destroy(&flow_output_data->lflow_expr_cache);
> +    hmap_destroy(&flow_output_data->lflow_expr_cache);
>  }
>
>  static void
> @@ -1335,7 +1341,8 @@ en_flow_output_run(struct engine_node *node, void
*data)
>                chassis, local_datapaths, addr_sets,
>                port_groups, active_tunnels, local_lport_ids,
>                flow_table, group_table, meter_table, lfrr,
> -              conj_id_ofs);
> +              conj_id_ofs,
> +              &fo->lflow_expr_cache);
>
>      struct sbrec_multicast_group_table *multicast_group_table =
>          (struct sbrec_multicast_group_table *)EN_OVSDB_GET(
> @@ -1436,7 +1443,7 @@ flow_output_sb_logical_flow_handler(struct
engine_node *node, void *data)
>                local_datapaths, chassis, addr_sets,
>                port_groups, active_tunnels, local_lport_ids,
>                flow_table, group_table, meter_table, lfrr,
> -              conj_id_ofs);
> +              conj_id_ofs, &fo->lflow_expr_cache);
>
>      engine_set_node_state(node, EN_UPDATED);
>      return handled;
> @@ -1721,7 +1728,7 @@ _flow_output_resource_ref_handler(struct
engine_node *node, void *data,
>                      local_datapaths, chassis, addr_sets,
>                      port_groups, active_tunnels, local_lport_ids,
>                      flow_table, group_table, meter_table, lfrr,
> -                    conj_id_ofs, &changed)) {
> +                    conj_id_ofs, &fo->lflow_expr_cache, &changed)) {
>              return false;
>          }
>          if (changed) {
> @@ -1736,7 +1743,7 @@ _flow_output_resource_ref_handler(struct
engine_node *node, void *data,
>                      local_datapaths, chassis, addr_sets,
>                      port_groups, active_tunnels, local_lport_ids,
>                      flow_table, group_table, meter_table, lfrr,
> -                    conj_id_ofs, &changed)) {
> +                    conj_id_ofs, &fo->lflow_expr_cache, &changed)) {
>              return false;
>          }
>          if (changed) {
> @@ -1751,7 +1758,7 @@ _flow_output_resource_ref_handler(struct
engine_node *node, void *data,
>                      local_datapaths, chassis, addr_sets,
>                      port_groups, active_tunnels, local_lport_ids,
>                      flow_table, group_table, meter_table, lfrr,
> -                    conj_id_ofs, &changed)) {
> +                    conj_id_ofs, &fo->lflow_expr_cache, &changed)) {
>              return false;
>          }
>          if (changed) {
> --
> 2.24.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


More information about the dev mailing list