[ovs-dev] [PATCH ovn] lflow-cache: Automatically trim cache when it becomes inactive.

Numan Siddique numans at ovn.org
Tue Nov 30 16:54:19 UTC 2021


On Tue, Nov 23, 2021 at 2:49 PM Dumitru Ceara <dceara at redhat.com> wrote:
>
> By default perform memory trimming 30 seconds after the lflow cache
> utilization has dropped.  Otherwise, idle ovn-controller processes might
> wastefully fail to return unused memory to the system.  The timeout is
> configurable through the 'ovn-trim-timeout-ms' external_id.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2024768
> Signed-off-by: Dumitru Ceara <dceara at redhat.com>

Thanks for adding this support.  The patch and approach LGTM.

I just have one question.  Please see below.

Thanks
Numan

> ---
>  NEWS                            |  3 ++
>  controller/chassis.c            | 16 ++++++++
>  controller/lflow-cache.c        | 67 ++++++++++++++++++++++++++++++++-
>  controller/lflow-cache.h        |  5 ++-
>  controller/ovn-controller.8.xml |  8 ++++
>  controller/ovn-controller.c     |  9 ++++-
>  controller/test-lflow-cache.c   | 13 +++++--
>  7 files changed, 114 insertions(+), 7 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index e27aaad06..9880a678e 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -12,6 +12,9 @@ Post v21.09.0
>      running on SmartNIC control plane CPUs.  Please refer to
>      Documentation/topics/vif-plug-providers/vif-plug-providers.rst for more
>      information.
> +  - ovn-controller: Add configuration knob, through OVS external-id
> +    "ovn-trim-timeout-ms" to allow specifiying an lflow cache inactivity
> +    timeout after which ovn-controller should trim memory.
>
>  OVN v21.09.0 - 01 Oct 2021
>  --------------------------
> diff --git a/controller/chassis.c b/controller/chassis.c
> index c11c10a34..8a1559653 100644
> --- a/controller/chassis.c
> +++ b/controller/chassis.c
> @@ -54,6 +54,7 @@ struct ovs_chassis_cfg {
>      const char *memlimit_lflow_cache;
>      const char *trim_limit_lflow_cache;
>      const char *trim_wmark_perc_lflow_cache;
> +    const char *trim_timeout_ms;
>
>      /* Set of encap types parsed from the 'ovn-encap-type' external-id. */
>      struct sset encap_type_set;
> @@ -163,6 +164,12 @@ get_trim_wmark_perc_lflow_cache(const struct smap *ext_ids)
>      return smap_get_def(ext_ids, "ovn-trim-wmark-perc-lflow-cache", "");
>  }
>
> +static const char *
> +get_trim_timeout(const struct smap *ext_ids)
> +{
> +    return smap_get_def(ext_ids, "ovn-trim-timeout-ms", "");
> +}
> +
>  static const char *
>  get_encap_csum(const struct smap *ext_ids)
>  {
> @@ -292,6 +299,7 @@ chassis_parse_ovs_config(const struct ovsrec_open_vswitch_table *ovs_table,
>          get_trim_limit_lflow_cache(&cfg->external_ids);
>      ovs_cfg->trim_wmark_perc_lflow_cache =
>          get_trim_wmark_perc_lflow_cache(&cfg->external_ids);
> +    ovs_cfg->trim_timeout_ms = get_trim_timeout(&cfg->external_ids);
>
>      if (!chassis_parse_ovs_encap_type(encap_type, &ovs_cfg->encap_type_set)) {
>          return false;
> @@ -336,6 +344,7 @@ chassis_build_other_config(const struct ovs_chassis_cfg *ovs_cfg,
>                   ovs_cfg->trim_limit_lflow_cache);
>      smap_replace(config, "ovn-trim-wmark-perc-lflow-cache",
>                   ovs_cfg->trim_wmark_perc_lflow_cache);
> +    smap_replace(config, "ovn-trim-timeout-ms", ovs_cfg->trim_timeout_ms);
>      smap_replace(config, "iface-types", ds_cstr_ro(&ovs_cfg->iface_types));
>      smap_replace(config, "ovn-chassis-mac-mappings", ovs_cfg->chassis_macs);
>      smap_replace(config, "is-interconn",
> @@ -415,6 +424,13 @@ chassis_other_config_changed(const struct ovs_chassis_cfg *ovs_cfg,
>          return true;
>      }
>
> +    const char *chassis_trim_timeout_ms =
> +        get_trim_timeout(&chassis_rec->other_config);
> +
> +    if (strcmp(ovs_cfg->trim_timeout_ms, chassis_trim_timeout_ms)) {
> +        return true;
> +    }
> +
>      const char *chassis_mac_mappings =
>          get_chassis_mac_mappings(&chassis_rec->other_config);
>      if (strcmp(ovs_cfg->chassis_macs, chassis_mac_mappings)) {
> diff --git a/controller/lflow-cache.c b/controller/lflow-cache.c
> index 6db85fc12..9c3db06e7 100644
> --- a/controller/lflow-cache.c
> +++ b/controller/lflow-cache.c
> @@ -24,8 +24,10 @@
>  #include "coverage.h"
>  #include "lflow-cache.h"
>  #include "lib/uuid.h"
> +#include "openvswitch/poll-loop.h"
>  #include "openvswitch/vlog.h"
>  #include "ovn/expr.h"
> +#include "timeval.h"
>
>  VLOG_DEFINE_THIS_MODULE(lflow_cache);
>
> @@ -59,7 +61,10 @@ struct lflow_cache {
>      uint64_t max_mem_usage;
>      uint32_t trim_limit;
>      uint32_t trim_wmark_perc;
> +    uint32_t trim_timeout_ms;
>      uint64_t trim_count;
> +    long long int last_active_ms;
> +    bool recently_active;
>      bool enabled;
>  };
>
> @@ -79,6 +84,7 @@ static struct lflow_cache_value *lflow_cache_add__(
>  static void lflow_cache_delete__(struct lflow_cache *lc,
>                                   struct lflow_cache_entry *lce);
>  static void lflow_cache_trim__(struct lflow_cache *lc, bool force);
> +static void lflow_cache_record_activity__(struct lflow_cache *lc);
>
>  struct lflow_cache *
>  lflow_cache_create(void)
> @@ -128,7 +134,7 @@ lflow_cache_destroy(struct lflow_cache *lc)
>  void
>  lflow_cache_enable(struct lflow_cache *lc, bool enabled, uint32_t capacity,
>                     uint64_t max_mem_usage_kb, uint32_t lflow_trim_limit,
> -                   uint32_t trim_wmark_perc)
> +                   uint32_t trim_wmark_perc, uint32_t trim_timeout_ms)
>  {
>      if (!lc) {
>          return;
> @@ -141,6 +147,13 @@ lflow_cache_enable(struct lflow_cache *lc, bool enabled, uint32_t capacity,
>          trim_wmark_perc = 100;
>      }
>
> +    if (trim_timeout_ms < 1000) {
> +        VLOG_WARN_RL(&rl, "Invalid requested trim timeout: "
> +                     "requested %"PRIu32" ms, using 1000 ms instead",
> +                     trim_timeout_ms);
> +        trim_timeout_ms = 1000;
> +    }
> +
>      uint64_t max_mem_usage = max_mem_usage_kb * 1024;
>      bool need_flush = false;
>      bool need_trim = false;
> @@ -160,10 +173,13 @@ lflow_cache_enable(struct lflow_cache *lc, bool enabled, uint32_t capacity,
>      lc->max_mem_usage = max_mem_usage;
>      lc->trim_limit = lflow_trim_limit;
>      lc->trim_wmark_perc = trim_wmark_perc;
> +    lc->trim_timeout_ms = trim_timeout_ms;
>
>      if (need_flush) {
> +        lflow_cache_record_activity__(lc);
>          lflow_cache_flush(lc);
>      } else if (need_trim) {
> +        lflow_cache_record_activity__(lc);
>          lflow_cache_trim__(lc, false);
>      }
>  }
> @@ -271,6 +287,7 @@ lflow_cache_delete(struct lflow_cache *lc, const struct uuid *lflow_uuid)
>          lflow_cache_delete__(lc, CONTAINER_OF(lcv, struct lflow_cache_entry,
>                                                value));
>          lflow_cache_trim__(lc, false);
> +        lflow_cache_record_activity__(lc);
>      }
>  }
>
> @@ -308,6 +325,46 @@ lflow_cache_get_memory_usage(const struct lflow_cache *lc, struct simap *usage)
>                     ROUND_UP(lc->mem_usage, 1024) / 1024);
>  }
>
> +void
> +lflow_cache_run(struct lflow_cache *lc)
> +{
> +    if (!lc->recently_active) {
> +        return;
> +    }
> +
> +    long long int now = time_msec();
> +    if (now < lc->last_active_ms || now < lc->trim_timeout_ms) {
> +        VLOG_WARN_RL(&rl, "Detected cache last active timestamp overflow");
> +        lc->recently_active = false;
> +        lflow_cache_trim__(lc, true);
> +        return;
> +    }
> +
> +    if (now < lc->trim_timeout_ms) {
> +        VLOG_WARN_RL(&rl, "Detected very large trim timeout: "
> +                     "now %lld ms timeout %"PRIu32" ms",
> +                     now, lc->trim_timeout_ms);
> +        return;
> +    }
> +
> +    if (now - lc->trim_timeout_ms >= lc->last_active_ms) {
> +        VLOG_INFO_RL(&rl, "Detected cache inactivity "
> +                    "(last active %lld ms ago): trimming cache",
> +                    now - lc->last_active_ms);
> +        lflow_cache_trim__(lc, true);
> +        lc->recently_active = false;
> +    }
> +}
> +
> +void
> +lflow_cache_wait(struct lflow_cache *lc)
> +{
> +    if (!lc->recently_active) {
> +        return;
> +    }
> +    poll_timer_wait_until(lc->last_active_ms + lc->trim_timeout_ms);
> +}
> +
>  static struct lflow_cache_value *
>  lflow_cache_add__(struct lflow_cache *lc, const struct uuid *lflow_uuid,
>                    enum lflow_cache_type type, uint64_t value_size)
> @@ -332,6 +389,7 @@ lflow_cache_add__(struct lflow_cache *lc, const struct uuid *lflow_uuid,
>          }
>      }
>
> +    lflow_cache_record_activity__(lc);

Do we need to record activity when a cache entry is added ?   If a
logical flow expr is cached,
we probably don't need to trim the memory right as we probably are not
freeing any memory in cache?
It may be sufficient to trim the memory when an entry is deleted.

Although it's possible that processing a logical flow and adding it to
the cache may not free any memory in cache,
but ovn-controller / IDL code may have freed the memory elsewhere.  So
there could be benefits in trimming.
I'm just wondering if it would be an overkill ? What do you think ?

Thanks
Numan

>      lc->mem_usage += size;
>
>      COVERAGE_INC(lflow_cache_add);
> @@ -397,3 +455,10 @@ lflow_cache_trim__(struct lflow_cache *lc, bool force)
>      lc->high_watermark = lc->n_entries;
>      lc->trim_count++;
>  }
> +
> +static void
> +lflow_cache_record_activity__(struct lflow_cache *lc)
> +{
> +    lc->last_active_ms = time_msec();
> +    lc->recently_active = true;
> +}
> diff --git a/controller/lflow-cache.h b/controller/lflow-cache.h
> index faad32bb6..300a56077 100644
> --- a/controller/lflow-cache.h
> +++ b/controller/lflow-cache.h
> @@ -61,7 +61,7 @@ void lflow_cache_flush(struct lflow_cache *);
>  void lflow_cache_destroy(struct lflow_cache *);
>  void lflow_cache_enable(struct lflow_cache *, bool enabled, uint32_t capacity,
>                          uint64_t max_mem_usage_kb, uint32_t lflow_trim_limit,
> -                        uint32_t trim_wmark_perc);
> +                        uint32_t trim_wmark_perc, uint32_t trim_timeout_ms);
>  bool lflow_cache_is_enabled(const struct lflow_cache *);
>  void lflow_cache_get_stats(const struct lflow_cache *, struct ds *output);
>
> @@ -79,4 +79,7 @@ void lflow_cache_delete(struct lflow_cache *, const struct uuid *lflow_uuid);
>  void lflow_cache_get_memory_usage(const struct lflow_cache *,
>                                    struct simap *usage);
>
> +void lflow_cache_run(struct lflow_cache *);
> +void lflow_cache_wait(struct lflow_cache *);
> +
>  #endif /* controller/lflow-cache.h */
> diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
> index 37c7efb5b..e9708fe64 100644
> --- a/controller/ovn-controller.8.xml
> +++ b/controller/ovn-controller.8.xml
> @@ -296,6 +296,14 @@
>          than half of the last measured high watermark.  By default this is set
>          to 50.
>        </dd>
> +      <dt><code>external_ids:ovn-trim-timeout-ms</code></dt>
> +      <dd>
> +        When used, this configuration value specifies the time, in
> +        milliseconds, since the last logical flow cache operation after
> +        which <code>ovn-controller</code> performs memory trimming regardless
> +        of how many entries there are in the cache.  By default this is set to
> +        30000 (30 seconds).
> +      </dd>
>      </dl>
>
>      <p>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 29c1a05b2..e61c32484 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -128,6 +128,7 @@ static const char *ssl_ca_cert_file;
>  #define DEFAULT_LFLOW_CACHE_MAX_MEM_KB (UINT64_MAX / 1024)
>  #define DEFAULT_LFLOW_CACHE_TRIM_LIMIT 10000
>  #define DEFAULT_LFLOW_CACHE_WMARK_PERC 50
> +#define DEFAULT_LFLOW_CACHE_TRIM_TO_MS 30000
>
>  struct controller_engine_ctx {
>      struct lflow_cache *lflow_cache;
> @@ -576,7 +577,10 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl,
>                                           DEFAULT_LFLOW_CACHE_TRIM_LIMIT),
>                             smap_get_uint(&cfg->external_ids,
>                                           "ovn-trim-wmark-perc-lflow-cache",
> -                                         DEFAULT_LFLOW_CACHE_WMARK_PERC));
> +                                         DEFAULT_LFLOW_CACHE_WMARK_PERC),
> +                           smap_get_uint(&cfg->external_ids,
> +                                         "ovn-trim-timeout-ms",
> +                                         DEFAULT_LFLOW_CACHE_TRIM_TO_MS));
>      }
>  }
>
> @@ -3862,6 +3866,9 @@ main(int argc, char *argv[])
>          ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
>          ovsdb_idl_track_clear(ovs_idl_loop.idl);
>
> +        lflow_cache_run(ctrl_engine_ctx.lflow_cache);
> +        lflow_cache_wait(ctrl_engine_ctx.lflow_cache);
> +
>  loop_done:
>          memory_wait();
>          poll_block();
> diff --git a/controller/test-lflow-cache.c b/controller/test-lflow-cache.c
> index 6f7a3c389..7ce999360 100644
> --- a/controller/test-lflow-cache.c
> +++ b/controller/test-lflow-cache.c
> @@ -32,6 +32,8 @@
>  /* Set memory trimming high watermark percentage to 50% by default. */
>  #define TEST_LFLOW_CACHE_TRIM_WMARK_PERC 50
>
> +#define TEST_LFLOW_CACHE_TRIM_TO_MS 30000
> +
>  static void
>  test_lflow_cache_add__(struct lflow_cache *lc, const char *op_type,
>                         const struct uuid *lflow_uuid,
> @@ -117,7 +119,8 @@ test_lflow_cache_operations(struct ovs_cmdl_context *ctx)
>
>      lflow_cache_enable(lc, enabled, UINT32_MAX, UINT32_MAX,
>                         TEST_LFLOW_CACHE_TRIM_LIMIT,
> -                       TEST_LFLOW_CACHE_TRIM_WMARK_PERC);
> +                       TEST_LFLOW_CACHE_TRIM_WMARK_PERC,
> +                       TEST_LFLOW_CACHE_TRIM_TO_MS);
>      test_lflow_cache_stats__(lc);
>
>      if (!test_read_uint_value(ctx, shift++, "n_ops", &n_ops)) {
> @@ -216,12 +219,13 @@ test_lflow_cache_operations(struct ovs_cmdl_context *ctx)
>              }
>              printf("ENABLE\n");
>              lflow_cache_enable(lc, true, limit, mem_limit_kb, trim_limit,
> -                               trim_wmark_perc);
> +                               trim_wmark_perc, TEST_LFLOW_CACHE_TRIM_TO_MS);
>          } else if (!strcmp(op, "disable")) {
>              printf("DISABLE\n");
>              lflow_cache_enable(lc, false, UINT32_MAX, UINT32_MAX,
>                                 TEST_LFLOW_CACHE_TRIM_LIMIT,
> -                               TEST_LFLOW_CACHE_TRIM_WMARK_PERC);
> +                               TEST_LFLOW_CACHE_TRIM_WMARK_PERC,
> +                               TEST_LFLOW_CACHE_TRIM_TO_MS);
>          } else if (!strcmp(op, "flush")) {
>              printf("FLUSH\n");
>              lflow_cache_flush(lc);
> @@ -243,7 +247,8 @@ test_lflow_cache_negative(struct ovs_cmdl_context *ctx OVS_UNUSED)
>      lflow_cache_destroy(NULL);
>      lflow_cache_enable(NULL, true, UINT32_MAX, UINT32_MAX,
>                         TEST_LFLOW_CACHE_TRIM_LIMIT,
> -                       TEST_LFLOW_CACHE_TRIM_WMARK_PERC);
> +                       TEST_LFLOW_CACHE_TRIM_WMARK_PERC,
> +                       TEST_LFLOW_CACHE_TRIM_TO_MS);
>      ovs_assert(!lflow_cache_is_enabled(NULL));
>
>      struct ds ds = DS_EMPTY_INITIALIZER;
> --
> 2.27.0
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list