[ovs-dev] [PATCH ovn] lflow-cache: Auto trim when cache size is reduced significantly.

Dumitru Ceara dceara at redhat.com
Fri Jun 4 14:57:04 UTC 2021


On 6/4/21 4:34 PM, Dan Williams wrote:
> On Fri, 2021-06-04 at 12:00 +0200, Dumitru Ceara wrote:
>> Due to a bug in glibc [0], M_TRIM_THRESHOLD (default 128KB) is not
>> honored.  The lflow cache is one of the largest memory consumers in
>> ovn-controller and it used to trim memory whenever the cache was
>> flushed.  However, that required periodic intervention from the CMS
>> side.
>>
>> Instead, we now automatically trim memory every time the lflow cache
>> utilization decreases by 50%, with a threshold of at least
>> LFLOW_CACHE_TRIM_LIMIT (10000) elements in the cache.
>>
>> [0] https://sourceware.org/bugzilla/show_bug.cgi?id=14827
>>
>> Reported-at: https://bugzilla.redhat.com/1967882
>> Signed-off-by: Dumitru Ceara <dceara at redhat.com>
>> ---
>>  controller/lflow-cache.c | 68 ++++++++++++++++++++++-------------
>>  tests/ovn-lflow-cache.at | 76 ++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 119 insertions(+), 25 deletions(-)
>>
>> diff --git a/controller/lflow-cache.c b/controller/lflow-cache.c
>> index 56ddf1075..11935e7ae 100644
>> --- a/controller/lflow-cache.c
>> +++ b/controller/lflow-cache.c
>> @@ -40,6 +40,7 @@ COVERAGE_DEFINE(lflow_cache_delete);
>>  COVERAGE_DEFINE(lflow_cache_full);
>>  COVERAGE_DEFINE(lflow_cache_mem_full);
>>  COVERAGE_DEFINE(lflow_cache_made_room);
>> +COVERAGE_DEFINE(lflow_cache_trim);
>>  
>>  static const char *lflow_cache_type_names[LCACHE_T_MAX] = {
>>      [LCACHE_T_CONJ_ID] = "cache-conj-id",
>> @@ -49,6 +50,8 @@ static const char
>> *lflow_cache_type_names[LCACHE_T_MAX] = {
>>  
>>  struct lflow_cache {
>>      struct hmap entries[LCACHE_T_MAX];
>> +    uint32_t n_entries;
>> +    uint32_t high_watermark;
>>      uint32_t capacity;
>>      uint64_t mem_usage;
>>      uint64_t max_mem_usage;
>> @@ -63,7 +66,8 @@ struct lflow_cache_entry {
>>      struct lflow_cache_value value;
>>  };
>>  
>> -static size_t lflow_cache_n_entries__(const struct lflow_cache *lc);
>> +#define LFLOW_CACHE_TRIM_LIMIT 10000
>> +
>>  static bool lflow_cache_make_room__(struct lflow_cache *lc,
>>                                      enum lflow_cache_type type);
>>  static struct lflow_cache_value *lflow_cache_add__(
>> @@ -71,18 +75,18 @@ static struct lflow_cache_value
>> *lflow_cache_add__(
>>      enum lflow_cache_type type, uint64_t value_size);
>>  static void lflow_cache_delete__(struct lflow_cache *lc,
>>                                   struct lflow_cache_entry *lce);
>> +static void lflow_cache_trim__(struct lflow_cache *lc, bool force);
>>  
>>  struct lflow_cache *
>>  lflow_cache_create(void)
>>  {
>> -    struct lflow_cache *lc = xmalloc(sizeof *lc);
>> +    struct lflow_cache *lc = xzalloc(sizeof *lc);
>>  
>>      for (size_t i = 0; i < LCACHE_T_MAX; i++) {
>>          hmap_init(&lc->entries[i]);
>>      }
>>  
>>      lc->enabled = true;
>> -    lc->mem_usage = 0;
>>      return lc;
>>  }
>>  
>> @@ -101,12 +105,8 @@ lflow_cache_flush(struct lflow_cache *lc)
>>          HMAP_FOR_EACH_SAFE (lce, lce_next, node, &lc->entries[i]) {
>>              lflow_cache_delete__(lc, lce);
>>          }
>> -        hmap_shrink(&lc->entries[i]);
>>      }
>> -
>> -#if HAVE_DECL_MALLOC_TRIM
>> -    malloc_trim(0);
>> -#endif
>> +    lflow_cache_trim__(lc, true);
>>  }
>>  
>>  void
>> @@ -134,7 +134,7 @@ lflow_cache_enable(struct lflow_cache *lc, bool
>> enabled, uint32_t capacity,
>>      uint64_t max_mem_usage = max_mem_usage_kb * 1024;
>>  
>>      if ((lc->enabled && !enabled)
>> -            || capacity < lflow_cache_n_entries__(lc)
>> +            || capacity < lc->n_entries
>>              || max_mem_usage < lc->mem_usage) {
>>          lflow_cache_flush(lc);
>>      }
>> @@ -164,6 +164,9 @@ lflow_cache_get_stats(const struct lflow_cache
>> *lc, struct ds *output)
>>  
>>      ds_put_format(output, "Enabled: %s\n",
>>                    lflow_cache_is_enabled(lc) ? "true" : "false");
>> +    ds_put_format(output, "%-16s: %"PRIu32"\n", "high-watermark",
>> +                  lc->high_watermark);
>> +    ds_put_format(output, "%-16s: %"PRIu32"\n", "total", lc->n_entries);
>>      for (size_t i = 0; i < LCACHE_T_MAX; i++) {
>>          ds_put_format(output, "%-16s: %"PRIuSIZE"\n",
>>                        lflow_cache_type_names[i],
>> @@ -254,20 +257,10 @@ lflow_cache_delete(struct lflow_cache *lc,
>> const struct uuid *lflow_uuid)
>>          COVERAGE_INC(lflow_cache_delete);
>>          lflow_cache_delete__(lc, CONTAINER_OF(lcv, struct
>> lflow_cache_entry,
>>                                                value));
>> +        lflow_cache_trim__(lc, false);
>>      }
>>  }
>>  
>> -static size_t
>> -lflow_cache_n_entries__(const struct lflow_cache *lc)
>> -{
>> -    size_t n_entries = 0;
>> -
>> -    for (size_t i = 0; i < LCACHE_T_MAX; i++) {
>> -        n_entries += hmap_count(&lc->entries[i]);
>> -    }
>> -    return n_entries;
>> -}
>> -
>>  static bool
>>  lflow_cache_make_room__(struct lflow_cache *lc, enum
>> lflow_cache_type type)
>>  {
>> @@ -319,7 +312,7 @@ lflow_cache_add__(struct lflow_cache *lc, const
>> struct uuid *lflow_uuid,
>>          return NULL;
>>      }
>>  
>> -    if (lflow_cache_n_entries__(lc) == lc->capacity) {
>> +    if (lc->n_entries == lc->capacity) {
>>          if (!lflow_cache_make_room__(lc, type)) {
>>              COVERAGE_INC(lflow_cache_full);
>>              return NULL;
>> @@ -336,17 +329,17 @@ lflow_cache_add__(struct lflow_cache *lc, const
>> struct uuid *lflow_uuid,
>>      lce->size = size;
>>      lce->value.type = type;
>>      hmap_insert(&lc->entries[type], &lce->node,
>> uuid_hash(lflow_uuid));
>> +    lc->n_entries++;
>> +    lc->high_watermark = MAX(lc->high_watermark, lc->n_entries);
>>      return &lce->value;
>>  }
>>  
>>  static void
>>  lflow_cache_delete__(struct lflow_cache *lc, struct
>> lflow_cache_entry *lce)
>>  {
>> -    if (!lce) {
>> -        return;
>> -    }
>> -
>> +    ovs_assert(lc->n_entries > 0);
>>      hmap_remove(&lc->entries[lce->value.type], &lce->node);
>> +    lc->n_entries--;
>>      switch (lce->value.type) {
>>      case LCACHE_T_NONE:
>>          OVS_NOT_REACHED();
>> @@ -369,3 +362,28 @@ lflow_cache_delete__(struct lflow_cache *lc,
>> struct lflow_cache_entry *lce)
>>      lc->mem_usage -= lce->size;
>>      free(lce);
>>  }
>> +
>> +static void
>> +lflow_cache_trim__(struct lflow_cache *lc, bool force)
>> +{
>> +    /* Trim if we had at least 'TRIM_LIMIT' elements at some point
>> and if the
>> +     * current usage is less than half of 'high_watermark'.
>> +     */
>> +    ovs_assert(lc->high_watermark >= lc->n_entries);
>> +    if (!force
>> +            && (lc->high_watermark <= LFLOW_CACHE_TRIM_LIMIT
>> +                || lc->n_entries > lc->high_watermark / 2)) {
> 
> Did you mean "lc->n_entries < lc->high_watermark / 2" here? Maybe I
> mis-understood though.

No, if 'force' is false we only want to trim if we are below 50% of the
last high watermark.  Otherwise return early without trimming.

> 
> Dan
> 

Thanks,
Dumitru



More information about the dev mailing list