[ovs-dev] [PATCH ovn] lflow-cache: Auto trim when cache size is reduced significantly.

Mark Gray mark.d.gray at redhat.com
Fri Jun 11 14:48:03 UTC 2021


On 11/06/2021 10:59, Dumitru Ceara wrote:
> Thanks for the review!
> 
> On 6/11/21 11:22 AM, Mark Gray wrote:
>> On 04/06/2021 11:00, Dumitru Ceara wrote:
>>> Due to a bug in glibc [0], M_TRIM_THRESHOLD (default 128KB) is not
>>> honored.  The lflow cache is one of the largest memory consumers in
>>> ovn-controller and it used to trim memory whenever the cache was
>>> flushed.  However, that required periodic intervention from the CMS
>>> side.
>>>
>>> Instead, we now automatically trim memory every time the lflow cache
>>> utilization decreases by 50%, with a threshold of at least
>>> LFLOW_CACHE_TRIM_LIMIT (10000) elements in the cache.
>>>
>>> [0] https://sourceware.org/bugzilla/show_bug.cgi?id=14827
>>>
>>> Reported-at: https://bugzilla.redhat.com/1967882
>>> Signed-off-by: Dumitru Ceara <dceara at redhat.com>
>>> ---
>>>  controller/lflow-cache.c | 68 ++++++++++++++++++++++-------------
>>>  tests/ovn-lflow-cache.at | 76 ++++++++++++++++++++++++++++++++++++++++
>>>  2 files changed, 119 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/controller/lflow-cache.c b/controller/lflow-cache.c
>>> index 56ddf1075..11935e7ae 100644
>>> --- a/controller/lflow-cache.c
>>> +++ b/controller/lflow-cache.c
>>> @@ -40,6 +40,7 @@ COVERAGE_DEFINE(lflow_cache_delete);
>>>  COVERAGE_DEFINE(lflow_cache_full);
>>>  COVERAGE_DEFINE(lflow_cache_mem_full);
>>>  COVERAGE_DEFINE(lflow_cache_made_room);
>>> +COVERAGE_DEFINE(lflow_cache_trim);
>>>  
>>>  static const char *lflow_cache_type_names[LCACHE_T_MAX] = {
>>>      [LCACHE_T_CONJ_ID] = "cache-conj-id",
>>> @@ -49,6 +50,8 @@ static const char *lflow_cache_type_names[LCACHE_T_MAX] = {
>>>  
>>>  struct lflow_cache {
>>>      struct hmap entries[LCACHE_T_MAX];
>>> +    uint32_t n_entries;
>>> +    uint32_t high_watermark;
>>>      uint32_t capacity;
>>>      uint64_t mem_usage;
>>>      uint64_t max_mem_usage;
>>> @@ -63,7 +66,8 @@ struct lflow_cache_entry {
>>>      struct lflow_cache_value value;
>>>  };
>>>  
>>> -static size_t lflow_cache_n_entries__(const struct lflow_cache *lc);
>>> +#define LFLOW_CACHE_TRIM_LIMIT 10000
>>
>> This seems to be an arbitrary value? Was there a reason for selecting
>> this value? Perhaps it could be a runtime tunable parameter? wdyt?
> 
> This is an arbitrary value indeed :)
> 
> The reason I chose 10K was that it seemed high enough to allow low-scale
> deployments without any penalty due to memory trimming and still low
> enough to enable trimming on most mid/high scale deployments.
> 
> I can make it tunable through an OVS external-id but I guess in that
> case we might as well make the watermark tunable too.  What do you think?

Do you mean the percentage at which we flush? This might be a good idea
as well.
> 
>>> +
>>>  static bool lflow_cache_make_room__(struct lflow_cache *lc,
>>>                                      enum lflow_cache_type type);
>>>  static struct lflow_cache_value *lflow_cache_add__(
>>> @@ -71,18 +75,18 @@ static struct lflow_cache_value *lflow_cache_add__(
>>>      enum lflow_cache_type type, uint64_t value_size);
>>>  static void lflow_cache_delete__(struct lflow_cache *lc,
>>>                                   struct lflow_cache_entry *lce);
>>> +static void lflow_cache_trim__(struct lflow_cache *lc, bool force);
>>>  
>>>  struct lflow_cache *
>>>  lflow_cache_create(void)
>>>  {
>>> -    struct lflow_cache *lc = xmalloc(sizeof *lc);
>>> +    struct lflow_cache *lc = xzalloc(sizeof *lc);
>>>  
>>>      for (size_t i = 0; i < LCACHE_T_MAX; i++) {
>>>          hmap_init(&lc->entries[i]);
>>>      }
>>>  
>>>      lc->enabled = true;
>>> -    lc->mem_usage = 0;
>>>      return lc;
>>>  }
>>>  
>>> @@ -101,12 +105,8 @@ lflow_cache_flush(struct lflow_cache *lc)
>>>          HMAP_FOR_EACH_SAFE (lce, lce_next, node, &lc->entries[i]) {
>>>              lflow_cache_delete__(lc, lce);
>>>          }
>>> -        hmap_shrink(&lc->entries[i]);
>>>      }
>>> -
>>> -#if HAVE_DECL_MALLOC_TRIM
>>> -    malloc_trim(0);
>>> -#endif
>>> +    lflow_cache_trim__(lc, true);
>>>  }
>>>  
>>>  void
>>> @@ -134,7 +134,7 @@ lflow_cache_enable(struct lflow_cache *lc, bool enabled, uint32_t capacity,
>>>      uint64_t max_mem_usage = max_mem_usage_kb * 1024;
>>>  
>>>      if ((lc->enabled && !enabled)
>>> -            || capacity < lflow_cache_n_entries__(lc)
>>> +            || capacity < lc->n_entries
>>>              || max_mem_usage < lc->mem_usage) {
>>>          lflow_cache_flush(lc);
>>>      }
>>> @@ -164,6 +164,9 @@ lflow_cache_get_stats(const struct lflow_cache *lc, struct ds *output)
>>>  
>>>      ds_put_format(output, "Enabled: %s\n",
>>>                    lflow_cache_is_enabled(lc) ? "true" : "false");
>>> +    ds_put_format(output, "%-16s: %"PRIu32"\n", "high-watermark",
>>> +                  lc->high_watermark);
>>> +    ds_put_format(output, "%-16s: %"PRIu32"\n", "total", lc->n_entries);
>>>      for (size_t i = 0; i < LCACHE_T_MAX; i++) {
>>>          ds_put_format(output, "%-16s: %"PRIuSIZE"\n",
>>>                        lflow_cache_type_names[i],
>>> @@ -254,20 +257,10 @@ lflow_cache_delete(struct lflow_cache *lc, const struct uuid *lflow_uuid)
>>>          COVERAGE_INC(lflow_cache_delete);
>>>          lflow_cache_delete__(lc, CONTAINER_OF(lcv, struct lflow_cache_entry,
>>>                                                value));
>>> +        lflow_cache_trim__(lc, false);
>>
>> Should we have any concerns about the impact of doing this more
>> frequently now? Before, we only trimmed on flush? However, I do think
>> the impact will be reduced due to the watermarking.
> 
> That was the idea, to minimize impact thanks to the watermarking.  IMO
> reducing the memory usage is more critical than the limited impact we
> have due to trimming.  Also, as we scale down, trimming happens more
> often but its impact is also lower and lower.
> 
>>>      }
>>>  }
>>>  
>>> -static size_t
>>> -lflow_cache_n_entries__(const struct lflow_cache *lc)
>>> -{
>>> -    size_t n_entries = 0;
>>> -
>>> -    for (size_t i = 0; i < LCACHE_T_MAX; i++) {
>>> -        n_entries += hmap_count(&lc->entries[i]);
>>> -    }
>>> -    return n_entries;
>>> -}
>>> -
>>>  static bool
>>>  lflow_cache_make_room__(struct lflow_cache *lc, enum lflow_cache_type type)
>>>  {
>>> @@ -319,7 +312,7 @@ lflow_cache_add__(struct lflow_cache *lc, const struct uuid *lflow_uuid,
>>>          return NULL;
>>>      }
>>>  
>>> -    if (lflow_cache_n_entries__(lc) == lc->capacity) {
>>> +    if (lc->n_entries == lc->capacity) {
>>>          if (!lflow_cache_make_room__(lc, type)) {
>>>              COVERAGE_INC(lflow_cache_full);
>>>              return NULL;
>>> @@ -336,17 +329,17 @@ lflow_cache_add__(struct lflow_cache *lc, const struct uuid *lflow_uuid,
>>>      lce->size = size;
>>>      lce->value.type = type;
>>>      hmap_insert(&lc->entries[type], &lce->node, uuid_hash(lflow_uuid));
>>> +    lc->n_entries++;
>>> +    lc->high_watermark = MAX(lc->high_watermark, lc->n_entries);
>>>      return &lce->value;
>>>  }
>>>  
>>>  static void
>>>  lflow_cache_delete__(struct lflow_cache *lc, struct lflow_cache_entry *lce)
>>>  {
>>> -    if (!lce) {
>>> -        return;
>>> -    }
>>> -
>>> +    ovs_assert(lc->n_entries > 0);
>>>      hmap_remove(&lc->entries[lce->value.type], &lce->node);
>>> +    lc->n_entries--;
>>>      switch (lce->value.type) {
>>>      case LCACHE_T_NONE:
>>>          OVS_NOT_REACHED();
>>> @@ -369,3 +362,28 @@ lflow_cache_delete__(struct lflow_cache *lc, struct lflow_cache_entry *lce)
>>>      lc->mem_usage -= lce->size;
>>>      free(lce);
>>>  }
>>> +
>>> +static void
>>> +lflow_cache_trim__(struct lflow_cache *lc, bool force)
>>> +{
>>> +    /* Trim if we had at least 'TRIM_LIMIT' elements at some point and if the
>>> +     * current usage is less than half of 'high_watermark'.
>>> +     */
>>> +    ovs_assert(lc->high_watermark >= lc->n_entries);
>>> +    if (!force
>>> +            && (lc->high_watermark <= LFLOW_CACHE_TRIM_LIMIT
>>> +                || lc->n_entries > lc->high_watermark / 2)) {
>>> +        return;
>>> +    }
>>> +
>>> +    COVERAGE_INC(lflow_cache_trim);
>>> +    for (size_t i = 0; i < LCACHE_T_MAX; i++) {
>>> +        hmap_shrink(&lc->entries[i]);
>>> +    }
>>> +
>>> +#if HAVE_DECL_MALLOC_TRIM
>>> +    malloc_trim(0);
>>> +#endif
>>> +
>>> +    lc->high_watermark = lc->n_entries;
>>> +}
>>> diff --git a/tests/ovn-lflow-cache.at b/tests/ovn-lflow-cache.at
>>> index e5e9ed1e8..df483c9d7 100644
>>> --- a/tests/ovn-lflow-cache.at
>>> +++ b/tests/ovn-lflow-cache.at
>>> @@ -12,6 +12,8 @@ AT_CHECK(
>>>          add matches 3 | grep -v 'Mem usage (KB)'],
>>>      [0], [dnl
>>>  Enabled: true
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -21,6 +23,8 @@ LOOKUP:
>>>    conj_id_ofs: 1
>>>    type: conj-id
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 1
>>>  cache-conj-id   : 1
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -30,6 +34,8 @@ LOOKUP:
>>>    conj_id_ofs: 2
>>>    type: expr
>>>  Enabled: true
>>> +high-watermark  : 2
>>> +total           : 2
>>>  cache-conj-id   : 1
>>>  cache-expr      : 1
>>>  cache-matches   : 0
>>> @@ -39,6 +45,8 @@ LOOKUP:
>>>    conj_id_ofs: 0
>>>    type: matches
>>>  Enabled: true
>>> +high-watermark  : 3
>>> +total           : 3
>>>  cache-conj-id   : 1
>>>  cache-expr      : 1
>>>  cache-matches   : 1
>>> @@ -54,6 +62,8 @@ AT_CHECK(
>>>          add-del matches 3 | grep -v 'Mem usage (KB)'],
>>>      [0], [dnl
>>>  Enabled: true
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -66,6 +76,8 @@ DELETE
>>>  LOOKUP:
>>>    not found
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -78,6 +90,8 @@ DELETE
>>>  LOOKUP:
>>>    not found
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -90,6 +104,8 @@ DELETE
>>>  LOOKUP:
>>>    not found
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -105,6 +121,8 @@ AT_CHECK(
>>>          add matches 3 | grep -v 'Mem usage (KB)'],
>>>      [0], [dnl
>>>  Enabled: false
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -113,6 +131,8 @@ ADD conj-id:
>>>  LOOKUP:
>>>    not found
>>>  Enabled: false
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -121,6 +141,8 @@ ADD expr:
>>>  LOOKUP:
>>>    not found
>>>  Enabled: false
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -129,6 +151,8 @@ ADD matches:
>>>  LOOKUP:
>>>    not found
>>>  Enabled: false
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -153,6 +177,8 @@ AT_CHECK(
>>>          flush | grep -v 'Mem usage (KB)'],
>>>      [0], [dnl
>>>  Enabled: true
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -162,6 +188,8 @@ LOOKUP:
>>>    conj_id_ofs: 1
>>>    type: conj-id
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 1
>>>  cache-conj-id   : 1
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -171,6 +199,8 @@ LOOKUP:
>>>    conj_id_ofs: 2
>>>    type: expr
>>>  Enabled: true
>>> +high-watermark  : 2
>>> +total           : 2
>>>  cache-conj-id   : 1
>>>  cache-expr      : 1
>>>  cache-matches   : 0
>>> @@ -180,11 +210,15 @@ LOOKUP:
>>>    conj_id_ofs: 0
>>>    type: matches
>>>  Enabled: true
>>> +high-watermark  : 3
>>> +total           : 3
>>>  cache-conj-id   : 1
>>>  cache-expr      : 1
>>>  cache-matches   : 1
>>>  DISABLE
>>>  Enabled: false
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -193,6 +227,8 @@ ADD conj-id:
>>>  LOOKUP:
>>>    not found
>>>  Enabled: false
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -201,6 +237,8 @@ ADD expr:
>>>  LOOKUP:
>>>    not found
>>>  Enabled: false
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -209,11 +247,15 @@ ADD matches:
>>>  LOOKUP:
>>>    not found
>>>  Enabled: false
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>>  ENABLE
>>>  Enabled: true
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -223,6 +265,8 @@ LOOKUP:
>>>    conj_id_ofs: 7
>>>    type: conj-id
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 1
>>>  cache-conj-id   : 1
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -232,6 +276,8 @@ LOOKUP:
>>>    conj_id_ofs: 8
>>>    type: expr
>>>  Enabled: true
>>> +high-watermark  : 2
>>> +total           : 2
>>>  cache-conj-id   : 1
>>>  cache-expr      : 1
>>>  cache-matches   : 0
>>> @@ -241,11 +287,15 @@ LOOKUP:
>>>    conj_id_ofs: 0
>>>    type: matches
>>>  Enabled: true
>>> +high-watermark  : 3
>>> +total           : 3
>>>  cache-conj-id   : 1
>>>  cache-expr      : 1
>>>  cache-matches   : 1
>>>  FLUSH
>>>  Enabled: true
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -270,6 +320,8 @@ AT_CHECK(
>>>          add matches 10 | grep -v 'Mem usage (KB)'],
>>>      [0], [dnl
>>>  Enabled: true
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -279,6 +331,8 @@ LOOKUP:
>>>    conj_id_ofs: 1
>>>    type: conj-id
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 1
>>>  cache-conj-id   : 1
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -288,6 +342,8 @@ LOOKUP:
>>>    conj_id_ofs: 2
>>>    type: expr
>>>  Enabled: true
>>> +high-watermark  : 2
>>> +total           : 2
>>>  cache-conj-id   : 1
>>>  cache-expr      : 1
>>>  cache-matches   : 0
>>> @@ -297,6 +353,8 @@ LOOKUP:
>>>    conj_id_ofs: 0
>>>    type: matches
>>>  Enabled: true
>>> +high-watermark  : 3
>>> +total           : 3
>>>  cache-conj-id   : 1
>>>  cache-expr      : 1
>>>  cache-matches   : 1
>>> @@ -305,6 +363,8 @@ dnl
>>>  dnl Max capacity smaller than current usage, cache should be flushed.
>>>  dnl
>>>  Enabled: true
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -314,6 +374,8 @@ LOOKUP:
>>>    conj_id_ofs: 4
>>>    type: conj-id
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 1
>>>  cache-conj-id   : 1
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -327,6 +389,8 @@ dnl Cache is full but we can evict the conj-id entry because we're adding
>>>  dnl an expr one.
>>>  dnl
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 1
>>>  cache-conj-id   : 0
>>>  cache-expr      : 1
>>>  cache-matches   : 0
>>> @@ -340,6 +404,8 @@ dnl Cache is full but we can evict the expr entry because we're adding
>>>  dnl a matches one.
>>>  dnl
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 1
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 1
>>> @@ -352,6 +418,8 @@ dnl Cache is full and we're adding a conj-id entry so we shouldn't evict
>>>  dnl anything else.
>>>  dnl
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 1
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 1
>>> @@ -361,6 +429,8 @@ dnl Max memory usage smaller than current memory usage, cache should be
>>>  dnl flushed.
>>>  dnl
>>>  Enabled: true
>>> +high-watermark  : 0
>>> +total           : 0
>>>  cache-conj-id   : 0
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -370,6 +440,8 @@ LOOKUP:
>>>    conj_id_ofs: 8
>>>    type: conj-id
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 1
>>>  cache-conj-id   : 1
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -382,6 +454,8 @@ dnl Cache is full and we're adding a cache entry that would go over the max
>>>  dnl memory limit so adding should fail.
>>>  dnl
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 1
>>>  cache-conj-id   : 1
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>> @@ -394,6 +468,8 @@ dnl Cache is full and we're adding a cache entry that would go over the max
>>>  dnl memory limit so adding should fail.
>>>  dnl
>>>  Enabled: true
>>> +high-watermark  : 1
>>> +total           : 1
>>>  cache-conj-id   : 1
>>>  cache-expr      : 0
>>>  cache-matches   : 0
>>>
>>
>> You could add a test for the LIMIT?
>>
> 
> I'm not sure I follow, what test did you have in mind here?

Sorry, I should have been clearer. If we delete an entry when the
watermark is above the LFLOW_CACHE_TRIM_LIMIT we should trim and the
watermark. We could then check the coverage metric. Maybe we should
check trim coverage metrics in all these tests?
> 
> Thanks,
> Dumitru
> 



More information about the dev mailing list