[ovs-dev] [PATCH ovn] lflow-cache: Auto trim when cache size is reduced significantly.
Mark Gray
mark.d.gray at redhat.com
Fri Jun 11 14:48:03 UTC 2021
On 11/06/2021 10:59, Dumitru Ceara wrote:
> Thanks for the review!
>
> On 6/11/21 11:22 AM, Mark Gray wrote:
>> On 04/06/2021 11:00, Dumitru Ceara wrote:
>>> Due to a bug in glibc [0], M_TRIM_THRESHOLD (default 128KB) is not
>>> honored. The lflow cache is one of the largest memory consumers in
>>> ovn-controller and it used to trim memory whenever the cache was
>>> flushed. However, that required periodic intervention from the CMS
>>> side.
>>>
>>> Instead, we now automatically trim memory every time the lflow cache
>>> utilization decreases by 50%, with a threshold of at least
>>> LFLOW_CACHE_TRIM_LIMIT (10000) elements in the cache.
>>>
>>> [0] https://sourceware.org/bugzilla/show_bug.cgi?id=14827
>>>
>>> Reported-at: https://bugzilla.redhat.com/1967882
>>> Signed-off-by: Dumitru Ceara <dceara at redhat.com>
>>> ---
>>> controller/lflow-cache.c | 68 ++++++++++++++++++++++-------------
>>> tests/ovn-lflow-cache.at | 76 ++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 119 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/controller/lflow-cache.c b/controller/lflow-cache.c
>>> index 56ddf1075..11935e7ae 100644
>>> --- a/controller/lflow-cache.c
>>> +++ b/controller/lflow-cache.c
>>> @@ -40,6 +40,7 @@ COVERAGE_DEFINE(lflow_cache_delete);
>>> COVERAGE_DEFINE(lflow_cache_full);
>>> COVERAGE_DEFINE(lflow_cache_mem_full);
>>> COVERAGE_DEFINE(lflow_cache_made_room);
>>> +COVERAGE_DEFINE(lflow_cache_trim);
>>>
>>> static const char *lflow_cache_type_names[LCACHE_T_MAX] = {
>>> [LCACHE_T_CONJ_ID] = "cache-conj-id",
>>> @@ -49,6 +50,8 @@ static const char *lflow_cache_type_names[LCACHE_T_MAX] = {
>>>
>>> struct lflow_cache {
>>> struct hmap entries[LCACHE_T_MAX];
>>> + uint32_t n_entries;
>>> + uint32_t high_watermark;
>>> uint32_t capacity;
>>> uint64_t mem_usage;
>>> uint64_t max_mem_usage;
>>> @@ -63,7 +66,8 @@ struct lflow_cache_entry {
>>> struct lflow_cache_value value;
>>> };
>>>
>>> -static size_t lflow_cache_n_entries__(const struct lflow_cache *lc);
>>> +#define LFLOW_CACHE_TRIM_LIMIT 10000
>>
>> This seems to be an arbitrary value? Was there a reason for selecting
>> this value? Perhaps it could be a runtime tunable parameter? wdyt?
>
> This is an arbitrary value indeed :)
>
> The reason I chose 10K was that it seemed high enough to allow low-scale
> deployments without any penalty due to memory trimming and still low
> enough to enable trimming on most mid/high scale deployments.
>
> I can make it tunable through an OVS external-id but I guess in that
> case we might as well make the watermark tunable too. What do you think?
Do you mean the percentage at which we flush? This might be a good idea
as well.
>
>>> +
>>> static bool lflow_cache_make_room__(struct lflow_cache *lc,
>>> enum lflow_cache_type type);
>>> static struct lflow_cache_value *lflow_cache_add__(
>>> @@ -71,18 +75,18 @@ static struct lflow_cache_value *lflow_cache_add__(
>>> enum lflow_cache_type type, uint64_t value_size);
>>> static void lflow_cache_delete__(struct lflow_cache *lc,
>>> struct lflow_cache_entry *lce);
>>> +static void lflow_cache_trim__(struct lflow_cache *lc, bool force);
>>>
>>> struct lflow_cache *
>>> lflow_cache_create(void)
>>> {
>>> - struct lflow_cache *lc = xmalloc(sizeof *lc);
>>> + struct lflow_cache *lc = xzalloc(sizeof *lc);
>>>
>>> for (size_t i = 0; i < LCACHE_T_MAX; i++) {
>>> hmap_init(&lc->entries[i]);
>>> }
>>>
>>> lc->enabled = true;
>>> - lc->mem_usage = 0;
>>> return lc;
>>> }
>>>
>>> @@ -101,12 +105,8 @@ lflow_cache_flush(struct lflow_cache *lc)
>>> HMAP_FOR_EACH_SAFE (lce, lce_next, node, &lc->entries[i]) {
>>> lflow_cache_delete__(lc, lce);
>>> }
>>> - hmap_shrink(&lc->entries[i]);
>>> }
>>> -
>>> -#if HAVE_DECL_MALLOC_TRIM
>>> - malloc_trim(0);
>>> -#endif
>>> + lflow_cache_trim__(lc, true);
>>> }
>>>
>>> void
>>> @@ -134,7 +134,7 @@ lflow_cache_enable(struct lflow_cache *lc, bool enabled, uint32_t capacity,
>>> uint64_t max_mem_usage = max_mem_usage_kb * 1024;
>>>
>>> if ((lc->enabled && !enabled)
>>> - || capacity < lflow_cache_n_entries__(lc)
>>> + || capacity < lc->n_entries
>>> || max_mem_usage < lc->mem_usage) {
>>> lflow_cache_flush(lc);
>>> }
>>> @@ -164,6 +164,9 @@ lflow_cache_get_stats(const struct lflow_cache *lc, struct ds *output)
>>>
>>> ds_put_format(output, "Enabled: %s\n",
>>> lflow_cache_is_enabled(lc) ? "true" : "false");
>>> + ds_put_format(output, "%-16s: %"PRIu32"\n", "high-watermark",
>>> + lc->high_watermark);
>>> + ds_put_format(output, "%-16s: %"PRIu32"\n", "total", lc->n_entries);
>>> for (size_t i = 0; i < LCACHE_T_MAX; i++) {
>>> ds_put_format(output, "%-16s: %"PRIuSIZE"\n",
>>> lflow_cache_type_names[i],
>>> @@ -254,20 +257,10 @@ lflow_cache_delete(struct lflow_cache *lc, const struct uuid *lflow_uuid)
>>> COVERAGE_INC(lflow_cache_delete);
>>> lflow_cache_delete__(lc, CONTAINER_OF(lcv, struct lflow_cache_entry,
>>> value));
>>> + lflow_cache_trim__(lc, false);
>>
>> Should we have any concerns about the impact of doing this more
>> frequently now? Before, we only trimmed on flush. However, I do think
>> the impact will be reduced due to the watermarking.
>
> That was the idea, to minimize impact thanks to the watermarking. IMO
> reducing the memory usage is more critical than the limited impact we
> have due to trimming. Also, as we scale down, trimming happens more
> often but its impact is also lower and lower.
>
>>> }
>>> }
>>>
>>> -static size_t
>>> -lflow_cache_n_entries__(const struct lflow_cache *lc)
>>> -{
>>> - size_t n_entries = 0;
>>> -
>>> - for (size_t i = 0; i < LCACHE_T_MAX; i++) {
>>> - n_entries += hmap_count(&lc->entries[i]);
>>> - }
>>> - return n_entries;
>>> -}
>>> -
>>> static bool
>>> lflow_cache_make_room__(struct lflow_cache *lc, enum lflow_cache_type type)
>>> {
>>> @@ -319,7 +312,7 @@ lflow_cache_add__(struct lflow_cache *lc, const struct uuid *lflow_uuid,
>>> return NULL;
>>> }
>>>
>>> - if (lflow_cache_n_entries__(lc) == lc->capacity) {
>>> + if (lc->n_entries == lc->capacity) {
>>> if (!lflow_cache_make_room__(lc, type)) {
>>> COVERAGE_INC(lflow_cache_full);
>>> return NULL;
>>> @@ -336,17 +329,17 @@ lflow_cache_add__(struct lflow_cache *lc, const struct uuid *lflow_uuid,
>>> lce->size = size;
>>> lce->value.type = type;
>>> hmap_insert(&lc->entries[type], &lce->node, uuid_hash(lflow_uuid));
>>> + lc->n_entries++;
>>> + lc->high_watermark = MAX(lc->high_watermark, lc->n_entries);
>>> return &lce->value;
>>> }
>>>
>>> static void
>>> lflow_cache_delete__(struct lflow_cache *lc, struct lflow_cache_entry *lce)
>>> {
>>> - if (!lce) {
>>> - return;
>>> - }
>>> -
>>> + ovs_assert(lc->n_entries > 0);
>>> hmap_remove(&lc->entries[lce->value.type], &lce->node);
>>> + lc->n_entries--;
>>> switch (lce->value.type) {
>>> case LCACHE_T_NONE:
>>> OVS_NOT_REACHED();
>>> @@ -369,3 +362,28 @@ lflow_cache_delete__(struct lflow_cache *lc, struct lflow_cache_entry *lce)
>>> lc->mem_usage -= lce->size;
>>> free(lce);
>>> }
>>> +
>>> +static void
>>> +lflow_cache_trim__(struct lflow_cache *lc, bool force)
>>> +{
>>> + /* Trim if we had at least 'TRIM_LIMIT' elements at some point and if the
>>> + * current usage is less than half of 'high_watermark'.
>>> + */
>>> + ovs_assert(lc->high_watermark >= lc->n_entries);
>>> + if (!force
>>> + && (lc->high_watermark <= LFLOW_CACHE_TRIM_LIMIT
>>> + || lc->n_entries > lc->high_watermark / 2)) {
>>> + return;
>>> + }
>>> +
>>> + COVERAGE_INC(lflow_cache_trim);
>>> + for (size_t i = 0; i < LCACHE_T_MAX; i++) {
>>> + hmap_shrink(&lc->entries[i]);
>>> + }
>>> +
>>> +#if HAVE_DECL_MALLOC_TRIM
>>> + malloc_trim(0);
>>> +#endif
>>> +
>>> + lc->high_watermark = lc->n_entries;
>>> +}
>>> diff --git a/tests/ovn-lflow-cache.at b/tests/ovn-lflow-cache.at
>>> index e5e9ed1e8..df483c9d7 100644
>>> --- a/tests/ovn-lflow-cache.at
>>> +++ b/tests/ovn-lflow-cache.at
>>> @@ -12,6 +12,8 @@ AT_CHECK(
>>> add matches 3 | grep -v 'Mem usage (KB)'],
>>> [0], [dnl
>>> Enabled: true
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -21,6 +23,8 @@ LOOKUP:
>>> conj_id_ofs: 1
>>> type: conj-id
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 1
>>> cache-conj-id : 1
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -30,6 +34,8 @@ LOOKUP:
>>> conj_id_ofs: 2
>>> type: expr
>>> Enabled: true
>>> +high-watermark : 2
>>> +total : 2
>>> cache-conj-id : 1
>>> cache-expr : 1
>>> cache-matches : 0
>>> @@ -39,6 +45,8 @@ LOOKUP:
>>> conj_id_ofs: 0
>>> type: matches
>>> Enabled: true
>>> +high-watermark : 3
>>> +total : 3
>>> cache-conj-id : 1
>>> cache-expr : 1
>>> cache-matches : 1
>>> @@ -54,6 +62,8 @@ AT_CHECK(
>>> add-del matches 3 | grep -v 'Mem usage (KB)'],
>>> [0], [dnl
>>> Enabled: true
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -66,6 +76,8 @@ DELETE
>>> LOOKUP:
>>> not found
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -78,6 +90,8 @@ DELETE
>>> LOOKUP:
>>> not found
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -90,6 +104,8 @@ DELETE
>>> LOOKUP:
>>> not found
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -105,6 +121,8 @@ AT_CHECK(
>>> add matches 3 | grep -v 'Mem usage (KB)'],
>>> [0], [dnl
>>> Enabled: false
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -113,6 +131,8 @@ ADD conj-id:
>>> LOOKUP:
>>> not found
>>> Enabled: false
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -121,6 +141,8 @@ ADD expr:
>>> LOOKUP:
>>> not found
>>> Enabled: false
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -129,6 +151,8 @@ ADD matches:
>>> LOOKUP:
>>> not found
>>> Enabled: false
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -153,6 +177,8 @@ AT_CHECK(
>>> flush | grep -v 'Mem usage (KB)'],
>>> [0], [dnl
>>> Enabled: true
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -162,6 +188,8 @@ LOOKUP:
>>> conj_id_ofs: 1
>>> type: conj-id
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 1
>>> cache-conj-id : 1
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -171,6 +199,8 @@ LOOKUP:
>>> conj_id_ofs: 2
>>> type: expr
>>> Enabled: true
>>> +high-watermark : 2
>>> +total : 2
>>> cache-conj-id : 1
>>> cache-expr : 1
>>> cache-matches : 0
>>> @@ -180,11 +210,15 @@ LOOKUP:
>>> conj_id_ofs: 0
>>> type: matches
>>> Enabled: true
>>> +high-watermark : 3
>>> +total : 3
>>> cache-conj-id : 1
>>> cache-expr : 1
>>> cache-matches : 1
>>> DISABLE
>>> Enabled: false
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -193,6 +227,8 @@ ADD conj-id:
>>> LOOKUP:
>>> not found
>>> Enabled: false
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -201,6 +237,8 @@ ADD expr:
>>> LOOKUP:
>>> not found
>>> Enabled: false
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -209,11 +247,15 @@ ADD matches:
>>> LOOKUP:
>>> not found
>>> Enabled: false
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> ENABLE
>>> Enabled: true
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -223,6 +265,8 @@ LOOKUP:
>>> conj_id_ofs: 7
>>> type: conj-id
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 1
>>> cache-conj-id : 1
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -232,6 +276,8 @@ LOOKUP:
>>> conj_id_ofs: 8
>>> type: expr
>>> Enabled: true
>>> +high-watermark : 2
>>> +total : 2
>>> cache-conj-id : 1
>>> cache-expr : 1
>>> cache-matches : 0
>>> @@ -241,11 +287,15 @@ LOOKUP:
>>> conj_id_ofs: 0
>>> type: matches
>>> Enabled: true
>>> +high-watermark : 3
>>> +total : 3
>>> cache-conj-id : 1
>>> cache-expr : 1
>>> cache-matches : 1
>>> FLUSH
>>> Enabled: true
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -270,6 +320,8 @@ AT_CHECK(
>>> add matches 10 | grep -v 'Mem usage (KB)'],
>>> [0], [dnl
>>> Enabled: true
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -279,6 +331,8 @@ LOOKUP:
>>> conj_id_ofs: 1
>>> type: conj-id
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 1
>>> cache-conj-id : 1
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -288,6 +342,8 @@ LOOKUP:
>>> conj_id_ofs: 2
>>> type: expr
>>> Enabled: true
>>> +high-watermark : 2
>>> +total : 2
>>> cache-conj-id : 1
>>> cache-expr : 1
>>> cache-matches : 0
>>> @@ -297,6 +353,8 @@ LOOKUP:
>>> conj_id_ofs: 0
>>> type: matches
>>> Enabled: true
>>> +high-watermark : 3
>>> +total : 3
>>> cache-conj-id : 1
>>> cache-expr : 1
>>> cache-matches : 1
>>> @@ -305,6 +363,8 @@ dnl
>>> dnl Max capacity smaller than current usage, cache should be flushed.
>>> dnl
>>> Enabled: true
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -314,6 +374,8 @@ LOOKUP:
>>> conj_id_ofs: 4
>>> type: conj-id
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 1
>>> cache-conj-id : 1
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -327,6 +389,8 @@ dnl Cache is full but we can evict the conj-id entry because we're adding
>>> dnl an expr one.
>>> dnl
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 1
>>> cache-conj-id : 0
>>> cache-expr : 1
>>> cache-matches : 0
>>> @@ -340,6 +404,8 @@ dnl Cache is full but we can evict the expr entry because we're adding
>>> dnl a matches one.
>>> dnl
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 1
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 1
>>> @@ -352,6 +418,8 @@ dnl Cache is full and we're adding a conj-id entry so we shouldn't evict
>>> dnl anything else.
>>> dnl
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 1
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 1
>>> @@ -361,6 +429,8 @@ dnl Max memory usage smaller than current memory usage, cache should be
>>> dnl flushed.
>>> dnl
>>> Enabled: true
>>> +high-watermark : 0
>>> +total : 0
>>> cache-conj-id : 0
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -370,6 +440,8 @@ LOOKUP:
>>> conj_id_ofs: 8
>>> type: conj-id
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 1
>>> cache-conj-id : 1
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -382,6 +454,8 @@ dnl Cache is full and we're adding a cache entry that would go over the max
>>> dnl memory limit so adding should fail.
>>> dnl
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 1
>>> cache-conj-id : 1
>>> cache-expr : 0
>>> cache-matches : 0
>>> @@ -394,6 +468,8 @@ dnl Cache is full and we're adding a cache entry that would go over the max
>>> dnl memory limit so adding should fail.
>>> dnl
>>> Enabled: true
>>> +high-watermark : 1
>>> +total : 1
>>> cache-conj-id : 1
>>> cache-expr : 0
>>> cache-matches : 0
>>>
>>
>> You could add a test for the LIMIT?
>>
>
> I'm not sure I follow, what test did you have in mind here?
Sorry, I should have been clearer. If we delete an entry when the
watermark is above the LFLOW_CACHE_TRIM_LIMIT, we should trim and reset
the watermark. We could then check the coverage metric. Maybe we should
check trim coverage metrics in all these tests?
>
> Thanks,
> Dumitru
>
More information about the dev
mailing list