[ovs-dev] [PATCH] json: Optimize string serialization.

Numan Siddique numans at ovn.org
Thu Aug 26 02:31:03 UTC 2021


On Tue, Aug 24, 2021 at 5:07 PM Ilya Maximets <i.maximets at ovn.org> wrote:
>
> Current string serialization code puts all characters one by one.
> This is slow because dynamic string needs to perform length checks
> on every ds_put_char() and it's also doesn't allow compiler to use
> better memory copy operations, i.e. doesn't allow copying few bytes
> at once.
>
> Special symbols are rare in a typical database.  Quotes are frequent,
> but not too frequent.  In databases created by ovn-kubernetes, for
> example, usually there are at least 10 to 50 chars between quotes.
> So, it's better to count characters that doesn't require escaping
> and use fast data copy for the whole sequential block.
>
> Testing with a synthetic benchmark (included) on my laptop shows
> following performance improvement:
>
>    Size      Q  S       Before       After       Diff
>  -----------------------------------------------------
>  100000      0  0 :    0.227 ms     0.142 ms   -37.4 %
>  100000      2  1 :    0.277 ms     0.186 ms   -32.8 %
>  100000      10 1 :    0.361 ms     0.309 ms   -14.4 %
>  10000000    0  0 :   22.720 ms    12.160 ms   -46.4 %
>  10000000    2  1 :   27.470 ms    19.300 ms   -29.7 %
>  10000000    10 1 :   37.950 ms    31.250 ms   -17.6 %
>  100000000   0  0 :  239.600 ms   126.700 ms   -47.1 %
>  100000000   2  1 :  292.400 ms   188.600 ms   -35.4 %
>  100000000   10 1 :  387.700 ms   321.200 ms   -17.1 %
>
> Here Q - probability (%) for a character to be a '\"' and
> S - probability (%) to be a special character ( < 32).
>
> Testing with a closer to real world scenario shows overall decrease
> of the time needed for database compaction by ~5-10 %.  And this
> change also decreases CPU consumption in general, because string
> serialization is used in many different places including ovsdb
> monitors and raft.
>
> Signed-off-by: Ilya Maximets <i.maximets at ovn.org>

Hi Ilya,

The patch LGTM.  It makes sense to optimize this way.

I've one comment below.

You can consider my Ack

Acked-by: Numan Siddique <numans at ovn.org>

Thanks
Numan

> ---
>  lib/json.c        | 23 +++++++++++++---
>  tests/test-json.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 88 insertions(+), 3 deletions(-)
>
> diff --git a/lib/json.c b/lib/json.c
> index 32d25003b..b2c845171 100644
> --- a/lib/json.c
> +++ b/lib/json.c
> @@ -1696,14 +1696,31 @@ json_serialize_string(const char *string, struct ds *ds)
>  {
>      uint8_t c;
>      uint8_t c2;
> +    int count;
>      const char *escape;
> +    const char *start;
>
>      ds_put_char(ds, '"');
> +    count = 0;
> +    start = string;
>      while ((c = *string++) != '\0') {
> -        escape = chars_escaping[c];
> -        while ((c2 = *escape++) != '\0') {
> -            ds_put_char(ds, c2);
> +        if (c >= ' ' && c != '\"' && c != '\\') {
> +            count++;
> +            continue;
> +        } else {
> +            if (count) {
> +                ds_put_buffer(ds, start, count);
> +                count = 0;
> +            }
> +            start = string;
> +            escape = chars_escaping[c];
> +            while ((c2 = *escape++) != '\0') {
> +                ds_put_char(ds, c2);
> +            }

Instead of the above while() {}, why can't we do -  ds_put_buffer(ds,
escape, strlen(escape));

Thanks
Numan

>          }
>      }
> +    if (count) {
> +        ds_put_buffer(ds, start, count);
> +    }
>      ds_put_char(ds, '"');
>  }
> diff --git a/tests/test-json.c b/tests/test-json.c
> index a7ee595e0..60be82bc8 100644
> --- a/tests/test-json.c
> +++ b/tests/test-json.c
> @@ -22,6 +22,8 @@
>  #include <getopt.h>
>  #include <stdio.h>
>  #include "ovstest.h"
> +#include "random.h"
> +#include "timeval.h"
>  #include "util.h"
>
>  /* --pretty: If set, the JSON output is pretty-printed, instead of printed as
> @@ -157,3 +159,69 @@ test_json_main(int argc, char *argv[])
>  }
>
>  OVSTEST_REGISTER("test-json", test_json_main);
> +
> +static void
> +json_string_benchmark_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
> +{
> +    struct {
> +        int n;
> +        int quote_probablility;
> +        int special_probability;
> +        int iter;
> +    } configs[] = {
> +        { 100000,     0, 0, 1000, },
> +        { 100000,     2, 1, 1000, },
> +        { 100000,    10, 1, 1000, },
> +        { 10000000,   0, 0, 100,  },
> +        { 10000000,   2, 1, 100,  },
> +        { 10000000,  10, 1, 100,  },
> +        { 100000000,  0, 0, 10.   },
> +        { 100000000,  2, 1, 10,   },
> +        { 100000000, 10, 1, 10,   },
> +    };
> +
> +    printf("  SIZE      Q  S            TIME\n");
> +    printf("--------------------------------------\n");
> +
> +    for (int i = 0; i < ARRAY_SIZE(configs); i++) {
> +        int iter = configs[i].iter;
> +        int n = configs[i].n;
> +        char *str = xzalloc(n);
> +
> +        for (int j = 0; j < n - 1; j++) {
> +            int r = random_range(100);
> +
> +            if (r < configs[i].special_probability) {
> +                str[j] = random_range(' ' - 1) + 1;
> +            } else if (r < (configs[i].special_probability
> +                            + configs[i].quote_probablility)) {
> +                str[j] = '\"';
> +            } else {
> +                str[j] = random_range(256 - ' ') + ' ';
> +            }
> +        }
> +
> +        printf("%-11d %-2d %-2d: ", n, configs[i].quote_probablility,
> +                                       configs[i].special_probability);
> +        fflush(stdout);
> +
> +        struct json *json = json_string_create_nocopy(str);
> +        uint64_t start = time_msec();
> +
> +        char **res = xzalloc(iter * sizeof *res);
> +        for (int j = 0; j < iter; j++ ) {
> +            res[j] = json_to_string(json, 0);
> +        }
> +
> +        printf("%16.3lf ms\n", (double) (time_msec() - start) / iter);
> +        json_destroy(json);
> +        for (int j = 0; j < iter; j++ ) {
> +            free(res[j]);
> +        }
> +        free(res);
> +    }
> +
> +    exit(0);
> +}
> +
> +OVSTEST_REGISTER("json-string-benchmark", json_string_benchmark_main);
> --
> 2.31.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list