[ovs-dev] [PATCH 1/1] stream_ssl: fix important memory leak in ssl_connect() function

Damijan Skvarc damjan.skvarc at gmail.com
Mon Sep 16 07:19:50 UTC 2019


Hi William and thanks for your review.
I agree with your proposed changes.
Is there still some action expected from me?

thanks, Damijan



On Thu, Sep 12, 2019 at 8:00 PM William Tu <u9012063 at gmail.com> wrote:

> On Fri, Jul 26, 2019 at 10:11:03AM +0200, Damijan Skvarc wrote:
> >
> > While checking valgrind reports after running "make check-valgrind" I
> have noticed
> > reports for several tests similar to the following:
> >
> > ....
> > ==5345== Memcheck, a memory error detector
> > ==5345== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
> > ==5345== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright
> info
> > ==5345== Command: ovsdb-client
> --private-key=/home/damijan.skvarc/doma/ovs/tests/testpki-privkey.pem
> --certificate=/home/damijan.skvarc/doma/ovs/tests/testpki-cert.pem
> --ca-cert=/home/damijan.skvarc/doma/ovs/tests/testpki-cacert.pem transact
> ssl:127.0.0.1:40111 \ \ \ ["ordinals",
> > ==5345== \ \ \ \ \ \ {"op":\ "update",
> > ==5345== \ \ \ \ \ \ \ "table":\ "ordinals",
> > ==5345== \ \ \ \ \ \ \ "where":\ [["number",\ "==",\ 1]],
> > ==5345== \ \ \ \ \ \ \ "row":\ {"number":\ 2,\ "name":\ "old\ two"}},
> > ==5345== \ \ \ \ \ \ {"op":\ "update",
> > ==5345== \ \ \ \ \ \ \ "table":\ "ordinals",
> > ==5345== \ \ \ \ \ \ \ "where":\ [["name",\ "==",\ "two"]],
> > ==5345== \ \ \ \ \ \ \ "row":\ {"number":\ 1,\ "name":\ "old\ one"}}]
> > ==5345== Parent PID: 5344
> > ==5345==
> > ==5345==
> > ==5345== HEAP SUMMARY:
> > ==5345==     in use at exit: 116,551 bytes in 3,341 blocks
> > ==5345==   total heap usage: 5,134 allocs, 1,793 frees, 412,290 bytes
> allocated
> > ==5345==
> > ==5345== 6,221 (184 direct, 6,037 indirect) bytes in 1 blocks are
> definitely lost in loss record 498 of 500
> > ==5345==    at 0x4C2DB8F: malloc (in
> /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
> > ==5345==    by 0x5105E77: CRYPTO_malloc (in
> /lib/x86_64-linux-gnu/libcrypto.so.1.0.0)
> > ==5345==    by 0x51E1D23: ??? (in
> /lib/x86_64-linux-gnu/libcrypto.so.1.0.0)
> > ==5345==    by 0x51E4861: ??? (in
> /lib/x86_64-linux-gnu/libcrypto.so.1.0.0)
> > ==5345==    by 0x51E5414: ASN1_item_ex_d2i (in
> /lib/x86_64-linux-gnu/libcrypto.so.1.0.0)
> > ==5345==    by 0x51E546A: ASN1_item_d2i (in
> /lib/x86_64-linux-gnu/libcrypto.so.1.0.0)
> > ==5345==    by 0x4E56B27: ??? (in /lib/x86_64-linux-gnu/libssl.so.1.0.0)
> > ==5345==    by 0x4E5BA11: ??? (in /lib/x86_64-linux-gnu/libssl.so.1.0.0)
> > ==5345==    by 0x4E65145: ??? (in /lib/x86_64-linux-gnu/libssl.so.1.0.0)
> > ==5345==    by 0x4522DF: ssl_connect (stream-ssl.c:530)
> > ==5345==    by 0x443D38: scs_connecting (stream.c:315)
> > ==5345==    by 0x443D38: stream_connect (stream.c:338)
> > ==5345==    by 0x443FA1: stream_open_block (stream.c:266)
> > ==5345==    by 0x40AB79: open_jsonrpc (ovsdb-client.c:507)
> > ==5345==    by 0x40AB79: open_rpc (ovsdb-client.c:143)
> > ==5345==    by 0x40B06B: do_transact__ (ovsdb-client.c:871)
> > ==5345==    by 0x40B245: do_transact (ovsdb-client.c:893)
> > ==5345==    by 0x405F76: main (ovsdb-client.c:282)
> > ==5345==
> > ==5345== LEAK SUMMARY:
> > ==5345==    definitely lost: 184 bytes in 1 blocks
> > ==5345==    indirectly lost: 6,037 bytes in 117 blocks
> > ==5345==      possibly lost: 0 bytes in 0 blocks
> > ==5345==    still reachable: 110,330 bytes in 3,223 blocks
> > ==5345==         suppressed: 0 bytes in 0 blocks
> > ==5345== Reachable blocks (those to which a pointer was found) are not
> shown.
> > ==5345== To see them, rerun with: --leak-check=full --show-leak-kinds=all
> > ==5345==
> > ==5345== For counts of detected and suppressed errors, rerun with: -v
> > ==5345== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
> > ....
> >
> >
> > This report was extracted from "index uniqueness checking" test and
> complains about
> > leaking memory in ovsdb-client application. The problem is not huge,
> since ovsdb-client
> > is CLI tool which is constantly reinvoked/restarted, thus leaked memory
> is not accumulated.
> >
> > More problematic issue is that for the same test valgrind reports the
> similar problem also for
> > ovsdb-server:
> >
> > ....
> > ==5290== Memcheck, a memory error detector
> > ==5290== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
> > ==5290== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright
> info
> > ==5290== Command: ovsdb-server --log-file --detach --no-chdir --pidfile
> --private-key=/home/damijan.skvarc/doma/ovs/tests/testpki-privkey2.pem
> --certificate=/home/damijan.skvarc/doma/ovs/tests/testpki-cert2.pem
> --ca-cert=/home/damijan.skvarc/doma/ovs/tests/testpki-cacert.pem
> --remote=pssl:0:127.0.0.1 db
> > ==5290== Parent PID: 5289
> > ==5290==
> > ==5292== Warning: noted but unhandled ioctl 0x2403 with no
> size/direction hints.
> > ==5292==    This could cause spurious value errors to appear.
> > ==5292==    See README_MISSING_SYSCALL_OR_IOCTL for guidance on writing
> a proper wrapper.
> > ==5292== Warning: noted but unhandled ioctl 0x2400 with no
> size/direction hints.
> > ==5292==    This could cause spurious value errors to appear.
> > ==5292==    See README_MISSING_SYSCALL_OR_IOCTL for guidance on writing
> a proper wrapper.
> > ==5290==
> > ==5290== HEAP SUMMARY:
> > ==5290==     in use at exit: 2,066 bytes in 48 blocks
> > ==5290==   total heap usage: 87 allocs, 39 frees, 14,152 bytes allocated
> > ==5290==
> > ==5290== LEAK SUMMARY:
> > ==5290==    definitely lost: 0 bytes in 0 blocks
> > ==5290==    indirectly lost: 0 bytes in 0 blocks
> > ==5290==      possibly lost: 0 bytes in 0 blocks
> > ==5290==    still reachable: 2,066 bytes in 48 blocks
> > ==5290==         suppressed: 0 bytes in 0 blocks
> > ==5290== Reachable blocks (those to which a pointer was found) are not
> shown.
> > ==5290== To see them, rerun with: --leak-check=full --show-leak-kinds=all
> > ==5290==
> > ==5290== For counts of detected and suppressed errors, rerun with: -v
> > ==5290== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
> > ==5292== Warning: noted but unhandled ioctl 0x2401 with no
> size/direction hints.
> > ==5292==    This could cause spurious value errors to appear.
> > ==5292==    See README_MISSING_SYSCALL_OR_IOCTL for guidance on writing
> a proper wrapper.
> > ==5292==
> > ==5292== HEAP SUMMARY:
> > ==5292==     in use at exit: 164,018 bytes in 4,252 blocks
> > ==5292==   total heap usage: 17,910 allocs, 13,658 frees, 1,907,468
> bytes allocated
> > ==5292==
> > ==5292== 49,720 (1,472 direct, 48,248 indirect) bytes in 8 blocks are
> definitely lost in loss record 580 of 580
> > ==5292==    at 0x4C2DB8F: malloc (in
> /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
> > ==5292==    by 0x5105E77: CRYPTO_malloc (in
> /lib/x86_64-linux-gnu/libcrypto.so.1.0.0)
> > ==5292==    by 0x51E1D23: ??? (in
> /lib/x86_64-linux-gnu/libcrypto.so.1.0.0)
> > ==5292==    by 0x51E4861: ??? (in
> /lib/x86_64-linux-gnu/libcrypto.so.1.0.0)
> > ==5292==    by 0x51E5414: ASN1_item_ex_d2i (in
> /lib/x86_64-linux-gnu/libcrypto.so.1.0.0)
> > ==5292==    by 0x51E546A: ASN1_item_d2i (in
> /lib/x86_64-linux-gnu/libcrypto.so.1.0.0)
> > ==5292==    by 0x4E53E00: ??? (in /lib/x86_64-linux-gnu/libssl.so.1.0.0)
> > ==5292==    by 0x4E55727: ??? (in /lib/x86_64-linux-gnu/libssl.so.1.0.0)
> > ==5292==    by 0x452C4B: ssl_connect (stream-ssl.c:530)
> > ==5292==    by 0x445B18: scs_connecting (stream.c:315)
> > ==5292==    by 0x445B18: stream_connect (stream.c:338)
> > ==5292==    by 0x445B91: stream_recv (stream.c:369)
> > ==5292==    by 0x432A9C: jsonrpc_recv.part.7 (jsonrpc.c:310)
> > ==5292==    by 0x433977: jsonrpc_recv (jsonrpc.c:1139)
> > ==5292==    by 0x433977: jsonrpc_session_recv (jsonrpc.c:1112)
> > ==5292==    by 0x40CCE3: ovsdb_jsonrpc_session_run (jsonrpc-server.c:553)
> > ==5292==    by 0x40CCE3: ovsdb_jsonrpc_session_run_all
> (jsonrpc-server.c:586)
> > ==5292==    by 0x40CCE3: ovsdb_jsonrpc_server_run (jsonrpc-server.c:401)
> > ==5292==    by 0x40682E: main_loop (ovsdb-server.c:209)
> > ==5292==    by 0x40682E: main (ovsdb-server.c:460)
> > ==5292==
> > ==5292== LEAK SUMMARY:
> > ==5292==    definitely lost: 1,472 bytes in 8 blocks
> > ==5292==    indirectly lost: 48,248 bytes in 936 blocks
> > ==5292==      possibly lost: 0 bytes in 0 blocks
> > ==5292==    still reachable: 114,298 bytes in 3,308 blocks
> > ==5292==         suppressed: 0 bytes in 0 blocks
> > ==5292== Reachable blocks (those to which a pointer was found) are not
> shown.
> > ==5292== To see them, rerun with: --leak-check=full --show-leak-kinds=all
> > ==5292==
> > ==5292== For counts of detected and suppressed errors, rerun with: -v
> > ==5292== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 1 from 1)
> > ....
> >
> >
> > In this case ovsdb-server is running as daemon process (--detach option)
> and leaking memory is
> > accumulated whenever ovsdb-client is reconnected. Within observed test
> ovsdb-client CLI tool
> > connects 8 times to ovsdb-server. Leaked memory in ovsdb-client (for
> each invocation) is approx.
> > 6K bytes, while leaked memory in ovsdb-server is aprox. 48Kbytes what is
> actually 8*6K. Thus per
> > each connection both ovsdb-client and ovsdb-server leak approx. 6K bytes.
> >
> > I have done a small manual test to check if ovsdb-server is indeed
> accumulating leaked memory
> > by dumping ovsdb-server in a loop:
> >
> > console1:
> > ovsdb-server \
> > --log-file \
> > --detach --no-chdir --pidfile \
> > --private-key=testpki-privkey2.pem \
> > --certificate=testpki-cert2.pem \
> > --ca-cert=testpki-cacert.pem \
> > --remote=pssl:0:127.0.0.1 \
> > db
> >
> > while (true); do \
> > ovsdb-client \
> > --private-key=testpki-privkey.pem \
> > --certificate=testpki-cert.pem \
> > --ca-cert=testpki-cacert.pem \
> > dump ssl:127.0.0.1:42067; \
> > done
> >
> > console2:
> > watch -n 0.5 'cat /proc/$(pidof ovsdb-server)/status | grep VmSize'
> >
> > In console2 it was evidently seen ovsdb-server is constantly leaking
> memory. After a while
> > (i.e. after a certain number of reconnections) the OOM killer jumps out
> and kills ovsdb-server.
> >
> > Very similar situation was already noticed and described in
> > https://github.com/openvswitch/ovs-issues/issues/168. There, the
> problem pops up while connecting
> > controller to ovs-vswitchd daemon.
> >
> >
> > Valgrind reports point to a problem in openssl library, however after
> studying openssl code for
> > a while I have found out the problem is actually in ovs. When connection
> through SSL channel is
> > taken place openssl library allocates memory for keeping track of
> certificate. Reference to this
> > memory works very similar as std::shared_ptr pointer in recent C++
> dialects. i.e. when allocated
> > memory is referenced its reference counter is incremented and
> decremented after the memory is
> > derefered. When reference counter becomes zero allocated memory is
> automatically deallocated.
> >
> > In openssl library environment certificate is retrieved by calling
> SSL_get_peer_certificate()
> > where its reference counter is incremented. After retrieved certificate
> is not used any more its
> > reference counter must be decremented by calling X509_free(). If not,
> allocated memory is never
> > freed despite the ssl connection is properly closed.
> >
> > The problem was caused in stream-ssl.c in function ssl_connect(), which
> retrieves common peer name
> > by calling SSL_get_peer_certificate() function and without calling
> X509_free() function afterwards.
> >
> >
> > Signed-off-by: Damijan Skvarc <damjan.skvarc at gmail.com>
> >
> > ---
> >  lib/stream-ssl.c | 7 ++++++-
> >  1 file changed, 6 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/stream-ssl.c b/lib/stream-ssl.c
> > index 723fde9..231c362 100644
> > --- a/lib/stream-ssl.c
> > +++ b/lib/stream-ssl.c
> > @@ -478,17 +478,20 @@ get_peer_common_name(const struct ssl_stream *sslv)
> >      int cn_index =
> X509_NAME_get_index_by_NID(X509_get_subject_name(peer_cert),
> >                                                NID_commonName, -1);
> >      if (cn_index < 0) {
> > +        X509_free(peer_cert);
> >          return NULL;
> >      }
> >
> >      X509_NAME_ENTRY *cn_entry = X509_NAME_get_entry(
> >          X509_get_subject_name(peer_cert), cn_index);
> >      if (!cn_entry) {
> > +        X509_free(peer_cert);
> >          return NULL;
> >      }
> >
> >      ASN1_STRING *cn_data = X509_NAME_ENTRY_get_data(cn_entry);
> >      if (!cn_data) {
> > +        X509_free(peer_cert);
> >          return NULL;
> >      }
> >
> > @@ -499,7 +502,9 @@ get_peer_common_name(const struct ssl_stream *sslv)
> >  #else
> >      cn = (const char *)ASN1_STRING_get0_data(cn_data);
> >   #endif
> > -    return xstrdup(cn);
> > +    char *p = xstrdup(cn);
> > +    X509_free(peer_cert);
> > +    return p;
> >  }
> >
> >  static int
>
> Looks good to me.
>
> Thanks for the patch, maybe it's better avoiding duplicates of
> X509_free by doing
>
> diff --git a/lib/stream-ssl.c b/lib/stream-ssl.c
> index 723fde9ad8fe..078fcbc3aa4a 100644
> --- a/lib/stream-ssl.c
> +++ b/lib/stream-ssl.c
> @@ -470,6 +470,7 @@ do_ca_cert_bootstrap(struct stream *stream)
>  static char *
>  get_peer_common_name(const struct ssl_stream *sslv)
>  {
> +    char *peer_name = NULL;
>      X509 *peer_cert = SSL_get_peer_certificate(sslv->ssl);
>      if (!peer_cert) {
>          return NULL;
> @@ -478,18 +479,18 @@ get_peer_common_name(const struct ssl_stream *sslv)
>      int cn_index =
> X509_NAME_get_index_by_NID(X509_get_subject_name(peer_cert),
>                                                NID_commonName, -1);
>      if (cn_index < 0) {
> -        return NULL;
> +        goto error;
>      }
>
>      X509_NAME_ENTRY *cn_entry = X509_NAME_get_entry(
>          X509_get_subject_name(peer_cert), cn_index);
>      if (!cn_entry) {
> -        return NULL;
> +        goto error;
>      }
>
>      ASN1_STRING *cn_data = X509_NAME_ENTRY_get_data(cn_entry);
>      if (!cn_data) {
> -        return NULL;
> +        goto error;
>      }
>
>      const char *cn;
> @@ -499,7 +500,11 @@ get_peer_common_name(const struct ssl_stream *sslv)
>  #else
>      cn = (const char *)ASN1_STRING_get0_data(cn_data);
>   #endif
> -    return xstrdup(cn);
> +    peer_name = xstrdup(cn);
> +
> +error:
> +    X509_free(peer_cert);
> +    return peer_name;
>  }
>
>  static int
>
>
> Regards,
> William
>
> > --
> > 2.7.4
> >
> > _______________________________________________
> > dev mailing list
> > dev at openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list