[ovs-dev] [ovs-dev,V2] netdev-dpdk: fix memory leak

Ilya Maximets i.maximets at samsung.com
Fri Aug 5 14:48:05 UTC 2016


On 04.08.2016 12:49, Mark Kavanagh wrote:
> DPDK v16.07 introduces the ability to free memzones.
> Up until this point, DPDK memory pools created in OVS could
> not be destroyed, thus incurring a memory leak.
> 
> Leverage the DPDK v16.07 rte_mempool API to free DPDK
> mempools when their associated reference count reaches 0 (this
> indicates that the memory pool is no longer in use).
> 
> Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
> ---
> 
> v2->v1: rebase to head of master, and remove 'RFC' tag
> 
>  lib/netdev-dpdk.c | 29 +++++++++++++++--------------
>  1 file changed, 15 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index aaac0d1..ffcd35c 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -506,7 +506,7 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
>  }
>  
>  static void
> -dpdk_mp_put(struct dpdk_mp *dmp)
> +dpdk_mp_put(struct dpdk_mp *dmp) OVS_REQUIRES(dpdk_mutex)
>  {
>  
>      if (!dmp) {
> @@ -514,15 +514,12 @@ dpdk_mp_put(struct dpdk_mp *dmp)
>      }
>  
>      dmp->refcount--;
> -    ovs_assert(dmp->refcount >= 0);
>  
> -#if 0
> -    /* I could not find any API to destroy mp. */
> -    if (dmp->refcount == 0) {
> -        list_delete(dmp->list_node);
> -        /* destroy mp-pool. */
> -    }
> -#endif
> +    if (OVS_UNLIKELY(!dmp->refcount)) {
> +        ovs_list_remove(&dmp->list_node);
> +        rte_mempool_free(dmp->mp);
> +     }
> +
>  }
>  
>  static void
> @@ -928,16 +925,18 @@ netdev_dpdk_destruct(struct netdev *netdev)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>  
> +    ovs_mutex_lock(&dpdk_mutex);
>      ovs_mutex_lock(&dev->mutex);
> +
>      rte_eth_dev_stop(dev->port_id);
>      free(ovsrcu_get_protected(struct ingress_policer *,
>                                &dev->ingress_policer));
> -    ovs_mutex_unlock(&dev->mutex);
>  
> -    ovs_mutex_lock(&dpdk_mutex);
>      rte_free(dev->tx_q);
>      ovs_list_remove(&dev->list_node);
>      dpdk_mp_put(dev->dpdk_mp);
> +
> +    ovs_mutex_unlock(&dev->mutex);
>      ovs_mutex_unlock(&dpdk_mutex);
>  }
>  
> @@ -946,6 +945,9 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>  
> +    ovs_mutex_lock(&dpdk_mutex);
> +    ovs_mutex_lock(&dev->mutex);
> +
>      /* Guest becomes an orphan if still attached. */
>      if (netdev_dpdk_get_vid(dev) >= 0) {
>          VLOG_ERR("Removing port '%s' while vhost device still attached.",
> @@ -961,15 +963,14 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
>          fatal_signal_remove_file_to_unlink(dev->vhost_id);
>      }
>  
> -    ovs_mutex_lock(&dev->mutex);
>      free(ovsrcu_get_protected(struct ingress_policer *,
>                                &dev->ingress_policer));
> -    ovs_mutex_unlock(&dev->mutex);
>  
> -    ovs_mutex_lock(&dpdk_mutex);
>      rte_free(dev->tx_q);
>      ovs_list_remove(&dev->list_node);
>      dpdk_mp_put(dev->dpdk_mp);
> +
> +    ovs_mutex_unlock(&dev->mutex);
>      ovs_mutex_unlock(&dpdk_mutex);
>  }

I agree that locking here was wrong but this change introduces issue because
'rte_vhost_driver_unregister()' may call 'destroy_device()' and OVS will be aborted
on attempt to lock 'dpdk_mutex' again:

VHOST_CONFIG: free connfd = 37 for device '/vhost1'
ovs-vswitchd: lib/netdev-dpdk.c:2305: pthread_mutex_lock failed (Resource deadlock avoided)

Program received signal SIGABRT, Aborted.
0x0000007fb7ad6d38 in raise () from /lib64/libc.so.6
(gdb) bt
#0  0x0000007fb7ad6d38 in raise () from /lib64/libc.so.6
#1  0x0000007fb7ad8aa8 in abort () from /lib64/libc.so.6
#2  0x0000000000692be0 in ovs_abort_valist at lib/util.c:335
#3  0x0000000000692ba0 in ovs_abort at lib/util.c:327
#4  0x0000000000651800 in ovs_mutex_lock_at (l_=0x899ab0 <dpdk_mutex>, where=0x78a458 "lib/netdev-dpdk.c:2305") at lib/ovs-thread.c:76
#5  0x00000000006c0190 in destroy_device (vid=0) at lib/netdev-dpdk.c:2305
#6  0x00000000004ea850 in vhost_destroy_device ()
#7  0x00000000004ee578 in rte_vhost_driver_unregister ()
#8  0x00000000006bc8c8 in netdev_dpdk_vhost_destruct (netdev=0x7f6bffed00) at lib/netdev-dpdk.c:944
#9  0x00000000005e4ad4 in netdev_unref (dev=0x7f6bffed00) at lib/netdev.c:499
#10 0x00000000005e4b9c in netdev_close (netdev=0x7f6bffed00) at lib/netdev.c:523
[...]
#20 0x000000000053ad94 in main (argc=7, argv=0x7ffffff318) at vswitchd/ovs-vswitchd.c:112

May be reproduced by removing port while virtio still attached.
This blocks reconnection feature and deletion of port while QEMU still attached.

Someone should fix this. Any thoughts?

Best regards, Ilya Maximets.



More information about the dev mailing list