[ovs-dev] [ovs-dev,V2] netdev-dpdk: fix memory leak

Kavanagh, Mark B mark.b.kavanagh at intel.com
Mon Aug 8 07:52:50 UTC 2016


>
>Thanks for the report, I didn't realize that the callback could come in the same thread.
>

Likewise - thanks for the catch, Ilya.

>I sent a patch that I believe should fix the deadlock here:

Thanks for resolving the issue, Daniele.

>
>http://openvswitch.org/pipermail/dev/2016-August/077315.html
>
>2016-08-05 7:48 GMT-07:00 Ilya Maximets <i.maximets at samsung.com>:
>On 04.08.2016 12:49, Mark Kavanagh wrote:
>> DPDK v16.07 introduces the ability to free memzones.
>> Up until this point, DPDK memory pools created in OVS could
>> not be destroyed, thus incurring a memory leak.
>>
>> Leverage the DPDK v16.07 rte_mempool API to free DPDK
>> mempools when their associated reference count reaches 0 (this
>> indicates that the memory pool is no longer in use).
>>
>> Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
>> ---
>>
>> v2->v1: rebase to head of master, and remove 'RFC' tag
>>
>>  lib/netdev-dpdk.c | 29 +++++++++++++++--------------
>>  1 file changed, 15 insertions(+), 14 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index aaac0d1..ffcd35c 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -506,7 +506,7 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
>>  }
>>
>>  static void
>> -dpdk_mp_put(struct dpdk_mp *dmp)
>> +dpdk_mp_put(struct dpdk_mp *dmp) OVS_REQUIRES(dpdk_mutex)
>>  {
>>
>>      if (!dmp) {
>> @@ -514,15 +514,12 @@ dpdk_mp_put(struct dpdk_mp *dmp)
>>      }
>>
>>      dmp->refcount--;
>> -    ovs_assert(dmp->refcount >= 0);
>>
>> -#if 0
>> -    /* I could not find any API to destroy mp. */
>> -    if (dmp->refcount == 0) {
>> -        list_delete(dmp->list_node);
>> -        /* destroy mp-pool. */
>> -    }
>> -#endif
>> +    if (OVS_UNLIKELY(!dmp->refcount)) {
>> +        ovs_list_remove(&dmp->list_node);
>> +        rte_mempool_free(dmp->mp);
>> +     }
>> +
>>  }
>>
>>  static void
>> @@ -928,16 +925,18 @@ netdev_dpdk_destruct(struct netdev *netdev)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>
>> +    ovs_mutex_lock(&dpdk_mutex);
>>      ovs_mutex_lock(&dev->mutex);
>> +
>>      rte_eth_dev_stop(dev->port_id);
>>      free(ovsrcu_get_protected(struct ingress_policer *,
>>                                &dev->ingress_policer));
>> -    ovs_mutex_unlock(&dev->mutex);
>>
>> -    ovs_mutex_lock(&dpdk_mutex);
>>      rte_free(dev->tx_q);
>>      ovs_list_remove(&dev->list_node);
>>      dpdk_mp_put(dev->dpdk_mp);
>> +
>> +    ovs_mutex_unlock(&dev->mutex);
>>      ovs_mutex_unlock(&dpdk_mutex);
>>  }
>>
>> @@ -946,6 +945,9 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>
>> +    ovs_mutex_lock(&dpdk_mutex);
>> +    ovs_mutex_lock(&dev->mutex);
>> +
>>      /* Guest becomes an orphan if still attached. */
>>      if (netdev_dpdk_get_vid(dev) >= 0) {
>>          VLOG_ERR("Removing port '%s' while vhost device still attached.",
>> @@ -961,15 +963,14 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
>>          fatal_signal_remove_file_to_unlink(dev->vhost_id);
>>      }
>>
>> -    ovs_mutex_lock(&dev->mutex);
>>      free(ovsrcu_get_protected(struct ingress_policer *,
>>                                &dev->ingress_policer));
>> -    ovs_mutex_unlock(&dev->mutex);
>>
>> -    ovs_mutex_lock(&dpdk_mutex);
>>      rte_free(dev->tx_q);
>>      ovs_list_remove(&dev->list_node);
>>      dpdk_mp_put(dev->dpdk_mp);
>> +
>> +    ovs_mutex_unlock(&dev->mutex);
>>      ovs_mutex_unlock(&dpdk_mutex);
>>  }
>I agree that locking here was wrong but this change introduces issue because
>'rte_vhost_driver_unregister()' may call 'destroy_device()' and OVS will be aborted
>on attempt to lock 'dpdk_mutex' again:
>
>VHOST_CONFIG: free connfd = 37 for device '/vhost1'
>ovs-vswitchd: lib/netdev-dpdk.c:2305: pthread_mutex_lock failed (Resource deadlock avoided)
>
>Program received signal SIGABRT, Aborted.
>0x0000007fb7ad6d38 in raise () from /lib64/libc.so.6
>(gdb) bt
>#0  0x0000007fb7ad6d38 in raise () from /lib64/libc.so.6
>#1  0x0000007fb7ad8aa8 in abort () from /lib64/libc.so.6
>#2  0x0000000000692be0 in ovs_abort_valist at lib/util.c:335
>#3  0x0000000000692ba0 in ovs_abort at lib/util.c:327
>#4  0x0000000000651800 in ovs_mutex_lock_at (l_=0x899ab0 <dpdk_mutex>, where=0x78a458
>"lib/netdev-dpdk.c:2305") at lib/ovs-thread.c:76
>#5  0x00000000006c0190 in destroy_device (vid=0) at lib/netdev-dpdk.c:2305
>#6  0x00000000004ea850 in vhost_destroy_device ()
>#7  0x00000000004ee578 in rte_vhost_driver_unregister ()
>#8  0x00000000006bc8c8 in netdev_dpdk_vhost_destruct (netdev=0x7f6bffed00) at lib/netdev-
>dpdk.c:944
>#9  0x00000000005e4ad4 in netdev_unref (dev=0x7f6bffed00) at lib/netdev.c:499
>#10 0x00000000005e4b9c in netdev_close (netdev=0x7f6bffed00) at lib/netdev.c:523
>[...]
>#20 0x000000000053ad94 in main (argc=7, argv=0x7ffffff318) at vswitchd/ovs-vswitchd.c:112
>
>May be reproduced by removing port while virtio still attached.
>This blocks reconnection feature and deletion of port while QEMU still attached.
>
>Someone should fix this. Any thoughts?
>
>Best regards, Ilya Maximets.
>_______________________________________________
>dev mailing list
>dev at openvswitch.org
>http://openvswitch.org/mailman/listinfo/dev



More information about the dev mailing list