[ovs-dev] [PATCH v1 3/3] datapath-windows: Optimize conntrack lock implementation.

Alin Serdean aserdean at cloudbasesolutions.com
Tue Nov 28 14:45:35 UTC 2017


Hi Anand,

Thanks a lot for implementing the series.

I saw some small nits while initializing and cleaning `ovsCtBucketLock`, otherwise it looks good.
I think I'm comfortable to acknowledge the next revision.

Thanks,
Alin.

> -----Original Message-----
> From: ovs-dev-bounces at openvswitch.org [mailto:ovs-dev-
> bounces at openvswitch.org] On Behalf Of Anand Kumar
> Sent: Tuesday, November 14, 2017 8:48 PM
> To: dev at openvswitch.org
> Subject: [ovs-dev] [PATCH v1 3/3] datapath-windows: Optimize conntrack
> lock implementation.
> 
> Currently, there is one global lock for conntrack module, which protects
> conntrack entries and conntrack table. All the NAT operations are performed
> holding this lock.
> 
> This becomes inefficient, as the number of conntrack entries grow.
> With new implementation, we will have two PNDIS_RW_LOCK_EX locks in
> conntrack.
> 
> 1. ovsCtBucketLock - one rw lock per bucket of the conntrack table, which is
> shared by all the ct entries that belong to the same bucket.
> 2. lock -  a rw lock in OVS_CT_ENTRY structure that protects the members of
> conntrack entry.
> 
> Also, OVS_CT_ENTRY structure will have a lock reference(bucketLockRef) to
> the corresponding OvsCtBucketLock of conntrack table.
> We need this reference to retrieve ovsCtBucketLock from ct entry for delete
> operation.
> 
> Signed-off-by: Anand Kumar <kumaranand at vmware.com>
> ---
>  datapath-windows/ovsext/Conntrack-nat.c |   6 +
>  datapath-windows/ovsext/Conntrack.c     | 231 ++++++++++++++++++++---
> ---------
>  datapath-windows/ovsext/Conntrack.h     |   3 +
>  3 files changed, 154 insertions(+), 86 deletions(-)
> 
> diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-
> windows/ovsext/Conntrack-nat.c
> index 7975770..33a86cf 100644
> --- a/datapath-windows/ovsext/Conntrack-nat.c
> +++ b/datapath-windows/ovsext/Conntrack-nat.c
> @@ -167,12 +167,16 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
> {
>      UINT32 natFlag;
>      const struct ct_endpoint* endpoint;
> +    LOCK_STATE_EX lockState;
> +    /* XXX Move conntrack locks out of NAT after implementing lock in NAT.
> */
> +    NdisAcquireRWLockRead(entry->lock, &lockState, 0);
>      /* When it is NAT, only entry->rev_key contains NATTED address;
>         When it is unNAT, only entry->key contains the UNNATTED address;*/
>      const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key;
>      BOOLEAN isSrcNat;
> 
>      if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
> +        NdisReleaseRWLock(entry->lock, &lockState);
>          return;
>      }
>      isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -202,6
> +206,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
>          }
>      } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
>          // XXX: IPv6 packet not supported yet.
> +        NdisReleaseRWLock(entry->lock, &lockState);
>          return;
>      }
>      if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
> @@ -215,6 +220,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
>              }
>          }
>      }
> +    NdisReleaseRWLock(entry->lock, &lockState);
>  }
> 
> 
> diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-
> windows/ovsext/Conntrack.c
> index ba0dc88..f5e4996 100644
> --- a/datapath-windows/ovsext/Conntrack.c
> +++ b/datapath-windows/ovsext/Conntrack.c
> @@ -31,7 +31,7 @@
>  KSTART_ROUTINE OvsConntrackEntryCleaner;  static PLIST_ENTRY
> ovsConntrackTable;  static OVS_CT_THREAD_CTX ctThreadCtx; -static
> PNDIS_RW_LOCK_EX ovsConntrackLockObj;
> +static PNDIS_RW_LOCK_EX *ovsCtBucketLock;
>  static PNDIS_RW_LOCK_EX ovsCtNatLockObj;  extern
> POVS_SWITCH_CONTEXT gOvsSwitchContext;  static LONG ctTotalEntries;
> @@ -47,20 +47,13 @@ static __inline NDIS_STATUS OvsCtFlush(UINT16
> zone);  NTSTATUS  OvsInitConntrack(POVS_SWITCH_CONTEXT context)  {
> -    NTSTATUS status;
> +    NTSTATUS status = STATUS_SUCCESS;
>      HANDLE threadHandle = NULL;
>      ctTotalEntries = 0;
> 
>      /* Init the sync-lock */
> -    ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
> -    if (ovsConntrackLockObj == NULL) {
> -        return STATUS_INSUFFICIENT_RESOURCES;
> -    }
> -
>      ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
>      if (ovsCtNatLockObj == NULL) {
> -        NdisFreeRWLock(ovsConntrackLockObj);
> -        ovsConntrackLockObj = NULL;
>          return STATUS_INSUFFICIENT_RESOURCES;
>      }
> 
> @@ -69,15 +62,26 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
>                                                   * CT_HASH_TABLE_SIZE,
>                                                   OVS_CT_POOL_TAG);
>      if (ovsConntrackTable == NULL) {
> -        NdisFreeRWLock(ovsConntrackLockObj);
> -        ovsConntrackLockObj = NULL;
>          NdisFreeRWLock(ovsCtNatLockObj);
>          ovsCtNatLockObj = NULL;
>          return STATUS_INSUFFICIENT_RESOURCES;
>      }
> 
> -    for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
> +    ovsCtBucketLock =
> OvsAllocateMemoryWithTag(sizeof(PNDIS_RW_LOCK_EX)
> +                                               * CT_HASH_TABLE_SIZE,
> +                                               OVS_CT_POOL_TAG);
> +    if (ovsCtBucketLock == NULL) {
> +        status = STATUS_INSUFFICIENT_RESOURCES;
> +        goto freeTable;
> +    }
> +
> +    for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
>          InitializeListHead(&ovsConntrackTable[i]);
> +        ovsCtBucketLock[i] = NdisAllocateRWLock(context->NdisFilterHandle);
> +        if (ovsCtBucketLock[i] == NULL) {
> +            status = STATUS_INSUFFICIENT_RESOURCES;
> +            goto freeBucketLock;
> +        }
>      }
> 
>      /* Init CT Cleaner Thread */
> @@ -87,16 +91,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
>                                    &ctThreadCtx);
> 
>      if (status != STATUS_SUCCESS) {
> -        NdisFreeRWLock(ovsConntrackLockObj);
> -        ovsConntrackLockObj = NULL;
> -
> -        NdisFreeRWLock(ovsCtNatLockObj);
> -        ovsCtNatLockObj = NULL;
> -
> -        OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
> -        ovsConntrackTable = NULL;
> -
> -        return status;
> +        goto freeBucketLock;
>      }
> 
>      ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL,
> KernelMode, @@ -104,13 +99,28 @@
> OvsInitConntrack(POVS_SWITCH_CONTEXT context)
>      ZwClose(threadHandle);
>      threadHandle = NULL;
> 
> -    status = OvsNatInit();
> +    status = OvsNatInit(context);
> 
>      if (status != STATUS_SUCCESS) {
>          OvsCleanupConntrack();
> -        return status;
> +        goto freeBucketLock;
 [Alin Serdean] Double free of ovsCtBucketLock can happen.
>      }
>      return STATUS_SUCCESS;
> +
> +freeBucketLock:
> +    for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
> +        if (ovsCtBucketLock[i] != NULL) {
[Alin Serdean] The list may not be initialized at this point. You can leave the loop that contains `ovsCtBucketLock[i] = NdisAllocateRWLock(context->NdisFilterHandle);` without a goto and check the status after.
> +            NdisFreeRWLock(ovsCtBucketLock[i]);
> +        }
> +    }
> +    OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG);
> +    ovsCtBucketLock = NULL;
> +freeTable:
> +    OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
> +    ovsConntrackTable = NULL;
> +    NdisFreeRWLock(ovsCtNatLockObj);
> +    ovsCtNatLockObj = NULL;
> +    return status;
>  }
> 


More information about the dev mailing list