[ovs-dev] [PATCH v1 3/3] datapath-windows: Optimize conntrack lock implementation.
Alin Serdean
aserdean at cloudbasesolutions.com
Tue Nov 28 14:45:35 UTC 2017
Hi Anand,
Thanks a lot for implementing the series.
I saw some small nits while initializing and cleaning `ovsCtBucketLock`, otherwise it looks good.
I think I'm comfortable to acknowledge the next revision.
Thanks,
Alin.
> -----Original Message-----
> From: ovs-dev-bounces at openvswitch.org [mailto:ovs-dev-
> bounces at openvswitch.org] On Behalf Of Anand Kumar
> Sent: Tuesday, November 14, 2017 8:48 PM
> To: dev at openvswitch.org
> Subject: [ovs-dev] [PATCH v1 3/3] datapath-windows: Optimize conntrack
> lock implementation.
>
> Currently, there is one global lock for conntrack module, which protects
> conntrack entries and conntrack table. All the NAT operations are performed
> holding this lock.
>
> This becomes inefficient, as the number of conntrack entries grow.
> With new implementation, we will have two PNDIS_RW_LOCK_EX locks in
> conntrack.
>
> 1. ovsCtBucketLock - one rw lock per bucket of the conntrack table, which is
> shared by all the ct entries that belong to the same bucket.
> 2. lock - a rw lock in OVS_CT_ENTRY structure that protects the members of
> conntrack entry.
>
> Also, OVS_CT_ENTRY structure will have a lock reference(bucketLockRef) to
> the corresponding OvsCtBucketLock of conntrack table.
> We need this reference to retrieve ovsCtBucketLock from ct entry for delete
> operation.
>
> Signed-off-by: Anand Kumar <kumaranand at vmware.com>
> ---
> datapath-windows/ovsext/Conntrack-nat.c | 6 +
> datapath-windows/ovsext/Conntrack.c | 231 ++++++++++++++++++++---
> ---------
> datapath-windows/ovsext/Conntrack.h | 3 +
> 3 files changed, 154 insertions(+), 86 deletions(-)
>
> diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-
> windows/ovsext/Conntrack-nat.c
> index 7975770..33a86cf 100644
> --- a/datapath-windows/ovsext/Conntrack-nat.c
> +++ b/datapath-windows/ovsext/Conntrack-nat.c
> @@ -167,12 +167,16 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
> {
> UINT32 natFlag;
> const struct ct_endpoint* endpoint;
> + LOCK_STATE_EX lockState;
> + /* XXX Move conntrack locks out of NAT after implementing lock in NAT.
> */
> + NdisAcquireRWLockRead(entry->lock, &lockState, 0);
> /* When it is NAT, only entry->rev_key contains NATTED address;
> When it is unNAT, only entry->key contains the UNNATTED address;*/
> const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key;
> BOOLEAN isSrcNat;
>
> if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
> + NdisReleaseRWLock(entry->lock, &lockState);
> return;
> }
> isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -202,6
> +206,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
> }
> } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
> // XXX: IPv6 packet not supported yet.
> + NdisReleaseRWLock(entry->lock, &lockState);
> return;
> }
> if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
> @@ -215,6 +220,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
> }
> }
> }
> + NdisReleaseRWLock(entry->lock, &lockState);
> }
>
>
> diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-
> windows/ovsext/Conntrack.c
> index ba0dc88..f5e4996 100644
> --- a/datapath-windows/ovsext/Conntrack.c
> +++ b/datapath-windows/ovsext/Conntrack.c
> @@ -31,7 +31,7 @@
> KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY
> ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; -static
> PNDIS_RW_LOCK_EX ovsConntrackLockObj;
> +static PNDIS_RW_LOCK_EX *ovsCtBucketLock;
> static PNDIS_RW_LOCK_EX ovsCtNatLockObj; extern
> POVS_SWITCH_CONTEXT gOvsSwitchContext; static LONG ctTotalEntries;
> @@ -47,20 +47,13 @@ static __inline NDIS_STATUS OvsCtFlush(UINT16
> zone); NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context) {
> - NTSTATUS status;
> + NTSTATUS status = STATUS_SUCCESS;
> HANDLE threadHandle = NULL;
> ctTotalEntries = 0;
>
> /* Init the sync-lock */
> - ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
> - if (ovsConntrackLockObj == NULL) {
> - return STATUS_INSUFFICIENT_RESOURCES;
> - }
> -
> ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
> if (ovsCtNatLockObj == NULL) {
> - NdisFreeRWLock(ovsConntrackLockObj);
> - ovsConntrackLockObj = NULL;
> return STATUS_INSUFFICIENT_RESOURCES;
> }
>
> @@ -69,15 +62,26 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
> * CT_HASH_TABLE_SIZE,
> OVS_CT_POOL_TAG);
> if (ovsConntrackTable == NULL) {
> - NdisFreeRWLock(ovsConntrackLockObj);
> - ovsConntrackLockObj = NULL;
> NdisFreeRWLock(ovsCtNatLockObj);
> ovsCtNatLockObj = NULL;
> return STATUS_INSUFFICIENT_RESOURCES;
> }
>
> - for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
> + ovsCtBucketLock =
> OvsAllocateMemoryWithTag(sizeof(PNDIS_RW_LOCK_EX)
> + * CT_HASH_TABLE_SIZE,
> + OVS_CT_POOL_TAG);
> + if (ovsCtBucketLock == NULL) {
> + status = STATUS_INSUFFICIENT_RESOURCES;
> + goto freeTable;
> + }
> +
> + for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
> InitializeListHead(&ovsConntrackTable[i]);
> + ovsCtBucketLock[i] = NdisAllocateRWLock(context->NdisFilterHandle);
> + if (ovsCtBucketLock[i] == NULL) {
> + status = STATUS_INSUFFICIENT_RESOURCES;
> + goto freeBucketLock;
> + }
> }
>
> /* Init CT Cleaner Thread */
> @@ -87,16 +91,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
> &ctThreadCtx);
>
> if (status != STATUS_SUCCESS) {
> - NdisFreeRWLock(ovsConntrackLockObj);
> - ovsConntrackLockObj = NULL;
> -
> - NdisFreeRWLock(ovsCtNatLockObj);
> - ovsCtNatLockObj = NULL;
> -
> - OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
> - ovsConntrackTable = NULL;
> -
> - return status;
> + goto freeBucketLock;
> }
>
> ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL,
> KernelMode, @@ -104,13 +99,28 @@
> OvsInitConntrack(POVS_SWITCH_CONTEXT context)
> ZwClose(threadHandle);
> threadHandle = NULL;
>
> - status = OvsNatInit();
> + status = OvsNatInit(context);
>
> if (status != STATUS_SUCCESS) {
> OvsCleanupConntrack();
> - return status;
> + goto freeBucketLock;
[Alin Serdean] Double free of ovsCtBucketLock can happen.
> }
> return STATUS_SUCCESS;
> +
> +freeBucketLock:
> + for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
> + if (ovsCtBucketLock[i] != NULL) {
[Alin Serdean] The list may not be initialized at this point. You can leave the loop that contains `ovsCtBucketLock[i] = NdisAllocateRWLock(context->NdisFilterHandle);` without a goto and check the status after.
> + NdisFreeRWLock(ovsCtBucketLock[i]);
> + }
> + }
> + OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG);
> + ovsCtBucketLock = NULL;
> +freeTable:
> + OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
> + ovsConntrackTable = NULL;
> + NdisFreeRWLock(ovsCtNatLockObj);
> + ovsCtNatLockObj = NULL;
> + return status;
> }
>
More information about the dev
mailing list