[ovs-dev] [PATCH v7 3/4] datapath-windows: NAT integration with conntrack

Sairam Venugopal vsairam at vmware.com
Wed May 17 18:24:02 UTC 2017


Please find my comments inline.

Thanks,
Sairam




On 5/16/17, 5:34 PM, "Yin Lin" <linyi at vmware.com> wrote:

>See inline comments for reply.
>
>-----Original Message-----
>From: Sairam Venugopal 
>Sent: Tuesday, May 16, 2017 5:09 PM
>To: Yin Lin <linyi at vmware.com>; dev at openvswitch.org
>Subject: Re: [ovs-dev] [PATCH v7 3/4] datapath-windows: NAT integration with conntrack
>
>Please find my comments inline.
>
>Thanks,
>Sairam
>
>
>
>
>On 5/9/17, 3:59 PM, "ovs-dev-bounces at openvswitch.org on behalf of Yin Lin" <ovs-dev-bounces at openvswitch.org on behalf of linyi at vmware.com> wrote:
>
>>This patch integrates NAT module with existing conntrack module. NAT
>>action is now supported.
>>
>>Signed-off-by: Yin Lin <linyi at vmware.com>
>>---
>> datapath-windows/automake.mk           |   4 +-
>> datapath-windows/ovsext/Actions.c      | 119 ++++++++++++++++++++-
>> datapath-windows/ovsext/Actions.h      |  20 ++++
>> datapath-windows/ovsext/Conntrack.c    | 187 +++++++++++++++++++++------------
>> datapath-windows/ovsext/Conntrack.h    |  25 +++--
>> datapath-windows/ovsext/ovsext.vcxproj |   2 +
>> 6 files changed, 274 insertions(+), 83 deletions(-)
>>
>>diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
>>index 7177630..f1632cc 100644
>>--- a/datapath-windows/automake.mk
>>+++ b/datapath-windows/automake.mk
>>@@ -16,9 +16,9 @@ EXTRA_DIST += \
>> 	datapath-windows/ovsext/Conntrack-icmp.c \
>> 	datapath-windows/ovsext/Conntrack-other.c \
>> 	datapath-windows/ovsext/Conntrack-related.c \
>>-    datapath-windows/ovsext/Conntrack-nat.c \
>>+	datapath-windows/ovsext/Conntrack-nat.c \
>> 	datapath-windows/ovsext/Conntrack-tcp.c \
>>-    datapath-windows/ovsext/Conntrack-nat.h \
>>+	datapath-windows/ovsext/Conntrack-nat.h \
>> 	datapath-windows/ovsext/Conntrack.c \
>> 	datapath-windows/ovsext/Conntrack.h \
>> 	datapath-windows/ovsext/Datapath.c \
>>diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
>>index e2eae9a..a808d2c 100644
>>--- a/datapath-windows/ovsext/Actions.c
>>+++ b/datapath-windows/ovsext/Actions.c
>>@@ -1380,7 +1380,7 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
>>  *      based on the specified key.
>>  *----------------------------------------------------------------------------
>>  */
>>-static __inline NDIS_STATUS
>>+NDIS_STATUS
>> OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
>>                   const struct ovs_key_udp *udpAttr)
>> {
>>@@ -1427,7 +1427,7 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
>>  *      based on the specified key.
>>  *----------------------------------------------------------------------------
>>  */
>>-static __inline NDIS_STATUS
>>+NDIS_STATUS
>> OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
>>                   const struct ovs_key_tcp *tcpAttr)
>> {
>>@@ -1465,12 +1465,125 @@ OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
>> 
>> /*
>>  *----------------------------------------------------------------------------
>>+ * OvsUpdateAddressAndPort --
>>+ *      Updates the source/destination IP and port fields in
>>+ *      ovsFwdCtx.curNbl inline based on the specified key.
>>+ *----------------------------------------------------------------------------
>>+ */
>>+NDIS_STATUS
>>+OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
>>+                        UINT32 newAddr, UINT16 newPort,
>>+                        BOOLEAN isSource, BOOLEAN isTx)
>>+{
>>+    PUINT8 bufferStart;
>>+    UINT32 hdrSize;
>>+    OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
>>+    IPHdr *ipHdr;
>>+    TCPHdr *tcpHdr = NULL;
>>+    UDPHdr *udpHdr = NULL;
>>+    UINT32 *addrField = NULL;
>>+    UINT16 *portField = NULL;
>>+    UINT16 *checkField = NULL;
>>+    BOOLEAN l4Offload = FALSE;
>>+    NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
>>+
>>+    ASSERT(layers->value != 0);
>>+
>>+    if (layers->isTcp || layers->isUdp) {
>>+        hdrSize = layers->l4Offset +
>>+                  layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr);
>>+    } else {
>>+        hdrSize = layers->l3Offset + sizeof (*ipHdr);
>>+    }
>>+
>>+    bufferStart = OvsGetHeaderBySize(ovsFwdCtx, hdrSize);
>>+    if (!bufferStart) {
>>+        return NDIS_STATUS_RESOURCES;
>>+    }
>>+
>>+    ipHdr = (IPHdr *)(bufferStart + layers->l3Offset);
>>+
>>+    if (layers->isTcp) {
>>+        tcpHdr = (TCPHdr *)(bufferStart + layers->l4Offset);
>>+    } else if (layers->isUdp) {
>>+        udpHdr = (UDPHdr *)(bufferStart + layers->l4Offset);
>>+    }
>>+
>>+    csumInfo.Value = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl,
>>+                                          TcpIpChecksumNetBufferListInfo);
>>+    /*
>>+     * Adjust the IP header inline as dictated by the action, and also update
>>+     * the IP and the TCP checksum for the data modified.
>>+     *
>>+     * In the future, this could be optimized to make one call to
>>+     * ChecksumUpdate32(). Ignoring this for now, since for the most common
>>+     * case, we only update the TTL.
>>+     */
>>+
>>+    if (isSource) {
>>+        addrField = &ipHdr->saddr;
>>+        if (tcpHdr) {
>>+            portField = &tcpHdr->source;
>>+            checkField = &tcpHdr->check;
>>+            l4Offload = isTx ? (BOOLEAN)csumInfo.Transmit.TcpChecksum :
>>+                        ((BOOLEAN)csumInfo.Receive.TcpChecksumSucceeded ||
>>+                         (BOOLEAN)csumInfo.Receive.TcpChecksumFailed);
>>+        } else if (udpHdr) {
>>+            portField = &udpHdr->source;
>>+            checkField = &udpHdr->check;
>>+            l4Offload = isTx ? (BOOLEAN)csumInfo.Transmit.UdpChecksum :
>>+                        ((BOOLEAN)csumInfo.Receive.UdpChecksumSucceeded ||
>>+                         (BOOLEAN)csumInfo.Receive.UdpChecksumFailed);
>>+        }
>>+    } else {
>>+        addrField = &ipHdr->daddr;
>>+        if (tcpHdr) {
>>+            portField = &tcpHdr->dest;
>>+            checkField = &tcpHdr->check;
>>+        } else if (udpHdr) {
>>+            portField = &udpHdr->dest;
>>+            checkField = &udpHdr->check;
>>+        }
>>+    }
>>+    if (*addrField != newAddr) {
>>+        UINT32 oldAddr = *addrField;
>>+        if (checkField && *checkField != 0) {
>>+            if (l4Offload) {
>>+                /* Recompute IP pseudo checksum */
>>+                *checkField = ~(*checkField);
>>+                *checkField = ChecksumUpdate32(*checkField, oldAddr,
>>+                                               newAddr);
>>+                *checkField = ~(*checkField);
>>+            } else {
>>+                *checkField = ChecksumUpdate32(*checkField, oldAddr,
>>+                                               newAddr);
>>+            }
>>+        }
>>+        if (ipHdr->check != 0) {
>>+            ipHdr->check = ChecksumUpdate32(ipHdr->check, oldAddr,
>>+                                            newAddr);
>>+        }
>>+        *addrField = newAddr;
>>+    }
>>+
>>+    if (portField && *portField != newPort) {
>>+        if (checkField && !l4Offload) {
>>+            *checkField = ChecksumUpdate16(*checkField, *portField,
>>+                                           newPort);
>>+        }
>>+        *portField = newPort;
>>+    }
>>+    return NDIS_STATUS_SUCCESS;
>>+}
>>+
>>+/*
>>+ *----------------------------------------------------------------------------
>>  * OvsUpdateIPv4Header --
>>  *      Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
>>  *      specified key.
>>  *----------------------------------------------------------------------------
>>  */
>>-static __inline NDIS_STATUS
>>+NDIS_STATUS
>> OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
>>                     const struct ovs_key_ipv4 *ipAttr)
>> {
>>diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h
>>index 1ce6c20..fd050d5 100644
>>--- a/datapath-windows/ovsext/Actions.h
>>+++ b/datapath-windows/ovsext/Actions.h
>>@@ -110,4 +110,24 @@ OvsDoRecirc(POVS_SWITCH_CONTEXT switchContext,
>>             UINT32 srcPortNo,
>>             OVS_PACKET_HDR_INFO *layers);
>> 
>>+PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
>>+                          UINT32 size);
>>+
>>+NDIS_STATUS
>>+OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
>>+                  const struct ovs_key_udp *udpAttr);
>>+
>>+NDIS_STATUS
>>+OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
>>+                  const struct ovs_key_tcp *tcpAttr);
>>+
>>+NDIS_STATUS
>>+OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
>>+                    const struct ovs_key_ipv4 *ipAttr);
>>+
>>+NDIS_STATUS
>>+OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
>>+                        UINT32 newAddr, UINT16 newPort,
>>+                        BOOLEAN isSource, BOOLEAN isTx);
>>+
>> #endif /* __ACTIONS_H_ */
>>diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
>>index 9824368..6e95f47 100644
>>--- a/datapath-windows/ovsext/Conntrack.c
>>+++ b/datapath-windows/ovsext/Conntrack.c
>>@@ -19,6 +19,7 @@
>> #include "Jhash.h"
>> #include "PacketParser.h"
>> #include "Event.h"
>>+#include "Conntrack-nat.h"
>> 
>> #pragma warning(push)
>> #pragma warning(disable:4311)
>>@@ -27,7 +28,7 @@
>> #define SEC_TO_UNIX_EPOCH 11644473600LL
>> #define SEC_TO_NANOSEC 1000000000LL
>> 
>>-KSTART_ROUTINE ovsConntrackEntryCleaner;
>>+KSTART_ROUTINE OvsConntrackEntryCleaner;
>> static PLIST_ENTRY ovsConntrackTable;
>> static OVS_CT_THREAD_CTX ctThreadCtx;
>> static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
>>@@ -72,7 +73,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
>>     /* Init CT Cleaner Thread */
>>     KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE);
>>     status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
>>-                                  NULL, ovsConntrackEntryCleaner,
>>+                                  NULL, OvsConntrackEntryCleaner,
>>                                   &ctThreadCtx);
>> 
>>     if (status != STATUS_SUCCESS) {
>>@@ -89,6 +90,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
>>                               &ctThreadCtx.threadObject, NULL);
>>     ZwClose(threadHandle);
>>     threadHandle = NULL;
>>+
>>+    status = OvsNatInit();
>>+
>>+    if (status != STATUS_SUCCESS) {
>>+        OvsCleanupConntrack();
>>+        return status;
>>+    }
>>     return STATUS_SUCCESS;
>> }
>> 
>>@@ -121,6 +129,7 @@ OvsCleanupConntrack(VOID)
>> 
>>     NdisFreeRWLock(ovsConntrackLockObj);
>>     ovsConntrackLockObj = NULL;
>
>[Sai]: Where do you cleanup the entries of the ovsUnNatTable table?
>[Yin]: If you look inside OvsNatCleanup() we always clean up two tables. The NAT module ensures that	
>NatTable an UnNatTable is always in sync. Every operation will modify both tables so that the external caller 

[Sai] - Perhaps am misunderstanding this. Can you clarify if this is correct:

The OvsNatFlush just deletes the entries of the ovsNatTable. Are you saying that entries ovsUnNatTable
are deleted automatically?
 
VOID OvsNatFlush(UINT16 zone)
{
        PLIST_ENTRY link, next;
        for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
                LIST_FORALL_SAFE(&ovsNatTable[i], link, next) {
                        POVS_NAT_ENTRY entry =
                                CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
                        /* zone is a non-zero value */
                        if (!zone || zone == entry->key.zone) {
                                OvsNatDeleteEntry(entry);
                        }
                }
        }
}



>doesn't need to worry about it.

>
>>+    OvsNatCleanup();
>> }
>> 
>> static __inline VOID
>>@@ -160,25 +169,42 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
>>     OvsPostCtEvent(&ctEventEntry);
>> }
>> 
>>-static __inline VOID
>>-OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, UINT64 now)
>>+static __inline BOOLEAN
>>+OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx,
>>+              PNAT_ACTION_INFO natInfo, UINT64 now)
>> {
>>-    NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY));
>>-    NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY));
>>+    NdisMoveMemory(&entry->key, &ctx->key, sizeof(OVS_CT_KEY));
>>+    NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof(OVS_CT_KEY));
>>     OvsCtKeyReverse(&entry->rev_key);
>>+
>>+    /* NatInfo is always initialized to be disabled, so that if NAT action
>>+     * fails, we will not end up deleting an non-existent NAT entry.
>>+     */
>
>[Sai]: natInfo is never NULL, so we always perform the OvsIsForwardNAT check
>[Yin]: OvsIsFowardNat is a MACRO that only cost 3-5 CPU instructions. I put a NULL check regardless because you cannot expect how caller uses the function.
>In our particular case OvsCtExecute_ happens not pass a not-NULL value all the way down the call stack.

[Sai]: The null check is unnecessary in this case since natInfo has never been set to NULL.
You have refactored other parts of the code to get rid of similar checks. This usage is misleading
since it now implies that we can have natInfo set to NULL. Then we need to add these checks in other areas
too. Eg: In OvsCtExecute_  if (entry && natInfo->natAction != NAT_ACTION_NONE) { … }

IMO, the correct approach would be to set it to NULL in OvsExecuteConntrackAction() and then set it to {0} only
If(NlAttrFindNested(a, OVS_CT_ATTR_NAT) != NULL). That will reduce the memory overhead of having NatInfo in every call.
That will involve a lot of changes, and can be refactored at a later date. In the meantime, we can go under the assumption
that natInfo is never NULL and we can determine if it’s set by checking : (natInfo->natAction != NAT_ACTION_NONE)


>
>>+    if (natInfo != NULL && OvsIsForwardNat(natInfo->natAction)) {
>>+        entry->natInfo = *natInfo;
>>+        if (!OvsNatCtEntry(entry)) {
>>+            return FALSE;
>>+        }
>>+        ctx->hash = OvsHashCtKey(&entry->key);
>>+    } else {
>>+        entry->natInfo.natAction = natInfo->natAction;
>>+    }
>>+
>>     entry->timestampStart = now;
>>     InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
>>                    &entry->link);
>> 
>>     ctTotalEntries++;
>>+    return TRUE;
>> }
>> 
>> static __inline POVS_CT_ENTRY
>>-OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
>>+OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
>>                  UINT8 ipProto,
>>                  UINT32 l4Offset,
>>                  OvsConntrackKeyLookupCtx *ctx,
>>                  OvsFlowKey *key,
>>+                 PNAT_ACTION_INFO natInfo,
>>                  BOOLEAN commit,
>>                  UINT64 currentTime,
>>                  BOOLEAN *entryCreated)
>>@@ -186,6 +212,7 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
>>     POVS_CT_ENTRY entry = NULL;
>>     *entryCreated = FALSE;
>>     UINT32 state = 0;
>>+    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
>>     switch (ipProto)
>>     {
>>         case IPPROTO_TCP:
>>@@ -213,12 +240,9 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
>>                 if (parentEntry != NULL) {
>>                     entry->parent = parentEntry;
>>                 }
>>-                OvsCtAddEntry(entry, ctx, currentTime);
>>                 *entryCreated = TRUE;
>
>[Sai]: Did you address this in a different patch? Can you consolidate
>the initializing of *entryCreated with OvsCtAddEntry
>[Yin]: I still haven't figure out what this "entryCreated" is intended for. Can't we infer it from the return value of OvsCtEntryCreate?
>If return value is NULL, then entry is not created. Without knowing what this entryCreated means I really have no idea what value I should set it to.

[Sai]: The usage is very simple - if an entry was created as part of this call, then set this to True.
In other words, if OvsCtAddEntry(entry, ctx, natInfo, currentTime) is called, set this boolean.
This is used for triggering an Entry Created event. If you plan to consolidate the usage 
of OvsCtAddEntry, then set this to true wherever you call OvsCtAddEntry. If you want to refactor this code,
then please do so in a separate patch. If not, then just move the *entryCreated = TRUE; under 
OvsCtAddEntry(entry, ctx, natInfo, currentTime). I hope this clarifies your confusion. 


>
>>             }
>>-
>>-            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
>>-            return entry;
>>+            break;
>>         }
>>         case IPPROTO_ICMP:
>>         {
>>@@ -232,35 +256,31 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
>>             state |= OVS_CS_F_NEW;
>>             if (commit) {
>>                 entry = OvsConntrackCreateIcmpEntry(currentTime);
>>-                if (!entry) {
>>-                    return NULL;
>>-                }
>>-                OvsCtAddEntry(entry, ctx, currentTime);
>>                 *entryCreated = TRUE;
>>             }
>>-
>>-            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
>>-            return entry;
>>+            break;
>>         }
>>         case IPPROTO_UDP:
>>         {
>>             state |= OVS_CS_F_NEW;
>>             if (commit) {
>>                 entry = OvsConntrackCreateOtherEntry(currentTime);
>>-                if (!entry) {
>>-                    return NULL;
>>-                }
>>-                OvsCtAddEntry(entry, ctx, currentTime);
>>                 *entryCreated = TRUE;
>>             }
>>-
>>-            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
>>-            return entry;
>>+            break;
>>         }
>>         default:
>>             goto invalid;
>>     }
>> 
>>+    if (commit && !entry) {
>>+        return NULL;
>>+    }
>>+    if (entry && !OvsCtAddEntry(entry, ctx, natInfo, currentTime)) {
>>+        return NULL;
>>+    }
>>+    OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
>>+    return entry;
>> invalid:
>>     state |= OVS_CS_F_INVALID;
>>     OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
>>@@ -269,11 +289,11 @@ invalid:
>> 
>> static enum CT_UPDATE_RES
>> OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
>>-                        PNET_BUFFER_LIST nbl,
>>-                        UINT8 ipProto,
>>-                        UINT32 l4Offset,
>>-                        BOOLEAN reply,
>>-                        UINT64 now)
>>+                 PNET_BUFFER_LIST nbl,
>>+                 UINT8 ipProto,
>>+                 UINT32 l4Offset,
>>+                 BOOLEAN reply,
>>+                 UINT64 now)
>> {
>>     switch (ipProto)
>>     {
>>@@ -299,6 +319,12 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
>> static __inline VOID
>> OvsCtEntryDelete(POVS_CT_ENTRY entry)
>> {
>>+    if (entry == NULL) {
>>+        return;
>>+    }
>>+    if (entry->natInfo.natAction) {
>>+        OvsNatDeleteKey(&entry->key);
>>+    }
>>     OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
>>     RemoveEntryList(&entry->link);
>>     OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
>>@@ -308,10 +334,6 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry)
>> static __inline BOOLEAN
>> OvsCtEntryExpired(POVS_CT_ENTRY entry)
>> {
>>-    if (entry == NULL) {
>>-        return TRUE;
>>-    }
>>-
>>     UINT64 currentTime;
>>     NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
>>     return entry->expiration < currentTime;
>>@@ -346,7 +368,7 @@ OvsDetectCtPacket(OvsForwardingContext *fwdCtx,
>>     return NDIS_STATUS_NOT_SUPPORTED;
>> }
>> 
>>-static __inline BOOLEAN
>>+BOOLEAN
>> OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
>> {
>>     return ((ctxKey.src.addr.ipv4 == entryKey.src.addr.ipv4) &&
>>@@ -372,13 +394,14 @@ OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
>>     }
>> }
>> 
>>-static __inline POVS_CT_ENTRY
>>+POVS_CT_ENTRY
>> OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>> {
>>     PLIST_ENTRY link;
>>     POVS_CT_ENTRY entry;
>>     BOOLEAN reply = FALSE;
>>     POVS_CT_ENTRY found = NULL;
>>+    OVS_CT_KEY key = ctx->key;
>> 
>>     if (!ctTotalEntries) {
>>         return found;
>>@@ -387,13 +410,19 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>>     LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) {
>>         entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
>> 
>>-        if (OvsCtKeyAreSame(ctx->key,entry->key)) {
>>+        if (OvsCtKeyAreSame(key,entry->key)) {
>>             found = entry;
>>             reply = FALSE;
>>             break;
>>         }
>> 
>>-        if (OvsCtKeyAreSame(ctx->key,entry->rev_key)) {
>>+        /* Reverse NAT must be performed before OvsCtLookup, so here
>>+         * we simply need to flip the src and dst in key and compare
>>+         * they are equal. Note that flipped key is not equal to
>>+         * rev_key due to NAT effect.
>>+         */
>>+        OvsCtKeyReverse(&key);
>>+        if (OvsCtKeyAreSame(key, entry->key)) {
>>             found = entry;
>>             reply = TRUE;
>>             break;
>>@@ -412,17 +441,18 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>>     return found;
>> }
>> 
>>-static __inline UINT32
>>-OvsExtractLookupCtxHash(OvsConntrackKeyLookupCtx *ctx)
>>+UINT32
>>+OvsHashCtKey(const OVS_CT_KEY *key)
>> {
>>-    UINT32 hsrc, hdst,hash;
>>-    hsrc = OvsJhashBytes((UINT32*) &ctx->key.src, sizeof(ctx->key.src), 0);
>>-    hdst = OvsJhashBytes((UINT32*) &ctx->key.dst, sizeof(ctx->key.dst), 0);
>>+    UINT32 hsrc, hdst, hash;
>>+    hsrc = OvsJhashBytes((UINT32*) &key->src, sizeof(key->src), 0);
>>+    hdst = OvsJhashBytes((UINT32*) &key->dst, sizeof(key->dst), 0);
>>     hash = hsrc ^ hdst; /* TO identify reverse traffic */
>>-    return OvsJhashBytes((uint32_t *) &ctx->key.dst + 1,
>>-                         ((uint32_t *) (&ctx->key + 1) -
>>-                         (uint32_t *) (&ctx->key.dst + 1)),
>>+    hash = OvsJhashBytes((uint32_t *) &key->dst + 1,
>>+                         ((uint32_t *) (key + 1) -
>>+                         (uint32_t *) (&key->dst + 1)),
>>                          hash);
>>+    return hash;
>> }
>> 
>> static UINT8
>>@@ -453,6 +483,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
>>                     PNET_BUFFER_LIST curNbl,
>>                     UINT32 l4Offset)
>> {
>>+    const OVS_NAT_ENTRY *natEntry;
>>     ctx->key.zone = zone;
>>     ctx->key.dl_type = flowKey->l2.dlType;
>>     ctx->related = FALSE;
>>@@ -514,7 +545,14 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
>>         return NDIS_STATUS_INVALID_PACKET;
>>     }
>> 
>>-    ctx->hash = OvsExtractLookupCtxHash(ctx);
>>+    natEntry = OvsNatLookup(&ctx->key, TRUE);
>>+    if (natEntry) {
>>+        /* Translate address first for reverse NAT */
>>+        ctx->key = natEntry->ctEntry->key;
>>+        OvsCtKeyReverse(&ctx->key);
>>+    }
>>+
>>+    ctx->hash = OvsHashCtKey(&ctx->key);
>>     return NDIS_STATUS_SUCCESS;
>> }
>> 
>>@@ -532,17 +570,19 @@ OvsDetectFtpPacket(OvsFlowKey *key) {
>>  *----------------------------------------------------------------------------
>>  */
>> static __inline POVS_CT_ENTRY
>>-OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
>>+OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
>>                          UINT32 l4Offset,
>>                          OvsConntrackKeyLookupCtx *ctx,
>>                          OvsFlowKey *key,
>>                          UINT16 zone,
>>+                         NAT_ACTION_INFO *natInfo,
>>                          BOOLEAN commit,
>>                          UINT64 currentTime,
>>                          BOOLEAN *entryCreated)
>> {
>>     POVS_CT_ENTRY entry = ctx->entry;
>>     UINT32 state = 0;
>>+    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
>>     *entryCreated = FALSE;
>> 
>>     /* If an entry was found, update the state based on TCP flags */
>>@@ -569,8 +609,8 @@ OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
>>             //Delete and update the Conntrack
>>             OvsCtEntryDelete(ctx->entry);
>>             ctx->entry = NULL;
>>-            entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto, l4Offset,
>>-                                     ctx, key, commit, currentTime,
>>+            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset,
>>+                                     ctx, key, natInfo, commit, currentTime,
>>                                      entryCreated);
>>             if (!entry) {
>>                 return NULL;
>>@@ -637,7 +677,7 @@ OvsConntrackSetLabels(OvsFlowKey *key,
>> }
>> 
>> static __inline NDIS_STATUS
>>-OvsCtExecute_(PNET_BUFFER_LIST curNbl,
>>+OvsCtExecute_(OvsForwardingContext *fwdCtx,
>>               OvsFlowKey *key,
>>               OVS_PACKET_HDR_INFO *layers,
>>               BOOLEAN commit,
>>@@ -650,13 +690,12 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
>> {
>>     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
>>     POVS_CT_ENTRY entry = NULL;
>>+    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
>>     OvsConntrackKeyLookupCtx ctx = { 0 };
>>     LOCK_STATE_EX lockState;
>>     UINT64 currentTime;
>>     NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
>> 
>>-    /* XXX: Not referenced for now */
>>-    UNREFERENCED_PARAMETER(natInfo);
>> 
>>     /* Retrieve the Conntrack Key related fields from packet */
>>     OvsCtSetupLookupCtx(key, zone, &ctx, curNbl, layers->l4Offset);
>>@@ -675,18 +714,31 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
>> 
>>     if (!entry) {
>>         /* If no matching entry was found, create one and add New state */
>>-        entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto,
>>+        entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
>>                                  layers->l4Offset, &ctx,
>>-                                 key, commit, currentTime,
>>+                                 key, natInfo, commit, currentTime,
>>                                  &entryCreated);
>>     } else {
>>         /* Process the entry and update CT flags */
>>         OvsCtIncrementCounters(entry, ctx.reply, curNbl);
>>-        entry = OvsProcessConntrackEntry(curNbl, layers->l4Offset, &ctx, key,
>>-                                         zone, commit, currentTime,
>>+        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
>>+                                         zone, natInfo, commit, currentTime,
>>                                          &entryCreated);
>>     }
>> 
>>+    /*
>>+     * Note that natInfo is not the same as entry->natInfo here. natInfo
>>+     * is decided by action in the openflow rule, entry->natInfo is decided
>>+     * when the entry is created. In the reverse NAT case, natInfo is
>>+     * NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or
>>+     * NAT_ACTION_DST without NAT_ACTION_REVERSE
>>+     */
>>+    if (entry && natInfo->natAction != NAT_ACTION_NONE)
>>+    {
>>+        OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
>>+                     key, ctx.reply);
>>+    }
>>+
>>     if (entry && mark) {
>>         OvsConntrackSetMark(key, entry, mark->value, mark->mask);
>>     }
>>@@ -735,10 +787,8 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
>>     MD_LABELS *labels = NULL;
>>     PCHAR helper = NULL;
>>     NAT_ACTION_INFO natActionInfo;
>>-    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
>>     OVS_PACKET_HDR_INFO *layers = &fwdCtx->layers;
>>     PNET_BUFFER_LIST newNbl = NULL;
>>-    NAT_ACTION_INFO natActionInfo;
>>     NDIS_STATUS status;
>> 
>>     memset(&natActionInfo, 0, sizeof natActionInfo);
>>@@ -775,12 +825,12 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
>>         BOOLEAN hasMaxIp = FALSE;
>>         BOOLEAN hasMaxPort = FALSE;
>>         NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) {
>>-            enum ovs_nat_attr sub_type_nest = NlAttrType(natAttr);
>>-            switch(sub_type_nest) {
>>+            enum ovs_nat_attr subtype = NlAttrType(natAttr);
>>+            switch(subtype) {
>>             case OVS_NAT_ATTR_SRC:
>>             case OVS_NAT_ATTR_DST:
>>                 natActionInfo.natAction |=
>>-                    ((sub_type_nest == OVS_NAT_ATTR_SRC)
>>+                    ((subtype == OVS_NAT_ATTR_SRC)
>>                         ? NAT_ACTION_SRC : NAT_ACTION_DST);
>>                 break;
>>             case OVS_NAT_ATTR_IP_MIN:
>>@@ -844,19 +894,19 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
>>         commit = TRUE;
>>     }
>>     /* If newNbl is not allocated, use the current Nbl*/
>>-    status = OvsCtExecute_(newNbl != NULL ? newNbl : curNbl, key, layers,
>>+    status = OvsCtExecute_(fwdCtx, key, layers,
>>                            commit, force, zone, mark, labels, helper, &natActionInfo);
>>     return status;
>> }
>> 
>> /*
>>  *----------------------------------------------------------------------------
>>- * ovsConntrackEntryCleaner
>>+ * OvsConntrackEntryCleaner
>>  *     Runs periodically and cleans up the connection tracker
>>  *----------------------------------------------------------------------------
>>  */
>> VOID
>>-ovsConntrackEntryCleaner(PVOID data)
>>+OvsConntrackEntryCleaner(PVOID data)
>> {
>> 
>>     POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data;
>>@@ -873,15 +923,13 @@ ovsConntrackEntryCleaner(PVOID data)
>>         }
>> 
>>         /* Set the timeout for the thread and cleanup */
>>-        UINT64 currentTime, threadSleepTimeout;
>>-        NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
>>-        threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL;
>>+        INT64 threadSleepTimeout = -CT_CLEANUP_INTERVAL;
>> 
>>         if (ctTotalEntries) {
>>             for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
>>                 LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) {
>>                     entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
>>-                    if (entry->expiration < currentTime) {
>>+                    if (entry && OvsCtEntryExpired(entry)) {
>>                         OvsCtEntryDelete(entry);
>>                     }
>>                 }
>>@@ -921,6 +969,7 @@ OvsCtFlush(UINT16 zone)
>>         }
>>     }
>> 
>>+    OvsNatFlush(zone);
>>     NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
>>     return NDIS_STATUS_SUCCESS;
>> }
>>diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
>>index 1ad289f..fcdd96e 100644
>>--- a/datapath-windows/ovsext/Conntrack.h
>>+++ b/datapath-windows/ovsext/Conntrack.h
>>@@ -18,8 +18,9 @@
>> #define __OVS_CONNTRACK_H_ 1
>> 
>> #include "precomp.h"
>>-#include "Flow.h"
>>+#include "Actions.h"
>> #include "Debug.h"
>>+#include "Flow.h"
>> #include "Actions.h"
>> #include <stddef.h>
>> 
>>@@ -87,6 +88,14 @@ typedef struct _OVS_CT_KEY {
>>     UINT64 byteCount;
>> } OVS_CT_KEY, *POVS_CT_KEY;
>> 
>>+typedef struct _NAT_ACTION_INFO {
>>+    struct ct_addr minAddr;
>>+    struct ct_addr maxAddr;
>>+    uint16_t minPort;
>>+    uint16_t maxPort;
>>+    uint16_t natAction;
>>+} NAT_ACTION_INFO, *PNAT_ACTION_INFO;
>>+
>> typedef struct OVS_CT_ENTRY {
>>     OVS_CT_KEY  key;
>>     OVS_CT_KEY  rev_key;
>>@@ -95,6 +104,7 @@ typedef struct OVS_CT_ENTRY {
>>     UINT32      mark;
>>     UINT64      timestampStart;
>>     struct ovs_key_ct_labels labels;
>>+    NAT_ACTION_INFO natInfo;
>>     PVOID       parent; /* Points to main connection */
>> } OVS_CT_ENTRY, *POVS_CT_ENTRY;
>> 
>>@@ -119,14 +129,6 @@ typedef struct OvsConntrackKeyLookupCtx {
>>     BOOLEAN         related;
>> } OvsConntrackKeyLookupCtx;
>> 
>>-typedef struct _NAT_ACTION_INFO {
>>-    struct ct_addr minAddr;
>>-    struct ct_addr maxAddr;
>>-    uint16_t minPort;
>>-    uint16_t maxPort;
>>-    uint16_t natAction;
>>-} NAT_ACTION_INFO, *PNAT_ACTION_INFO;
>>-
>> #define CT_HASH_TABLE_SIZE ((UINT32)1 << 10)
>> #define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1)
>> #define CT_INTERVAL_SEC 10000000LL //1s
>>@@ -225,4 +227,9 @@ NDIS_STATUS OvsCtHandleFtp(PNET_BUFFER_LIST curNbl,
>>                            POVS_CT_ENTRY entry,
>>                            BOOLEAN request);
>> 
>>+UINT32 OvsHashCtKey(const OVS_CT_KEY *key);
>>+BOOLEAN OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey);
>>+POVS_CT_ENTRY OvsCtLookup(OvsConntrackKeyLookupCtx *ctx);
>>+
>>+
>> #endif /* __OVS_CONNTRACK_H_ */
>>diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
>>index ecfc0b8..69dfb56 100644
>>--- a/datapath-windows/ovsext/ovsext.vcxproj
>>+++ b/datapath-windows/ovsext/ovsext.vcxproj
>>@@ -103,6 +103,7 @@
>>     <ClInclude Include="Actions.h" />
>>     <ClInclude Include="Atomic.h" />
>>     <ClInclude Include="BufferMgmt.h" />
>>+    <ClInclude Include="Conntrack-nat.h" />
>>     <ClInclude Include="Conntrack.h" />
>>     <ClInclude Include="Datapath.h" />
>>     <ClInclude Include="Debug.h" />
>>@@ -257,6 +258,7 @@
>>   <ItemGroup>
>>     <ClCompile Include="Actions.c" />
>>     <ClCompile Include="BufferMgmt.c" />
>>+    <ClCompile Include="Conntrack-nat.c" />
>>     <ClCompile Include="Conntrack-related.c" />
>>     <ClCompile Include="Conntrack-ftp.c" />
>>     <ClCompile Include="Conntrack-icmp.c" />
>>-- 
>>2.10.2.windows.1
>>
>>_______________________________________________
>>dev mailing list
>>dev at openvswitch.org
>>https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=Z6vowHUOjP5ysP_g372c49Nqc1vEKqHKNBkR5Q5Z7uo&m=krqLYW4fA8lSeZKyXbb9EgpQ_qbvwFut1mnnZnoMm8Y&s=a5LlvAOGUnCamnIwwef7xF1k_uEv6U-FF2yBxbc65Z8&e= 


More information about the dev mailing list