[ovs-dev] [PATCH v6 3/4] datapath-windows: NAT integration with conntrack

Yin Lin linyi at vmware.com
Mon May 8 22:38:44 UTC 2017


Signed-off-by: Yin Lin <linyi at vmware.com>
---
 datapath-windows/automake.mk           |   4 +-
 datapath-windows/ovsext/Actions.c      | 118 ++++++++++++++++++++-
 datapath-windows/ovsext/Actions.h      |  20 ++++
 datapath-windows/ovsext/Conntrack.c    | 187 +++++++++++++++++++++------------
 datapath-windows/ovsext/Conntrack.h    |  25 +++--
 datapath-windows/ovsext/ovsext.vcxproj |   2 +
 6 files changed, 273 insertions(+), 83 deletions(-)

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
index 7177630..f1632cc 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -16,9 +16,9 @@ EXTRA_DIST += \
 	datapath-windows/ovsext/Conntrack-icmp.c \
 	datapath-windows/ovsext/Conntrack-other.c \
 	datapath-windows/ovsext/Conntrack-related.c \
-    datapath-windows/ovsext/Conntrack-nat.c \
+	datapath-windows/ovsext/Conntrack-nat.c \
 	datapath-windows/ovsext/Conntrack-tcp.c \
-    datapath-windows/ovsext/Conntrack-nat.h \
+	datapath-windows/ovsext/Conntrack-nat.h \
 	datapath-windows/ovsext/Conntrack.c \
 	datapath-windows/ovsext/Conntrack.h \
 	datapath-windows/ovsext/Datapath.c \
diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
index e2eae9a..d1938f3 100644
--- a/datapath-windows/ovsext/Actions.c
+++ b/datapath-windows/ovsext/Actions.c
@@ -1380,7 +1380,7 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
  *      based on the specified key.
  *----------------------------------------------------------------------------
  */
-static __inline NDIS_STATUS
+NDIS_STATUS
 OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
                   const struct ovs_key_udp *udpAttr)
 {
@@ -1427,7 +1427,7 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
  *      based on the specified key.
  *----------------------------------------------------------------------------
  */
-static __inline NDIS_STATUS
+NDIS_STATUS
 OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
                   const struct ovs_key_tcp *tcpAttr)
 {
@@ -1465,12 +1465,124 @@ OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
 
 /*
  *----------------------------------------------------------------------------
+ * OvsUpdateAddressAndPort --
+ *      Updates the source/destination IP and port fields in
+ *      ovsFwdCtx.curNbl inline based on the specified key.
+ *----------------------------------------------------------------------------
+ */
+OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
+                        UINT32 newAddr, UINT16 newPort,
+                        BOOLEAN isSource, BOOLEAN isTx)
+{
+    PUINT8 bufferStart;
+    UINT32 hdrSize;
+    OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
+    IPHdr *ipHdr;
+    TCPHdr *tcpHdr = NULL;
+    UDPHdr *udpHdr = NULL;
+    UINT32 *addrField = NULL;
+    UINT16 *portField = NULL;
+    UINT16 *checkField = NULL;
+    BOOLEAN l4Offload = FALSE;
+    NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+
+    ASSERT(layers->value != 0);
+
+    if (layers->isTcp || layers->isUdp) {
+        hdrSize = layers->l4Offset +
+                  layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr);
+    } else {
+        hdrSize = layers->l3Offset + sizeof (*ipHdr);
+    }
+
+    bufferStart = OvsGetHeaderBySize(ovsFwdCtx, hdrSize);
+    if (!bufferStart) {
+        return NDIS_STATUS_RESOURCES;
+    }
+
+    ipHdr = (IPHdr *)(bufferStart + layers->l3Offset);
+
+    if (layers->isTcp) {
+        tcpHdr = (TCPHdr *)(bufferStart + layers->l4Offset);
+    } else if (layers->isUdp) {
+        udpHdr = (UDPHdr *)(bufferStart + layers->l4Offset);
+    }
+
+    csumInfo.Value = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl,
+                                          TcpIpChecksumNetBufferListInfo);
+    /*
+     * Adjust the IP header inline as dictated by the action, and also update
+     * the IP and the TCP checksum for the data modified.
+     *
+     * In the future, this could be optimized to make one call to
+     * ChecksumUpdate32(). Ignoring this for now, since for the most common
+     * case, we only update the TTL.
+     */
+
+    if (isSource) {
+        addrField = &ipHdr->saddr;
+        if (tcpHdr) {
+            portField = &tcpHdr->source;
+            checkField = &tcpHdr->check;
+            l4Offload = isTx ? (BOOLEAN)csumInfo.Transmit.TcpChecksum :
+                        ((BOOLEAN)csumInfo.Receive.TcpChecksumSucceeded ||
+                         (BOOLEAN)csumInfo.Receive.TcpChecksumFailed);
+        } else if (udpHdr) {
+            portField = &udpHdr->source;
+            checkField = &udpHdr->check;
+            l4Offload = isTx ? (BOOLEAN)csumInfo.Transmit.UdpChecksum :
+                        ((BOOLEAN)csumInfo.Receive.UdpChecksumSucceeded ||
+                         (BOOLEAN)csumInfo.Receive.UdpChecksumFailed);
+        }
+    } else {
+        addrField = &ipHdr->daddr;
+        if (tcpHdr) {
+            portField = &tcpHdr->dest;
+            checkField = &tcpHdr->check;
+        } else if (udpHdr) {
+            portField = &udpHdr->dest;
+            checkField = &udpHdr->check;
+        }
+    }
+    if (*addrField != newAddr) {
+        UINT32 oldAddr = *addrField;
+        if (checkField && *checkField != 0) {
+            if (l4Offload) {
+                /* Recompute IP pseudo checksum */
+                *checkField = ~(*checkField);
+                *checkField = ChecksumUpdate32(*checkField, oldAddr,
+                                               newAddr);
+                *checkField = ~(*checkField);
+            } else {
+                *checkField = ChecksumUpdate32(*checkField, oldAddr,
+                                               newAddr);
+            }
+        }
+        if (ipHdr->check != 0) {
+            ipHdr->check = ChecksumUpdate32(ipHdr->check, oldAddr,
+                                            newAddr);
+        }
+        *addrField = newAddr;
+    }
+
+    if (portField && *portField != newPort) {
+        if (checkField && !l4Offload) {
+            *checkField = ChecksumUpdate16(*checkField, *portField,
+                                           newPort);
+        }
+        *portField = newPort;
+    }
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ *----------------------------------------------------------------------------
  * OvsUpdateIPv4Header --
  *      Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
  *      specified key.
  *----------------------------------------------------------------------------
  */
-static __inline NDIS_STATUS
+NDIS_STATUS
 OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
                     const struct ovs_key_ipv4 *ipAttr)
 {
diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h
index 1ce6c20..fd050d5 100644
--- a/datapath-windows/ovsext/Actions.h
+++ b/datapath-windows/ovsext/Actions.h
@@ -110,4 +110,24 @@ OvsDoRecirc(POVS_SWITCH_CONTEXT switchContext,
             UINT32 srcPortNo,
             OVS_PACKET_HDR_INFO *layers);
 
+PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
+                          UINT32 size);
+
+NDIS_STATUS
+OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
+                  const struct ovs_key_udp *udpAttr);
+
+NDIS_STATUS
+OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
+                  const struct ovs_key_tcp *tcpAttr);
+
+NDIS_STATUS
+OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
+                    const struct ovs_key_ipv4 *ipAttr);
+
+NDIS_STATUS
+OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
+                        UINT32 newAddr, UINT16 newPort,
+                        BOOLEAN isSource, BOOLEAN isTx);
+
 #endif /* __ACTIONS_H_ */
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
index 9824368..6e95f47 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -19,6 +19,7 @@
 #include "Jhash.h"
 #include "PacketParser.h"
 #include "Event.h"
+#include "Conntrack-nat.h"
 
 #pragma warning(push)
 #pragma warning(disable:4311)
@@ -27,7 +28,7 @@
 #define SEC_TO_UNIX_EPOCH 11644473600LL
 #define SEC_TO_NANOSEC 1000000000LL
 
-KSTART_ROUTINE ovsConntrackEntryCleaner;
+KSTART_ROUTINE OvsConntrackEntryCleaner;
 static PLIST_ENTRY ovsConntrackTable;
 static OVS_CT_THREAD_CTX ctThreadCtx;
 static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
@@ -72,7 +73,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
     /* Init CT Cleaner Thread */
     KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE);
     status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
-                                  NULL, ovsConntrackEntryCleaner,
+                                  NULL, OvsConntrackEntryCleaner,
                                   &ctThreadCtx);
 
     if (status != STATUS_SUCCESS) {
@@ -89,6 +90,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
                               &ctThreadCtx.threadObject, NULL);
     ZwClose(threadHandle);
     threadHandle = NULL;
+
+    status = OvsNatInit();
+
+    if (status != STATUS_SUCCESS) {
+        OvsCleanupConntrack();
+        return status;
+    }
     return STATUS_SUCCESS;
 }
 
@@ -121,6 +129,7 @@ OvsCleanupConntrack(VOID)
 
     NdisFreeRWLock(ovsConntrackLockObj);
     ovsConntrackLockObj = NULL;
+    OvsNatCleanup();
 }
 
 static __inline VOID
@@ -160,25 +169,42 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
     OvsPostCtEvent(&ctEventEntry);
 }
 
-static __inline VOID
-OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, UINT64 now)
+static __inline BOOLEAN
+OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx,
+              PNAT_ACTION_INFO natInfo, UINT64 now)
 {
-    NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY));
-    NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY));
+    NdisMoveMemory(&entry->key, &ctx->key, sizeof(OVS_CT_KEY));
+    NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof(OVS_CT_KEY));
     OvsCtKeyReverse(&entry->rev_key);
+
+    /* NatInfo is always initialized to be disabled, so that if NAT action
+     * fails, we will not end up deleting an non-existent NAT entry.
+     */
+    if (natInfo != NULL && OvsIsForwardNat(natInfo->natAction)) {
+        entry->natInfo = *natInfo;
+        if (!OvsNatCtEntry(entry)) {
+            return FALSE;
+        }
+        ctx->hash = OvsHashCtKey(&entry->key);
+    } else {
+        entry->natInfo.natAction = natInfo->natAction;
+    }
+
     entry->timestampStart = now;
     InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
                    &entry->link);
 
     ctTotalEntries++;
+    return TRUE;
 }
 
 static __inline POVS_CT_ENTRY
-OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
+OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
                  UINT8 ipProto,
                  UINT32 l4Offset,
                  OvsConntrackKeyLookupCtx *ctx,
                  OvsFlowKey *key,
+                 PNAT_ACTION_INFO natInfo,
                  BOOLEAN commit,
                  UINT64 currentTime,
                  BOOLEAN *entryCreated)
@@ -186,6 +212,7 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
     POVS_CT_ENTRY entry = NULL;
     *entryCreated = FALSE;
     UINT32 state = 0;
+    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
     switch (ipProto)
     {
         case IPPROTO_TCP:
@@ -213,12 +240,9 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
                 if (parentEntry != NULL) {
                     entry->parent = parentEntry;
                 }
-                OvsCtAddEntry(entry, ctx, currentTime);
                 *entryCreated = TRUE;
             }
-
-            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
-            return entry;
+            break;
         }
         case IPPROTO_ICMP:
         {
@@ -232,35 +256,31 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
             state |= OVS_CS_F_NEW;
             if (commit) {
                 entry = OvsConntrackCreateIcmpEntry(currentTime);
-                if (!entry) {
-                    return NULL;
-                }
-                OvsCtAddEntry(entry, ctx, currentTime);
                 *entryCreated = TRUE;
             }
-
-            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
-            return entry;
+            break;
         }
         case IPPROTO_UDP:
         {
             state |= OVS_CS_F_NEW;
             if (commit) {
                 entry = OvsConntrackCreateOtherEntry(currentTime);
-                if (!entry) {
-                    return NULL;
-                }
-                OvsCtAddEntry(entry, ctx, currentTime);
                 *entryCreated = TRUE;
             }
-
-            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
-            return entry;
+            break;
         }
         default:
             goto invalid;
     }
 
+    if (commit && !entry) {
+        return NULL;
+    }
+    if (entry && !OvsCtAddEntry(entry, ctx, natInfo, currentTime)) {
+        return NULL;
+    }
+    OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
+    return entry;
 invalid:
     state |= OVS_CS_F_INVALID;
     OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
@@ -269,11 +289,11 @@ invalid:
 
 static enum CT_UPDATE_RES
 OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
-                        PNET_BUFFER_LIST nbl,
-                        UINT8 ipProto,
-                        UINT32 l4Offset,
-                        BOOLEAN reply,
-                        UINT64 now)
+                 PNET_BUFFER_LIST nbl,
+                 UINT8 ipProto,
+                 UINT32 l4Offset,
+                 BOOLEAN reply,
+                 UINT64 now)
 {
     switch (ipProto)
     {
@@ -299,6 +319,12 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
 static __inline VOID
 OvsCtEntryDelete(POVS_CT_ENTRY entry)
 {
+    if (entry == NULL) {
+        return;
+    }
+    if (entry->natInfo.natAction) {
+        OvsNatDeleteKey(&entry->key);
+    }
     OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
     RemoveEntryList(&entry->link);
     OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
@@ -308,10 +334,6 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry)
 static __inline BOOLEAN
 OvsCtEntryExpired(POVS_CT_ENTRY entry)
 {
-    if (entry == NULL) {
-        return TRUE;
-    }
-
     UINT64 currentTime;
     NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
     return entry->expiration < currentTime;
@@ -346,7 +368,7 @@ OvsDetectCtPacket(OvsForwardingContext *fwdCtx,
     return NDIS_STATUS_NOT_SUPPORTED;
 }
 
-static __inline BOOLEAN
+BOOLEAN
 OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
 {
     return ((ctxKey.src.addr.ipv4 == entryKey.src.addr.ipv4) &&
@@ -372,13 +394,14 @@ OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
     }
 }
 
-static __inline POVS_CT_ENTRY
+POVS_CT_ENTRY
 OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
 {
     PLIST_ENTRY link;
     POVS_CT_ENTRY entry;
     BOOLEAN reply = FALSE;
     POVS_CT_ENTRY found = NULL;
+    OVS_CT_KEY key = ctx->key;
 
     if (!ctTotalEntries) {
         return found;
@@ -387,13 +410,19 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
     LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) {
         entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
 
-        if (OvsCtKeyAreSame(ctx->key,entry->key)) {
+        if (OvsCtKeyAreSame(key,entry->key)) {
             found = entry;
             reply = FALSE;
             break;
         }
 
-        if (OvsCtKeyAreSame(ctx->key,entry->rev_key)) {
+        /* Reverse NAT must be performed before OvsCtLookup, so here
+         * we simply need to flip the src and dst in key and compare
+         * they are equal. Note that flipped key is not equal to
+         * rev_key due to NAT effect.
+         */
+        OvsCtKeyReverse(&key);
+        if (OvsCtKeyAreSame(key, entry->key)) {
             found = entry;
             reply = TRUE;
             break;
@@ -412,17 +441,18 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
     return found;
 }
 
-static __inline UINT32
-OvsExtractLookupCtxHash(OvsConntrackKeyLookupCtx *ctx)
+UINT32
+OvsHashCtKey(const OVS_CT_KEY *key)
 {
-    UINT32 hsrc, hdst,hash;
-    hsrc = OvsJhashBytes((UINT32*) &ctx->key.src, sizeof(ctx->key.src), 0);
-    hdst = OvsJhashBytes((UINT32*) &ctx->key.dst, sizeof(ctx->key.dst), 0);
+    UINT32 hsrc, hdst, hash;
+    hsrc = OvsJhashBytes((UINT32*) &key->src, sizeof(key->src), 0);
+    hdst = OvsJhashBytes((UINT32*) &key->dst, sizeof(key->dst), 0);
     hash = hsrc ^ hdst; /* TO identify reverse traffic */
-    return OvsJhashBytes((uint32_t *) &ctx->key.dst + 1,
-                         ((uint32_t *) (&ctx->key + 1) -
-                         (uint32_t *) (&ctx->key.dst + 1)),
+    hash = OvsJhashBytes((uint32_t *) &key->dst + 1,
+                         ((uint32_t *) (key + 1) -
+                         (uint32_t *) (&key->dst + 1)),
                          hash);
+    return hash;
 }
 
 static UINT8
@@ -453,6 +483,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
                     PNET_BUFFER_LIST curNbl,
                     UINT32 l4Offset)
 {
+    const OVS_NAT_ENTRY *natEntry;
     ctx->key.zone = zone;
     ctx->key.dl_type = flowKey->l2.dlType;
     ctx->related = FALSE;
@@ -514,7 +545,14 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
         return NDIS_STATUS_INVALID_PACKET;
     }
 
-    ctx->hash = OvsExtractLookupCtxHash(ctx);
+    natEntry = OvsNatLookup(&ctx->key, TRUE);
+    if (natEntry) {
+        /* Translate address first for reverse NAT */
+        ctx->key = natEntry->ctEntry->key;
+        OvsCtKeyReverse(&ctx->key);
+    }
+
+    ctx->hash = OvsHashCtKey(&ctx->key);
     return NDIS_STATUS_SUCCESS;
 }
 
@@ -532,17 +570,19 @@ OvsDetectFtpPacket(OvsFlowKey *key) {
  *----------------------------------------------------------------------------
  */
 static __inline POVS_CT_ENTRY
-OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
+OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
                          UINT32 l4Offset,
                          OvsConntrackKeyLookupCtx *ctx,
                          OvsFlowKey *key,
                          UINT16 zone,
+                         NAT_ACTION_INFO *natInfo,
                          BOOLEAN commit,
                          UINT64 currentTime,
                          BOOLEAN *entryCreated)
 {
     POVS_CT_ENTRY entry = ctx->entry;
     UINT32 state = 0;
+    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
     *entryCreated = FALSE;
 
     /* If an entry was found, update the state based on TCP flags */
@@ -569,8 +609,8 @@ OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
             //Delete and update the Conntrack
             OvsCtEntryDelete(ctx->entry);
             ctx->entry = NULL;
-            entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto, l4Offset,
-                                     ctx, key, commit, currentTime,
+            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset,
+                                     ctx, key, natInfo, commit, currentTime,
                                      entryCreated);
             if (!entry) {
                 return NULL;
@@ -637,7 +677,7 @@ OvsConntrackSetLabels(OvsFlowKey *key,
 }
 
 static __inline NDIS_STATUS
-OvsCtExecute_(PNET_BUFFER_LIST curNbl,
+OvsCtExecute_(OvsForwardingContext *fwdCtx,
               OvsFlowKey *key,
               OVS_PACKET_HDR_INFO *layers,
               BOOLEAN commit,
@@ -650,13 +690,12 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
 {
     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
     POVS_CT_ENTRY entry = NULL;
+    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
     OvsConntrackKeyLookupCtx ctx = { 0 };
     LOCK_STATE_EX lockState;
     UINT64 currentTime;
     NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
 
-    /* XXX: Not referenced for now */
-    UNREFERENCED_PARAMETER(natInfo);
 
     /* Retrieve the Conntrack Key related fields from packet */
     OvsCtSetupLookupCtx(key, zone, &ctx, curNbl, layers->l4Offset);
@@ -675,18 +714,31 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
 
     if (!entry) {
         /* If no matching entry was found, create one and add New state */
-        entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto,
+        entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
                                  layers->l4Offset, &ctx,
-                                 key, commit, currentTime,
+                                 key, natInfo, commit, currentTime,
                                  &entryCreated);
     } else {
         /* Process the entry and update CT flags */
         OvsCtIncrementCounters(entry, ctx.reply, curNbl);
-        entry = OvsProcessConntrackEntry(curNbl, layers->l4Offset, &ctx, key,
-                                         zone, commit, currentTime,
+        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
+                                         zone, natInfo, commit, currentTime,
                                          &entryCreated);
     }
 
+    /*
+     * Note that natInfo is not the same as entry->natInfo here. natInfo
+     * is decided by action in the openflow rule, entry->natInfo is decided
+     * when the entry is created. In the reverse NAT case, natInfo is
+     * NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or
+     * NAT_ACTION_DST without NAT_ACTION_REVERSE
+     */
+    if (entry && natInfo->natAction != NAT_ACTION_NONE)
+    {
+        OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
+                     key, ctx.reply);
+    }
+
     if (entry && mark) {
         OvsConntrackSetMark(key, entry, mark->value, mark->mask);
     }
@@ -735,10 +787,8 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
     MD_LABELS *labels = NULL;
     PCHAR helper = NULL;
     NAT_ACTION_INFO natActionInfo;
-    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
     OVS_PACKET_HDR_INFO *layers = &fwdCtx->layers;
     PNET_BUFFER_LIST newNbl = NULL;
-    NAT_ACTION_INFO natActionInfo;
     NDIS_STATUS status;
 
     memset(&natActionInfo, 0, sizeof natActionInfo);
@@ -775,12 +825,12 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
         BOOLEAN hasMaxIp = FALSE;
         BOOLEAN hasMaxPort = FALSE;
         NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) {
-            enum ovs_nat_attr sub_type_nest = NlAttrType(natAttr);
-            switch(sub_type_nest) {
+            enum ovs_nat_attr subtype = NlAttrType(natAttr);
+            switch(subtype) {
             case OVS_NAT_ATTR_SRC:
             case OVS_NAT_ATTR_DST:
                 natActionInfo.natAction |=
-                    ((sub_type_nest == OVS_NAT_ATTR_SRC)
+                    ((subtype == OVS_NAT_ATTR_SRC)
                         ? NAT_ACTION_SRC : NAT_ACTION_DST);
                 break;
             case OVS_NAT_ATTR_IP_MIN:
@@ -844,19 +894,19 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
         commit = TRUE;
     }
     /* If newNbl is not allocated, use the current Nbl*/
-    status = OvsCtExecute_(newNbl != NULL ? newNbl : curNbl, key, layers,
+    status = OvsCtExecute_(fwdCtx, key, layers,
                            commit, force, zone, mark, labels, helper, &natActionInfo);
     return status;
 }
 
 /*
  *----------------------------------------------------------------------------
- * ovsConntrackEntryCleaner
+ * OvsConntrackEntryCleaner
  *     Runs periodically and cleans up the connection tracker
  *----------------------------------------------------------------------------
  */
 VOID
-ovsConntrackEntryCleaner(PVOID data)
+OvsConntrackEntryCleaner(PVOID data)
 {
 
     POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data;
@@ -873,15 +923,13 @@ ovsConntrackEntryCleaner(PVOID data)
         }
 
         /* Set the timeout for the thread and cleanup */
-        UINT64 currentTime, threadSleepTimeout;
-        NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
-        threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL;
+        INT64 threadSleepTimeout = -CT_CLEANUP_INTERVAL;
 
         if (ctTotalEntries) {
             for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
                 LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) {
                     entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
-                    if (entry->expiration < currentTime) {
+                    if (entry && OvsCtEntryExpired(entry)) {
                         OvsCtEntryDelete(entry);
                     }
                 }
@@ -921,6 +969,7 @@ OvsCtFlush(UINT16 zone)
         }
     }
 
+    OvsNatFlush(zone);
     NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
     return NDIS_STATUS_SUCCESS;
 }
diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
index 1ad289f..fcdd96e 100644
--- a/datapath-windows/ovsext/Conntrack.h
+++ b/datapath-windows/ovsext/Conntrack.h
@@ -18,8 +18,9 @@
 #define __OVS_CONNTRACK_H_ 1
 
 #include "precomp.h"
-#include "Flow.h"
+#include "Actions.h"
 #include "Debug.h"
+#include "Flow.h"
 #include "Actions.h"
 #include <stddef.h>
 
@@ -87,6 +88,14 @@ typedef struct _OVS_CT_KEY {
     UINT64 byteCount;
 } OVS_CT_KEY, *POVS_CT_KEY;
 
+typedef struct _NAT_ACTION_INFO {
+    struct ct_addr minAddr;
+    struct ct_addr maxAddr;
+    uint16_t minPort;
+    uint16_t maxPort;
+    uint16_t natAction;
+} NAT_ACTION_INFO, *PNAT_ACTION_INFO;
+
 typedef struct OVS_CT_ENTRY {
     OVS_CT_KEY  key;
     OVS_CT_KEY  rev_key;
@@ -95,6 +104,7 @@ typedef struct OVS_CT_ENTRY {
     UINT32      mark;
     UINT64      timestampStart;
     struct ovs_key_ct_labels labels;
+    NAT_ACTION_INFO natInfo;
     PVOID       parent; /* Points to main connection */
 } OVS_CT_ENTRY, *POVS_CT_ENTRY;
 
@@ -119,14 +129,6 @@ typedef struct OvsConntrackKeyLookupCtx {
     BOOLEAN         related;
 } OvsConntrackKeyLookupCtx;
 
-typedef struct _NAT_ACTION_INFO {
-    struct ct_addr minAddr;
-    struct ct_addr maxAddr;
-    uint16_t minPort;
-    uint16_t maxPort;
-    uint16_t natAction;
-} NAT_ACTION_INFO, *PNAT_ACTION_INFO;
-
 #define CT_HASH_TABLE_SIZE ((UINT32)1 << 10)
 #define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1)
 #define CT_INTERVAL_SEC 10000000LL //1s
@@ -225,4 +227,9 @@ NDIS_STATUS OvsCtHandleFtp(PNET_BUFFER_LIST curNbl,
                            POVS_CT_ENTRY entry,
                            BOOLEAN request);
 
+UINT32 OvsHashCtKey(const OVS_CT_KEY *key);
+BOOLEAN OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey);
+POVS_CT_ENTRY OvsCtLookup(OvsConntrackKeyLookupCtx *ctx);
+
+
 #endif /* __OVS_CONNTRACK_H_ */
diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
index ecfc0b8..69dfb56 100644
--- a/datapath-windows/ovsext/ovsext.vcxproj
+++ b/datapath-windows/ovsext/ovsext.vcxproj
@@ -103,6 +103,7 @@
     <ClInclude Include="Actions.h" />
     <ClInclude Include="Atomic.h" />
     <ClInclude Include="BufferMgmt.h" />
+    <ClInclude Include="Conntrack-nat.h" />
     <ClInclude Include="Conntrack.h" />
     <ClInclude Include="Datapath.h" />
     <ClInclude Include="Debug.h" />
@@ -257,6 +258,7 @@
   <ItemGroup>
     <ClCompile Include="Actions.c" />
     <ClCompile Include="BufferMgmt.c" />
+    <ClCompile Include="Conntrack-nat.c" />
     <ClCompile Include="Conntrack-related.c" />
     <ClCompile Include="Conntrack-ftp.c" />
     <ClCompile Include="Conntrack-icmp.c" />
-- 
2.10.2.windows.1



More information about the dev mailing list