[ovs-dev] [PATCH 3/3] datapath-windows: NAT integration with conntrack

Yin Lin linyi at vmware.com
Mon Feb 27 22:14:11 UTC 2017


Signed-off-by: Yin Lin <linyi at vmware.com>
---
 datapath-windows/ovsext/Actions.c      | 158 ++++++++++++++++++-----------
 datapath-windows/ovsext/Actions.h      |  77 ++++++++++++++
 datapath-windows/ovsext/Conntrack.c    | 178 ++++++++++++++++++++-------------
 datapath-windows/ovsext/Conntrack.h    |  25 +++--
 datapath-windows/ovsext/ovsext.vcxproj |   2 +
 5 files changed, 303 insertions(+), 137 deletions(-)

diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
index 46f84bc..9f4a22d 100644
--- a/datapath-windows/ovsext/Actions.c
+++ b/datapath-windows/ovsext/Actions.c
@@ -71,63 +71,6 @@ typedef struct _OVS_ACTION_STATS {
 OVS_ACTION_STATS ovsActionStats;
 
 /*
- * There a lot of data that needs to be maintained while executing the pipeline
- * as dictated by the actions of a flow, across different functions at different
- * levels. Such data is put together in a 'context' structure. Care should be
- * exercised while adding new members to the structure - only add ones that get
- * used across multiple stages in the pipeline/get used in multiple functions.
- */
-typedef struct OvsForwardingContext {
-    POVS_SWITCH_CONTEXT switchContext;
-    /* The NBL currently used in the pipeline. */
-    PNET_BUFFER_LIST curNbl;
-    /* NDIS forwarding detail for 'curNbl'. */
-    PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
-    /* Array of destination ports for 'curNbl'. */
-    PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
-    /* send flags while sending 'curNbl' into NDIS. */
-    ULONG sendFlags;
-    /* Total number of output ports, used + unused, in 'curNbl'. */
-    UINT32 destPortsSizeIn;
-    /* Total number of used output ports in 'curNbl'. */
-    UINT32 destPortsSizeOut;
-    /*
-     * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
-     * be freed/completed.
-     */
-    OvsCompletionList *completionList;
-    /*
-     * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
-     * bridge. ie. during tunneling on the Rx side.
-     */
-    UINT32 srcVportNo;
-
-    /*
-     * Tunnel key:
-     * - specified in actions during tunneling Tx
-     * - extracted from an NBL during tunneling Rx
-     */
-    OvsIPv4TunnelKey tunKey;
-
-    /*
-     * Tunneling - Tx:
-     * To store the output port, when it is a tunneled port. We don't foresee
-     * multiple tunneled ports as outport for any given NBL.
-     */
-    POVS_VPORT_ENTRY tunnelTxNic;
-
-    /*
-     * Tunneling - Rx:
-     * Points to the Internal port on the PIF Bridge, if the packet needs to be
-     * de-tunneled.
-     */
-    POVS_VPORT_ENTRY tunnelRxNic;
-
-    /* header information */
-    OVS_PACKET_HDR_INFO layers;
-} OvsForwardingContext;
-
-/*
  * --------------------------------------------------------------------------
  * OvsInitForwardingCtx --
  *     Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
@@ -1388,7 +1331,7 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
  *      based on the specified key.
  *----------------------------------------------------------------------------
  */
-static __inline NDIS_STATUS
+NDIS_STATUS
 OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
                   const struct ovs_key_udp *udpAttr)
 {
@@ -1435,7 +1378,7 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
  *      based on the specified key.
  *----------------------------------------------------------------------------
  */
-static __inline NDIS_STATUS
+NDIS_STATUS
 OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
                   const struct ovs_key_tcp *tcpAttr)
 {
@@ -1474,11 +1417,104 @@ OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
 /*
  *----------------------------------------------------------------------------
  * OvsUpdateIPv4Header --
+ *      (Describes OvsUpdateAddressAndPort.) Rewrites the source or destination
+ *      IPv4 address and TCP/UDP port in ovsFwdCtx.curNbl inline.
+ *----------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
+                        UINT32 newAddr, UINT16 newPort,
+                        BOOLEAN isSource)
+{
+    PUINT8 bufferStart;
+    UINT32 hdrSize;
+    OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
+    IPHdr *ipHdr;
+    TCPHdr *tcpHdr = NULL;
+    UDPHdr *udpHdr = NULL;
+    UINT32 *addrField = NULL;
+    UINT16 *portField = NULL;
+    UINT16 *checkField = NULL;
+
+    ASSERT(layers->value != 0);
+
+    /* The conditional is parenthesized: '+' binds tighter than '?:'. */
+    if (layers->isTcp || layers->isUdp) {
+        hdrSize = layers->l4Offset +
+                  (layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr));
+    } else {
+        hdrSize = layers->l3Offset + sizeof (*ipHdr);
+    }
+
+    bufferStart = OvsGetHeaderBySize(ovsFwdCtx, hdrSize);
+    if (!bufferStart) {
+        return NDIS_STATUS_RESOURCES;
+    }
+
+    ipHdr = (IPHdr *)(bufferStart + layers->l3Offset);
+
+    if (layers->isTcp) {
+        tcpHdr = (TCPHdr *)(bufferStart + layers->l4Offset);
+    } else if (layers->isUdp) {
+        udpHdr = (UDPHdr *)(bufferStart + layers->l4Offset);
+    }
+
+    /*
+     * Select the address/port fields to rewrite. The L4 checksum field is
+     * picked for both directions, so that a source rewrite patches the
+     * TCP/UDP checksum exactly like a destination rewrite does.
+     */
+    if (isSource) {
+        addrField = &ipHdr->saddr;
+        if (tcpHdr) {
+            portField = &tcpHdr->source;
+            checkField = &tcpHdr->check;
+        } else if (udpHdr) {
+            portField = &udpHdr->source;
+            checkField = &udpHdr->check;
+        }
+    } else {
+        addrField = &ipHdr->daddr;
+        if (tcpHdr) {
+            portField = &tcpHdr->dest;
+            checkField = &tcpHdr->check;
+        } else if (udpHdr) {
+            portField = &udpHdr->dest;
+            checkField = &udpHdr->check;
+        }
+    }
+
+    if (*addrField != newAddr) {
+        if (checkField && *checkField != 0) {
+            *checkField = ChecksumUpdate32(*checkField, *addrField,
+                                           newAddr);
+        }
+        if (ipHdr->check != 0) {
+            ipHdr->check = ChecksumUpdate32(ipHdr->check, *addrField,
+                                            newAddr);
+        }
+        *addrField = newAddr;
+    }
+
+    if (portField && *portField != newPort) {
+        /* A zero L4 checksum is not patched; the port is always rewritten. */
+        if (checkField && *checkField != 0) {
+            *checkField = ChecksumUpdate16(*checkField, *portField,
+                                           newPort);
+        }
+        *portField = newPort;
+    }
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsUpdateIPv4Header --
  *      Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
  *      specified key.
  *----------------------------------------------------------------------------
  */
-static __inline NDIS_STATUS
+NDIS_STATUS
 OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
                     const struct ovs_key_ipv4 *ipAttr)
 {
@@ -2032,7 +2068,7 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext,
                 }
             }
 
-            status = OvsExecuteConntrackAction(ovsFwdCtx.curNbl, layers,
+            status = OvsExecuteConntrackAction(&ovsFwdCtx, layers,
                                                key, (const PNL_ATTR)a);
             if (status != NDIS_STATUS_SUCCESS) {
                 OVS_LOG_ERROR("CT Action failed");
diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h
index c56c260..75bb8db 100644
--- a/datapath-windows/ovsext/Actions.h
+++ b/datapath-windows/ovsext/Actions.h
@@ -52,4 +52,81 @@ OvsDoRecirc(POVS_SWITCH_CONTEXT switchContext,
             UINT32 srcPortNo,
             OVS_PACKET_HDR_INFO *layers);
 
+/*
+ * There is a lot of data that needs to be maintained while executing the
+ * pipeline as dictated by the actions of a flow, across different functions at
+ * different levels. Such data is put together in a 'context' structure. Care
+ * should be exercised when adding new members to the structure - only add ones
+ * that are used across multiple stages in the pipeline or multiple functions.
+ */
+typedef struct OvsForwardingContext {
+    POVS_SWITCH_CONTEXT switchContext;
+    /* The NBL currently used in the pipeline. */
+    PNET_BUFFER_LIST curNbl;
+    /* NDIS forwarding detail for 'curNbl'. */
+    PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
+    /* Array of destination ports for 'curNbl'. */
+    PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
+    /* Send flags used while sending 'curNbl' into NDIS. */
+    ULONG sendFlags;
+    /* Total number of output ports, used + unused, in 'curNbl'. */
+    UINT32 destPortsSizeIn;
+    /* Total number of used output ports in 'curNbl'. */
+    UINT32 destPortsSizeOut;
+    /*
+     * If 'curNbl' is not owned by OVS, it needs to be tracked here in case it
+     * has to be freed/completed.
+     */
+    OvsCompletionList *completionList;
+    /*
+     * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
+     * bridge, i.e. during tunneling on the Rx side.
+     */
+    UINT32 srcVportNo;
+
+    /*
+     * Tunnel key:
+     * - specified in actions during tunneling Tx
+     * - extracted from an NBL during tunneling Rx
+     */
+    OvsIPv4TunnelKey tunKey;
+
+    /*
+     * Tunneling - Tx:
+     * Stores the output port when it is a tunneled port. We don't foresee
+     * multiple tunneled ports as the outport for any given NBL.
+     */
+    POVS_VPORT_ENTRY tunnelTxNic;
+
+    /*
+     * Tunneling - Rx:
+     * Points to the Internal port on the PIF Bridge, if the packet needs to be
+     * de-tunneled.
+     */
+    POVS_VPORT_ENTRY tunnelRxNic;
+
+    /* Header information. */
+    OVS_PACKET_HDR_INFO layers;
+} OvsForwardingContext;
+
+PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
+                          UINT32 size);
+
+NDIS_STATUS
+OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
+                  const struct ovs_key_udp *udpAttr);
+
+NDIS_STATUS
+OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
+                  const struct ovs_key_tcp *tcpAttr);
+
+NDIS_STATUS
+OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
+                    const struct ovs_key_ipv4 *ipAttr);
+
+NDIS_STATUS
+OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
+                        UINT32 newAddr, UINT16 newPort,
+                        BOOLEAN isSource);
+
 #endif /* __ACTIONS_H_ */
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
index 8a87dce..b22a99a 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -18,6 +18,7 @@
 #include "Jhash.h"
 #include "PacketParser.h"
 #include "Event.h"
+#include "Conntrack-nat.h"
 
 #pragma warning(push)
 #pragma warning(disable:4311)
@@ -26,7 +27,7 @@
 #define SEC_TO_UNIX_EPOCH 11644473600LL
 #define SEC_TO_NANOSEC 1000000000LL
 
-KSTART_ROUTINE ovsConntrackEntryCleaner;
+KSTART_ROUTINE OvsConntrackEntryCleaner;
 static PLIST_ENTRY ovsConntrackTable;
 static OVS_CT_THREAD_CTX ctThreadCtx;
 static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
@@ -71,7 +72,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
     /* Init CT Cleaner Thread */
     KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE);
     status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
-                                  NULL, ovsConntrackEntryCleaner,
+                                  NULL, OvsConntrackEntryCleaner,
                                   &ctThreadCtx);
 
     if (status != STATUS_SUCCESS) {
@@ -88,6 +89,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
                               &ctThreadCtx.threadObject, NULL);
     ZwClose(threadHandle);
     threadHandle = NULL;
+
+    status = OvsNatInit(context);
+
+    if (status != STATUS_SUCCESS) {
+        OvsCleanupConntrack();
+        return status;
+    }
     return STATUS_SUCCESS;
 }
 
@@ -120,6 +128,7 @@ OvsCleanupConntrack(VOID)
 
     NdisFreeRWLock(ovsConntrackLockObj);
     ovsConntrackLockObj = NULL;
+    OvsNatCleanup();
 }
 
 static __inline VOID
@@ -159,30 +168,48 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
     OvsPostCtEvent(&ctEventEntry);
 }
 
-static __inline VOID
-OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, UINT64 now)
+static __inline BOOLEAN
+OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx,
+              PNAT_ACTION_INFO natInfo, UINT64 now)
 {
     NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY));
-    NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY));
+    NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof(OVS_CT_KEY));
     OvsCtKeyReverse(&entry->rev_key);
+
+    /* A NULL natInfo means "no NAT"; it must not be dereferenced below. */
+    if (natInfo == NULL) {
+        entry->natInfo.natAction = NAT_ACTION_NONE;
+    } else if (OvsIsForwardNat(natInfo->natAction)) {
+        entry->natInfo = *natInfo;
+        if (!OvsNatCtEntry(entry)) {
+            return FALSE;
+        }
+        entry->natInfo = *natInfo;
+        ctx->hash = OvsHashCtKey(&entry->key);
+    } else {
+        entry->natInfo.natAction = natInfo->natAction;
+    }
     entry->timestampStart = now;
     InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
                    &entry->link);
     OvsPostCtEventEntry(entry, OVS_EVENT_CT_NEW);
     ctTotalEntries++;
+    return TRUE;
 }
 
 static __inline POVS_CT_ENTRY
-OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
+OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
                  UINT8 ipProto,
                  UINT32 l4Offset,
                  OvsConntrackKeyLookupCtx *ctx,
                  OvsFlowKey *key,
+                 PNAT_ACTION_INFO natInfo,
                  BOOLEAN commit,
                  UINT64 currentTime)
 {
     POVS_CT_ENTRY entry = NULL;
     UINT32 state = 0;
+    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
     switch (ipProto)
     {
         case IPPROTO_TCP:
@@ -210,11 +237,8 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
                 if (parentEntry != NULL) {
                     entry->parent = parentEntry;
                 }
-                OvsCtAddEntry(entry, ctx, currentTime);
             }
-
-            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
-            return entry;
+            break;
         }
         case IPPROTO_ICMP:
         {
@@ -228,33 +252,25 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
             state |= OVS_CS_F_NEW;
             if (commit) {
                 entry = OvsConntrackCreateIcmpEntry(currentTime);
-                if (!entry) {
-                    return NULL;
-                }
-                OvsCtAddEntry(entry, ctx, currentTime);
             }
-
-            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
-            return entry;
+            break;
         }
         case IPPROTO_UDP:
         {
             state |= OVS_CS_F_NEW;
             if (commit) {
                 entry = OvsConntrackCreateOtherEntry(currentTime);
-                if (!entry) {
-                    return NULL;
-                }
-                OvsCtAddEntry(entry, ctx, currentTime);
             }
-
-            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
-            return entry;
+            break;
         }
         default:
             goto invalid;
     }
 
+    if (!entry || !OvsCtAddEntry(entry, ctx, natInfo, currentTime)) {
+        return NULL;
+    }
+    OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
 invalid:
     state |= OVS_CS_F_INVALID;
     OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
@@ -263,11 +279,11 @@ invalid:
 
 static enum CT_UPDATE_RES
 OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
-                        PNET_BUFFER_LIST nbl,
-                        UINT8 ipProto,
-                        UINT32 l4Offset,
-                        BOOLEAN reply,
-                        UINT64 now)
+                 PNET_BUFFER_LIST nbl,
+                 UINT8 ipProto,
+                 UINT32 l4Offset,
+                 BOOLEAN reply,
+                 UINT64 now)
 {
     switch (ipProto)
     {
@@ -293,6 +309,12 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
 static __inline VOID
 OvsCtEntryDelete(POVS_CT_ENTRY entry)
 {
+    if (entry == NULL) {
+        return;
+    }
+    if (entry->natInfo.natAction) {
+        OvsNatDeleteKey(&entry->key);
+    }
     OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
     RemoveEntryList(&entry->link);
     OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
@@ -302,10 +324,6 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry)
 static __inline BOOLEAN
 OvsCtEntryExpired(POVS_CT_ENTRY entry)
 {
-    if (entry == NULL) {
-        return TRUE;
-    }
-
     UINT64 currentTime;
     NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
     return entry->expiration < currentTime;
@@ -333,7 +351,7 @@ OvsDetectCtPacket(OvsFlowKey *key)
     return NDIS_STATUS_NOT_SUPPORTED;
 }
 
-static __inline BOOLEAN
+BOOLEAN
 OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
 {
     return ((ctxKey.src.addr.ipv4 == entryKey.src.addr.ipv4) &&
@@ -359,13 +377,14 @@ OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
     }
 }
 
-static __inline POVS_CT_ENTRY
+POVS_CT_ENTRY
 OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
 {
     PLIST_ENTRY link;
     POVS_CT_ENTRY entry;
     BOOLEAN reply = FALSE;
     POVS_CT_ENTRY found = NULL;
+    OVS_CT_KEY key = ctx->key;
 
     if (!ctTotalEntries) {
         return found;
@@ -374,13 +393,18 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
     LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) {
         entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
 
-        if (OvsCtKeyAreSame(ctx->key,entry->key)) {
+        if (OvsCtKeyAreSame(key,entry->key)) {
             found = entry;
             reply = FALSE;
             break;
         }
 
-        if (OvsCtKeyAreSame(ctx->key,entry->rev_key)) {
+        /* Reverse NAT must be performed before OvsCtLookup, so here
+         * we simply need to flip the src and dst in key and check whether
+         * the result equals entry->key. Note that the flipped key is not
+         * equal to rev_key due to the NAT effect. */
+        OvsCtKeyReverse(&key);
+        if (OvsCtKeyAreSame(key, entry->key)) {
             found = entry;
             reply = TRUE;
             break;
@@ -399,17 +423,18 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
     return found;
 }
 
-static __inline UINT32
-OvsExtractLookupCtxHash(OvsConntrackKeyLookupCtx *ctx)
+UINT32
+OvsHashCtKey(const OVS_CT_KEY *key)
 {
-    UINT32 hsrc, hdst,hash;
-    hsrc = OvsJhashBytes((UINT32*) &ctx->key.src, sizeof(ctx->key.src), 0);
-    hdst = OvsJhashBytes((UINT32*) &ctx->key.dst, sizeof(ctx->key.dst), 0);
+    UINT32 hsrc, hdst, hash;
+    hsrc = OvsJhashBytes((UINT32*) &key->src, sizeof(key->src), 0);
+    hdst = OvsJhashBytes((UINT32*) &key->dst, sizeof(key->dst), 0);
     hash = hsrc ^ hdst; /* TO identify reverse traffic */
-    return OvsJhashBytes((uint32_t *) &ctx->key.dst + 1,
-                         ((uint32_t *) (&ctx->key + 1) -
-                         (uint32_t *) (&ctx->key.dst + 1)),
+    hash = OvsJhashBytes((uint32_t *) &key->dst + 1,
+                         ((uint32_t *) (key + 1) -
+                         (uint32_t *) (&key->dst + 1)),
                          hash);
+    return hash;
 }
 
 static UINT8
@@ -440,6 +465,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
                     PNET_BUFFER_LIST curNbl,
                     UINT32 l4Offset)
 {
+    const OVS_NAT_ENTRY *natEntry;
     ctx->key.zone = zone;
     ctx->key.dl_type = flowKey->l2.dlType;
     ctx->related = FALSE;
@@ -501,7 +527,14 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
         return NDIS_STATUS_INVALID_PACKET;
     }
 
-    ctx->hash = OvsExtractLookupCtxHash(ctx);
+    natEntry = OvsNatLookup(&ctx->key, TRUE);
+    if (natEntry) {
+        // Translate address first for reverse NAT
+        ctx->key = natEntry->ctEntry->key;
+        OvsCtKeyReverse(&ctx->key);
+    }
+
+    ctx->hash = OvsHashCtKey(&ctx->key);
     return NDIS_STATUS_SUCCESS;
 }
 
@@ -519,16 +552,18 @@ OvsDetectFtpPacket(OvsFlowKey *key) {
  *----------------------------------------------------------------------------
  */
 static __inline POVS_CT_ENTRY
-OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
+OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
                          UINT32 l4Offset,
                          OvsConntrackKeyLookupCtx *ctx,
                          OvsFlowKey *key,
                          UINT16 zone,
+                         NAT_ACTION_INFO *natInfo,
                          BOOLEAN commit,
                          UINT64 currentTime)
 {
     POVS_CT_ENTRY entry = ctx->entry;
     UINT32 state = 0;
+    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
 
     /* If an entry was found, update the state based on TCP flags */
     if (ctx->related) {
@@ -554,8 +589,8 @@ OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
             //Delete and update the Conntrack
             OvsCtEntryDelete(ctx->entry);
             ctx->entry = NULL;
-            entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto, l4Offset,
-                                     ctx, key, commit, currentTime);
+            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset,
+                                     ctx, key, natInfo, commit, currentTime);
             if (!entry) {
                 return NULL;
             }
@@ -621,7 +656,7 @@ OvsConntrackSetLabels(OvsFlowKey *key,
 }
 
 static __inline NDIS_STATUS
-OvsCtExecute_(PNET_BUFFER_LIST curNbl,
+OvsCtExecute_(OvsForwardingContext *fwdCtx,
               OvsFlowKey *key,
               OVS_PACKET_HDR_INFO *layers,
               BOOLEAN commit,
@@ -633,13 +668,12 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
 {
     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
     POVS_CT_ENTRY entry = NULL;
+    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
     OvsConntrackKeyLookupCtx ctx = { 0 };
     LOCK_STATE_EX lockState;
     UINT64 currentTime;
     NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
 
-    /* XXX: Not referenced for now */
-    UNREFERENCED_PARAMETER(natInfo);
 
     /* Retrieve the Conntrack Key related fields from packet */
     OvsCtSetupLookupCtx(key, zone, &ctx, curNbl, layers->l4Offset);
@@ -651,14 +685,25 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
 
     if (!entry) {
         /* If no matching entry was found, create one and add New state */
-        entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto,
+        entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
                                  layers->l4Offset, &ctx,
-                                 key, commit, currentTime);
+                                 key, natInfo, commit, currentTime);
     } else {
         /* Process the entry and update CT flags */
         OvsCtIncrementCounters(entry, ctx.reply, curNbl);
-        entry = OvsProcessConntrackEntry(curNbl, layers->l4Offset, &ctx, key,
-                                         zone, commit, currentTime);
+        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
+                                         zone, natInfo, commit, currentTime);
+    }
+
+    /* Note that natInfo is not the same as entry->natInfo here. natInfo
+       is decided by action in the openflow rule, entry->natInfo is decided
+       when the entry is created. In the reverse NAT case, natInfo is
+       NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or
+       NAT_ACTION_DST without NAT_ACTION_REVERSE */
+    if (entry && natInfo->natAction != NAT_ACTION_NONE)
+    {
+        OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
+                     key, ctx.reply);
     }
 
     if (entry && mark) {
@@ -692,7 +737,7 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
  *---------------------------------------------------------------------------
  */
 NDIS_STATUS
-OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
+OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
                           OVS_PACKET_HDR_INFO *layers,
                           OvsFlowKey *key,
                           const PNL_ATTR a)
@@ -739,12 +784,12 @@ OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
         BOOLEAN hasMaxIp = FALSE;
         BOOLEAN hasMaxPort = FALSE;
         NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) {
-            enum ovs_nat_attr sub_type_nest = NlAttrType(natAttr);
-            switch(sub_type_nest) {
+            enum ovs_nat_attr subtype = NlAttrType(natAttr);
+            switch(subtype) {
             case OVS_NAT_ATTR_SRC:
             case OVS_NAT_ATTR_DST:
                 natActionInfo.natAction |=
-                    ((sub_type_nest == OVS_NAT_ATTR_SRC)
+                    ((subtype == OVS_NAT_ATTR_SRC)
                         ? NAT_ACTION_SRC : NAT_ACTION_DST);
                 break;
             case OVS_NAT_ATTR_IP_MIN:
@@ -802,19 +847,19 @@ OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
         }
     }
 
-    status = OvsCtExecute_(curNbl, key, layers,
+    status = OvsCtExecute_(fwdCtx, key, layers,
                            commit, zone, mark, labels, helper, &natActionInfo);
     return status;
 }
 
 /*
  *----------------------------------------------------------------------------
- * ovsConntrackEntryCleaner
+ * OvsConntrackEntryCleaner
  *     Runs periodically and cleans up the connection tracker
  *----------------------------------------------------------------------------
  */
 VOID
-ovsConntrackEntryCleaner(PVOID data)
+OvsConntrackEntryCleaner(PVOID data)
 {
 
     POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data;
@@ -831,15 +876,13 @@ ovsConntrackEntryCleaner(PVOID data)
         }
 
         /* Set the timeout for the thread and cleanup */
-        UINT64 currentTime, threadSleepTimeout;
-        NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
-        threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL;
+        INT64 threadSleepTimeout = -CT_CLEANUP_INTERVAL;
 
         if (ctTotalEntries) {
             for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
                 LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) {
                     entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
-                    if (entry->expiration < currentTime) {
+                    if (entry && OvsCtEntryExpired(entry)) {
                         OvsCtEntryDelete(entry);
                     }
                 }
@@ -880,6 +923,7 @@ OvsCtFlush(UINT16 zone)
     }
 
     NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
+    OvsNatFlush(zone);
     return NDIS_STATUS_SUCCESS;
 }
 
diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
index 875c434..584c8ff 100644
--- a/datapath-windows/ovsext/Conntrack.h
+++ b/datapath-windows/ovsext/Conntrack.h
@@ -20,6 +20,7 @@
 #include "precomp.h"
 #include "Flow.h"
 #include "Debug.h"
+#include "Actions.h"
 #include <stddef.h>
 
 #ifdef OVS_DBG_MOD
@@ -86,6 +87,14 @@ typedef struct _OVS_CT_KEY {
     UINT64 byteCount;
 } OVS_CT_KEY, *POVS_CT_KEY;
 
+typedef struct _NAT_ACTION_INFO {
+    struct ct_addr minAddr;
+    struct ct_addr maxAddr;
+    uint16_t minPort;
+    uint16_t maxPort;
+    uint16_t natAction;
+} NAT_ACTION_INFO, *PNAT_ACTION_INFO;
+
 typedef struct OVS_CT_ENTRY {
     OVS_CT_KEY  key;
     OVS_CT_KEY  rev_key;
@@ -94,6 +103,7 @@ typedef struct OVS_CT_ENTRY {
     UINT32      mark;
     UINT64      timestampStart;
     struct ovs_key_ct_labels labels;
+    NAT_ACTION_INFO natInfo;
     PVOID       parent; /* Points to main connection */
 } OVS_CT_ENTRY, *POVS_CT_ENTRY;
 
@@ -118,14 +128,6 @@ typedef struct OvsConntrackKeyLookupCtx {
     BOOLEAN         related;
 } OvsConntrackKeyLookupCtx;
 
-typedef struct _NAT_ACTION_INFO {
-    struct ct_addr minAddr;
-    struct ct_addr maxAddr;
-    uint16_t minPort;
-    uint16_t maxPort;
-    uint16_t natAction;
-} NAT_ACTION_INFO, *PNAT_ACTION_INFO;
-
 #define CT_HASH_TABLE_SIZE ((UINT32)1 << 10)
 #define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1)
 #define CT_INTERVAL_SEC 10000000LL //1s
@@ -172,7 +174,7 @@ OvsGetTcpPayloadLength(PNET_BUFFER_LIST nbl)
 VOID OvsCleanupConntrack(VOID);
 NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context);
 
-NDIS_STATUS OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
+NDIS_STATUS OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
                                       OVS_PACKET_HDR_INFO *layers,
                                       OvsFlowKey *key,
                                       const PNL_ATTR a);
@@ -225,4 +227,9 @@ NDIS_STATUS OvsCtHandleFtp(PNET_BUFFER_LIST curNbl,
                            POVS_CT_ENTRY entry,
                            BOOLEAN request);
 
+UINT32 OvsHashCtKey(const OVS_CT_KEY *key);
+BOOLEAN OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey);
+POVS_CT_ENTRY OvsCtLookup(OvsConntrackKeyLookupCtx *ctx);
+
+
 #endif /* __OVS_CONNTRACK_H_ */
diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
index 44aea19..75754fb 100644
--- a/datapath-windows/ovsext/ovsext.vcxproj
+++ b/datapath-windows/ovsext/ovsext.vcxproj
@@ -103,6 +103,7 @@
     <ClInclude Include="Actions.h" />
     <ClInclude Include="Atomic.h" />
     <ClInclude Include="BufferMgmt.h" />
+    <ClInclude Include="Conntrack-nat.h" />
     <ClInclude Include="Conntrack.h" />
     <ClInclude Include="Datapath.h" />
     <ClInclude Include="Debug.h" />
@@ -256,6 +257,7 @@
   <ItemGroup>
     <ClCompile Include="Actions.c" />
     <ClCompile Include="BufferMgmt.c" />
+    <ClCompile Include="Conntrack-nat.c" />
     <ClCompile Include="Conntrack-related.c" />
     <ClCompile Include="Conntrack-ftp.c" />
     <ClCompile Include="Conntrack-icmp.c" />
-- 
2.8.0.windows.1



More information about the dev mailing list