[ovs-dev] [PATCH 3/3 v2] datapath-windows: NAT integration with conntrack
Anand Kumar
kumaranand at vmware.com
Thu Mar 23 19:01:08 UTC 2017
Hi Yin,
Thank you for the patches. Overall the patch looks good except for a few minor issues.
Please find my comments inline prefixed with [AK]
Regards,
Anand Kumar
On 3/22/17, 3:12 PM, "ovs-dev-bounces at openvswitch.org on behalf of Yin Lin" <ovs-dev-bounces at openvswitch.org on behalf of linyi at vmware.com> wrote:
Signed-off-by: Yin Lin <linyi at vmware.com>
---
datapath-windows/ovsext/Actions.c | 179 ++++++++++++++++++++-----------
datapath-windows/ovsext/Actions.h | 77 +++++++++++++
datapath-windows/ovsext/Conntrack-nat.c | 3 +-
datapath-windows/ovsext/Conntrack.c | 184 ++++++++++++++++++++------------
datapath-windows/ovsext/Conntrack.h | 25 +++--
datapath-windows/ovsext/ovsext.vcxproj | 2 +
6 files changed, 331 insertions(+), 139 deletions(-)
diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
index 46f84bc..45b3841 100644
--- a/datapath-windows/ovsext/Actions.c
+++ b/datapath-windows/ovsext/Actions.c
@@ -71,63 +71,6 @@ typedef struct _OVS_ACTION_STATS {
OVS_ACTION_STATS ovsActionStats;
/*
- * There a lot of data that needs to be maintained while executing the pipeline
- * as dictated by the actions of a flow, across different functions at different
- * levels. Such data is put together in a 'context' structure. Care should be
- * exercised while adding new members to the structure - only add ones that get
- * used across multiple stages in the pipeline/get used in multiple functions.
- */
-typedef struct OvsForwardingContext {
- POVS_SWITCH_CONTEXT switchContext;
- /* The NBL currently used in the pipeline. */
- PNET_BUFFER_LIST curNbl;
- /* NDIS forwarding detail for 'curNbl'. */
- PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
- /* Array of destination ports for 'curNbl'. */
- PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
- /* send flags while sending 'curNbl' into NDIS. */
- ULONG sendFlags;
- /* Total number of output ports, used + unused, in 'curNbl'. */
- UINT32 destPortsSizeIn;
- /* Total number of used output ports in 'curNbl'. */
- UINT32 destPortsSizeOut;
- /*
- * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
- * be freed/completed.
- */
- OvsCompletionList *completionList;
- /*
- * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
- * bridge. ie. during tunneling on the Rx side.
- */
- UINT32 srcVportNo;
-
- /*
- * Tunnel key:
- * - specified in actions during tunneling Tx
- * - extracted from an NBL during tunneling Rx
- */
- OvsIPv4TunnelKey tunKey;
-
- /*
- * Tunneling - Tx:
- * To store the output port, when it is a tunneled port. We don't foresee
- * multiple tunneled ports as outport for any given NBL.
- */
- POVS_VPORT_ENTRY tunnelTxNic;
-
- /*
- * Tunneling - Rx:
- * Points to the Internal port on the PIF Bridge, if the packet needs to be
- * de-tunneled.
- */
- POVS_VPORT_ENTRY tunnelRxNic;
-
- /* header information */
- OVS_PACKET_HDR_INFO layers;
-} OvsForwardingContext;
-
-/*
* --------------------------------------------------------------------------
* OvsInitForwardingCtx --
* Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
@@ -1388,7 +1331,7 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
* based on the specified key.
*----------------------------------------------------------------------------
*/
-static __inline NDIS_STATUS
+NDIS_STATUS
OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
const struct ovs_key_udp *udpAttr)
{
@@ -1435,7 +1378,7 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
* based on the specified key.
*----------------------------------------------------------------------------
*/
-static __inline NDIS_STATUS
+NDIS_STATUS
OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
const struct ovs_key_tcp *tcpAttr)
{
@@ -1474,11 +1417,125 @@ OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
/*
*----------------------------------------------------------------------------
[AK] – change function name to OvsUpdateAddressAndPort in the comments
* OvsUpdateIPv4Header --
+ * Updates the source/destination IP field of IPv4 header in
+ * ovsFwdCtx.curNbl inline based on the specified key.
+ *----------------------------------------------------------------------------
+ */
+OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
+ UINT32 newAddr, UINT16 newPort,
+ BOOLEAN isSource, BOOLEAN isTx)
+{
+ PUINT8 bufferStart;
+ UINT32 hdrSize;
+ OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
+ IPHdr *ipHdr;
+ TCPHdr *tcpHdr = NULL;
+ UDPHdr *udpHdr = NULL;
+ UINT32 *addrField = NULL;
+ UINT16 *portField = NULL;
+ UINT16 *checkField = NULL;
+ BOOLEAN l4Offload = FALSE;
+ NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+
+ ASSERT(layers->value != 0);
+
+ if (layers->isTcp || layers->isUdp) {
+ hdrSize = layers->l4Offset +
+ layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr);
+ } else {
+ hdrSize = layers->l3Offset + sizeof (*ipHdr);
+ }
+
+ bufferStart = OvsGetHeaderBySize(ovsFwdCtx, hdrSize);
+ if (!bufferStart) {
+ return NDIS_STATUS_RESOURCES;
+ }
+
+ ipHdr = (IPHdr *)(bufferStart + layers->l3Offset);
+
+ if (layers->isTcp) {
+ tcpHdr = (TCPHdr *)(bufferStart + layers->l4Offset);
+ } else if (layers->isUdp) {
+ udpHdr = (UDPHdr *)(bufferStart + layers->l4Offset);
+ }
+
+ csumInfo.Value = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl,
+ TcpIpChecksumNetBufferListInfo);
+ /*
+ * Adjust the IP header inline as dictated by the action, and also update
+ * the IP and the TCP checksum for the data modified.
+ *
+ * In the future, this could be optimized to make one call to
+ * ChecksumUpdate32(). Ignoring this for now, since for the most common
+ * case, we only update the TTL.
+ */
+
+ if (isSource)
[AK] – Indentation opening flower bracket
+ {
+ addrField = &ipHdr->saddr;
+ if (tcpHdr) {
+ portField = &tcpHdr->source;
+ checkField = &tcpHdr->check;
+ if (isTx) {
+ l4Offload = (csumInfo.Transmit.TcpChecksum != 0);
+ }
+ }
+ else if (udpHdr) {
+ portField = &udpHdr->source;
+ checkField = &udpHdr->check;
+ if (isTx) {
+ l4Offload = (csumInfo.Transmit.UdpChecksum != 0);
+ }
+ }
+ } else {
+ addrField = &ipHdr->daddr;
+ if (tcpHdr) {
+ portField = &tcpHdr->dest;
+ checkField = &tcpHdr->check;
+ } else if (udpHdr) {
+ portField = &udpHdr->dest;
+ checkField = &udpHdr->check;
+ }
+ }
+ if (*addrField != newAddr) {
+ UINT32 oldAddr = *addrField;
+ if (checkField && *checkField != 0) {
+ if (l4Offload) {
+ /* Recompute IP pseudo checksum */
+ *checkField = ~(*checkField);
+ *checkField = ChecksumUpdate32(*checkField, oldAddr,
+ newAddr);
+ *checkField = ~(*checkField);
+ } else {
+ *checkField = ChecksumUpdate32(*checkField, oldAddr,
+ newAddr);
+ }
+ }
+ if (ipHdr->check != 0) {
+ ipHdr->check = ChecksumUpdate32(ipHdr->check, oldAddr,
+ newAddr);
+ }
+ *addrField = newAddr;
+ }
+
+ if (portField && *portField != newPort) {
+ if (checkField && !l4Offload) {
+ *checkField = ChecksumUpdate16(*checkField, *portField,
+ newPort);
+ }
+ *portField = newPort;
+ }
+ return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsUpdateIPv4Header --
* Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
* specified key.
*----------------------------------------------------------------------------
*/
-static __inline NDIS_STATUS
+NDIS_STATUS
OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
const struct ovs_key_ipv4 *ipAttr)
{
@@ -2032,7 +2089,7 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext,
}
}
- status = OvsExecuteConntrackAction(ovsFwdCtx.curNbl, layers,
+ status = OvsExecuteConntrackAction(&ovsFwdCtx, layers,
key, (const PNL_ATTR)a);
if (status != NDIS_STATUS_SUCCESS) {
OVS_LOG_ERROR("CT Action failed");
diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h
index c56c260..85d9dcb 100644
--- a/datapath-windows/ovsext/Actions.h
+++ b/datapath-windows/ovsext/Actions.h
@@ -52,4 +52,81 @@ OvsDoRecirc(POVS_SWITCH_CONTEXT switchContext,
UINT32 srcPortNo,
OVS_PACKET_HDR_INFO *layers);
+/*
+ * There a lot of data that needs to be maintained while executing the pipeline
+ * as dictated by the actions of a flow, across different functions at different
+ * levels. Such data is put together in a 'context' structure. Care should be
+ * exercised while adding new members to the structure - only add ones that get
+ * used across multiple stages in the pipeline/get used in multiple functions.
+ */
+typedef struct OvsForwardingContext {
+ POVS_SWITCH_CONTEXT switchContext;
+ /* The NBL currently used in the pipeline. */
+ PNET_BUFFER_LIST curNbl;
+ /* NDIS forwarding detail for 'curNbl'. */
+ PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
+ /* Array of destination ports for 'curNbl'. */
+ PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
+ /* send flags while sending 'curNbl' into NDIS. */
+ ULONG sendFlags;
+ /* Total number of output ports, used + unused, in 'curNbl'. */
+ UINT32 destPortsSizeIn;
+ /* Total number of used output ports in 'curNbl'. */
+ UINT32 destPortsSizeOut;
+ /*
+ * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
+ * be freed/completed.
+ */
+ OvsCompletionList *completionList;
+ /*
+ * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
+ * bridge. ie. during tunneling on the Rx side.
+ */
+ UINT32 srcVportNo;
+
+ /*
+ * Tunnel key:
+ * - specified in actions during tunneling Tx
+ * - extracted from an NBL during tunneling Rx
+ */
+ OvsIPv4TunnelKey tunKey;
+
+ /*
+ * Tunneling - Tx:
+ * To store the output port, when it is a tunneled port. We don't foresee
+ * multiple tunneled ports as outport for any given NBL.
+ */
+ POVS_VPORT_ENTRY tunnelTxNic;
+
+ /*
+ * Tunneling - Rx:
+ * Points to the Internal port on the PIF Bridge, if the packet needs to be
+ * de-tunneled.
+ */
+ POVS_VPORT_ENTRY tunnelRxNic;
+
+ /* header information */
+ OVS_PACKET_HDR_INFO layers;
+} OvsForwardingContext;
+
+PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
+ UINT32 size);
+
+NDIS_STATUS
+OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
+ const struct ovs_key_udp *udpAttr);
+
+NDIS_STATUS
+OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
+ const struct ovs_key_tcp *tcpAttr);
+
+NDIS_STATUS
+OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
+ const struct ovs_key_ipv4 *ipAttr);
+
+NDIS_STATUS
+OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
+ UINT32 newAddr, UINT16 newPort,
+ BOOLEAN isSource, BOOLEAN isTx);
+
#endif /* __ACTIONS_H_ */
diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c
index 4930694..17b312c 100644
--- a/datapath-windows/ovsext/Conntrack-nat.c
+++ b/datapath-windows/ovsext/Conntrack-nat.c
@@ -197,7 +197,8 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) {
OvsUpdateAddressAndPort(ovsFwdCtx,
endpoint->addr.ipv4_aligned,
- endpoint->port, isSrcNat);
+ endpoint->port, isSrcNat,
+ !reverse);
if (isSrcNat) {
key->ipKey.nwSrc = endpoint->addr.ipv4_aligned;
} else {
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
index 924f260..167e13e 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -18,6 +18,7 @@
#include "Jhash.h"
#include "PacketParser.h"
#include "Event.h"
+#include "Conntrack-nat.h"
#pragma warning(push)
#pragma warning(disable:4311)
@@ -26,7 +27,7 @@
#define SEC_TO_UNIX_EPOCH 11644473600LL
#define SEC_TO_NANOSEC 1000000000LL
-KSTART_ROUTINE ovsConntrackEntryCleaner;
+KSTART_ROUTINE OvsConntrackEntryCleaner;
static PLIST_ENTRY ovsConntrackTable;
static OVS_CT_THREAD_CTX ctThreadCtx;
static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
@@ -71,7 +72,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
/* Init CT Cleaner Thread */
KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE);
status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
- NULL, ovsConntrackEntryCleaner,
+ NULL, OvsConntrackEntryCleaner,
&ctThreadCtx);
if (status != STATUS_SUCCESS) {
@@ -88,6 +89,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
&ctThreadCtx.threadObject, NULL);
ZwClose(threadHandle);
threadHandle = NULL;
+
+ status = OvsNatInit(context);
+
+ if (status != STATUS_SUCCESS) {
+ OvsCleanupConntrack();
+ return status;
+ }
return STATUS_SUCCESS;
}
@@ -120,6 +128,7 @@ OvsCleanupConntrack(VOID)
NdisFreeRWLock(ovsConntrackLockObj);
ovsConntrackLockObj = NULL;
+ OvsNatCleanup();
}
static __inline VOID
@@ -159,25 +168,42 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
OvsPostCtEvent(&ctEventEntry);
}
-static __inline VOID
-OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, UINT64 now)
+static __inline BOOLEAN
+OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx,
+ PNAT_ACTION_INFO natInfo, UINT64 now)
{
- NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY));
- NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY));
+ NdisMoveMemory(&entry->key, &ctx->key, sizeof(OVS_CT_KEY));
+ NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof(OVS_CT_KEY));
OvsCtKeyReverse(&entry->rev_key);
+
+ // NatInfo is always initialized to be disabled, so that if NAT action
+ // fails, we will not end up deleting an non-existent NAT entry.
+ if (natInfo != NULL && OvsIsForwardNat(natInfo->natAction)) {
+ entry->natInfo = *natInfo;
+ if (!OvsNatCtEntry(entry)) {
+ return FALSE;
+ }
[AK] – Doesn’t this override the values updated by OvsNatCtEntry(entry)?
+ entry->natInfo = *natInfo;
+ ctx->hash = OvsHashCtKey(&entry->key);
+ } else {
+ entry->natInfo.natAction = natInfo->natAction;
+ }
+
entry->timestampStart = now;
InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
&entry->link);
ctTotalEntries++;
+ return TRUE;
}
static __inline POVS_CT_ENTRY
-OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
+OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
UINT8 ipProto,
UINT32 l4Offset,
OvsConntrackKeyLookupCtx *ctx,
OvsFlowKey *key,
+ PNAT_ACTION_INFO natInfo,
BOOLEAN commit,
UINT64 currentTime,
BOOLEAN *entryCreated)
@@ -185,6 +211,7 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
POVS_CT_ENTRY entry = NULL;
*entryCreated = FALSE;
UINT32 state = 0;
+ PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
switch (ipProto)
{
case IPPROTO_TCP:
@@ -212,12 +239,9 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
if (parentEntry != NULL) {
entry->parent = parentEntry;
}
- OvsCtAddEntry(entry, ctx, currentTime);
*entryCreated = TRUE;
}
-
- OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
- return entry;
+ break;
}
case IPPROTO_ICMP:
{
@@ -231,35 +255,31 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
state |= OVS_CS_F_NEW;
if (commit) {
entry = OvsConntrackCreateIcmpEntry(currentTime);
- if (!entry) {
- return NULL;
- }
- OvsCtAddEntry(entry, ctx, currentTime);
*entryCreated = TRUE;
}
-
- OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
- return entry;
+ break;
}
case IPPROTO_UDP:
{
state |= OVS_CS_F_NEW;
if (commit) {
entry = OvsConntrackCreateOtherEntry(currentTime);
- if (!entry) {
- return NULL;
- }
- OvsCtAddEntry(entry, ctx, currentTime);
*entryCreated = TRUE;
}
-
- OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
- return entry;
+ break;
}
default:
goto invalid;
}
+ if (commit && !entry) {
+ return NULL;
+ }
+ if (entry && !OvsCtAddEntry(entry, ctx, natInfo, currentTime)) {
+ return NULL;
+ }
+ OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
+ return entry;
invalid:
state |= OVS_CS_F_INVALID;
OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
@@ -268,11 +288,11 @@ invalid:
static enum CT_UPDATE_RES
OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
- PNET_BUFFER_LIST nbl,
- UINT8 ipProto,
- UINT32 l4Offset,
- BOOLEAN reply,
- UINT64 now)
+ PNET_BUFFER_LIST nbl,
+ UINT8 ipProto,
+ UINT32 l4Offset,
+ BOOLEAN reply,
+ UINT64 now)
{
switch (ipProto)
{
@@ -298,6 +318,12 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
static __inline VOID
OvsCtEntryDelete(POVS_CT_ENTRY entry)
{
+ if (entry == NULL) {
+ return;
+ }
+ if (entry->natInfo.natAction) {
+ OvsNatDeleteKey(&entry->key);
+ }
OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
RemoveEntryList(&entry->link);
OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
@@ -307,10 +333,6 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry)
static __inline BOOLEAN
OvsCtEntryExpired(POVS_CT_ENTRY entry)
{
- if (entry == NULL) {
- return TRUE;
- }
-
UINT64 currentTime;
NdisGetCurrentSystemTime((LARGE_INTEGER *)¤tTime);
return entry->expiration < currentTime;
@@ -338,7 +360,7 @@ OvsDetectCtPacket(OvsFlowKey *key)
return NDIS_STATUS_NOT_SUPPORTED;
}
-static __inline BOOLEAN
+BOOLEAN
OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
{
return ((ctxKey.src.addr.ipv4 == entryKey.src.addr.ipv4) &&
@@ -364,13 +386,14 @@ OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
}
}
-static __inline POVS_CT_ENTRY
+POVS_CT_ENTRY
OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
{
PLIST_ENTRY link;
POVS_CT_ENTRY entry;
BOOLEAN reply = FALSE;
POVS_CT_ENTRY found = NULL;
+ OVS_CT_KEY key = ctx->key;
if (!ctTotalEntries) {
return found;
@@ -379,13 +402,18 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) {
entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
- if (OvsCtKeyAreSame(ctx->key,entry->key)) {
+ if (OvsCtKeyAreSame(key,entry->key)) {
found = entry;
reply = FALSE;
break;
}
- if (OvsCtKeyAreSame(ctx->key,entry->rev_key)) {
+ /* Reverse NAT must be performed before OvsCtLookup, so here
+ * we simply need to flip the src and dst in key and compare
+ * they are equal. Note that flipped key is not equal to
+ * rev_key due to NAT effect. */
+ OvsCtKeyReverse(&key);
+ if (OvsCtKeyAreSame(key, entry->key)) {
found = entry;
reply = TRUE;
break;
@@ -404,17 +432,18 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
return found;
}
-static __inline UINT32
-OvsExtractLookupCtxHash(OvsConntrackKeyLookupCtx *ctx)
+UINT32
+OvsHashCtKey(const OVS_CT_KEY *key)
{
- UINT32 hsrc, hdst,hash;
- hsrc = OvsJhashBytes((UINT32*) &ctx->key.src, sizeof(ctx->key.src), 0);
- hdst = OvsJhashBytes((UINT32*) &ctx->key.dst, sizeof(ctx->key.dst), 0);
+ UINT32 hsrc, hdst, hash;
+ hsrc = OvsJhashBytes((UINT32*) &key->src, sizeof(key->src), 0);
+ hdst = OvsJhashBytes((UINT32*) &key->dst, sizeof(key->dst), 0);
hash = hsrc ^ hdst; /* TO identify reverse traffic */
- return OvsJhashBytes((uint32_t *) &ctx->key.dst + 1,
- ((uint32_t *) (&ctx->key + 1) -
- (uint32_t *) (&ctx->key.dst + 1)),
+ hash = OvsJhashBytes((uint32_t *) &key->dst + 1,
+ ((uint32_t *) (key + 1) -
+ (uint32_t *) (&key->dst + 1)),
hash);
+ return hash;
}
static UINT8
@@ -445,6 +474,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
PNET_BUFFER_LIST curNbl,
UINT32 l4Offset)
{
+ const OVS_NAT_ENTRY *natEntry;
ctx->key.zone = zone;
ctx->key.dl_type = flowKey->l2.dlType;
ctx->related = FALSE;
@@ -506,7 +536,14 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
return NDIS_STATUS_INVALID_PACKET;
}
- ctx->hash = OvsExtractLookupCtxHash(ctx);
+ natEntry = OvsNatLookup(&ctx->key, TRUE);
+ if (natEntry) {
+ // Translate address first for reverse NAT
+ ctx->key = natEntry->ctEntry->key;
+ OvsCtKeyReverse(&ctx->key);
+ }
+
+ ctx->hash = OvsHashCtKey(&ctx->key);
return NDIS_STATUS_SUCCESS;
}
@@ -524,17 +561,19 @@ OvsDetectFtpPacket(OvsFlowKey *key) {
*----------------------------------------------------------------------------
*/
static __inline POVS_CT_ENTRY
-OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
+OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
UINT32 l4Offset,
OvsConntrackKeyLookupCtx *ctx,
OvsFlowKey *key,
UINT16 zone,
+ NAT_ACTION_INFO *natInfo,
BOOLEAN commit,
UINT64 currentTime,
BOOLEAN *entryCreated)
{
POVS_CT_ENTRY entry = ctx->entry;
UINT32 state = 0;
+ PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
*entryCreated = FALSE;
/* If an entry was found, update the state based on TCP flags */
@@ -561,8 +600,8 @@ OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
//Delete and update the Conntrack
OvsCtEntryDelete(ctx->entry);
ctx->entry = NULL;
- entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto, l4Offset,
- ctx, key, commit, currentTime,
+ entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset,
+ ctx, key, natInfo, commit, currentTime,
entryCreated);
if (!entry) {
return NULL;
@@ -629,7 +668,7 @@ OvsConntrackSetLabels(OvsFlowKey *key,
}
static __inline NDIS_STATUS
-OvsCtExecute_(PNET_BUFFER_LIST curNbl,
+OvsCtExecute_(OvsForwardingContext *fwdCtx,
OvsFlowKey *key,
OVS_PACKET_HDR_INFO *layers,
BOOLEAN commit,
@@ -641,13 +680,12 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
{
NDIS_STATUS status = NDIS_STATUS_SUCCESS;
POVS_CT_ENTRY entry = NULL;
+ PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
OvsConntrackKeyLookupCtx ctx = { 0 };
LOCK_STATE_EX lockState;
UINT64 currentTime;
NdisGetCurrentSystemTime((LARGE_INTEGER *) ¤tTime);
- /* XXX: Not referenced for now */
- UNREFERENCED_PARAMETER(natInfo);
/* Retrieve the Conntrack Key related fields from packet */
OvsCtSetupLookupCtx(key, zone, &ctx, curNbl, layers->l4Offset);
@@ -659,18 +697,29 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
BOOLEAN entryCreated = FALSE;
if (!entry) {
/* If no matching entry was found, create one and add New state */
- entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto,
+ entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
layers->l4Offset, &ctx,
- key, commit, currentTime,
+ key, natInfo, commit, currentTime,
&entryCreated);
} else {
/* Process the entry and update CT flags */
OvsCtIncrementCounters(entry, ctx.reply, curNbl);
- entry = OvsProcessConntrackEntry(curNbl, layers->l4Offset, &ctx, key,
- zone, commit, currentTime,
+ entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
+ zone, natInfo, commit, currentTime,
&entryCreated);
}
+ /* Note that natInfo is not the same as entry->natInfo here. natInfo
+ is decided by action in the openflow rule, entry->natInfo is decided
+ when the entry is created. In the reverse NAT case, natInfo is
+ NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or
+ NAT_ACTION_DST without NAT_ACTION_REVERSE */
+ if (entry && natInfo->natAction != NAT_ACTION_NONE)
+ {
+ OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
+ key, ctx.reply);
+ }
+
if (entry && mark) {
OvsConntrackSetMark(key, entry, mark->value, mark->mask);
}
@@ -706,7 +755,7 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
*---------------------------------------------------------------------------
*/
NDIS_STATUS
-OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
+OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
OVS_PACKET_HDR_INFO *layers,
OvsFlowKey *key,
const PNL_ATTR a)
@@ -753,12 +802,12 @@ OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
BOOLEAN hasMaxIp = FALSE;
BOOLEAN hasMaxPort = FALSE;
NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) {
- enum ovs_nat_attr sub_type_nest = NlAttrType(natAttr);
- switch(sub_type_nest) {
+ enum ovs_nat_attr subtype = NlAttrType(natAttr);
+ switch(subtype) {
case OVS_NAT_ATTR_SRC:
case OVS_NAT_ATTR_DST:
natActionInfo.natAction |=
- ((sub_type_nest == OVS_NAT_ATTR_SRC)
+ ((subtype == OVS_NAT_ATTR_SRC)
? NAT_ACTION_SRC : NAT_ACTION_DST);
break;
case OVS_NAT_ATTR_IP_MIN:
@@ -816,19 +865,19 @@ OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
}
}
- status = OvsCtExecute_(curNbl, key, layers,
+ status = OvsCtExecute_(fwdCtx, key, layers,
commit, zone, mark, labels, helper, &natActionInfo);
return status;
}
/*
*----------------------------------------------------------------------------
- * ovsConntrackEntryCleaner
+ * OvsConntrackEntryCleaner
* Runs periodically and cleans up the connection tracker
*----------------------------------------------------------------------------
*/
VOID
-ovsConntrackEntryCleaner(PVOID data)
+OvsConntrackEntryCleaner(PVOID data)
{
POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data;
@@ -845,15 +894,13 @@ ovsConntrackEntryCleaner(PVOID data)
}
/* Set the timeout for the thread and cleanup */
- UINT64 currentTime, threadSleepTimeout;
- NdisGetCurrentSystemTime((LARGE_INTEGER *)¤tTime);
- threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL;
+ INT64 threadSleepTimeout = -CT_CLEANUP_INTERVAL;
if (ctTotalEntries) {
for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) {
entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
- if (entry->expiration < currentTime) {
+ if (entry && OvsCtEntryExpired(entry)) {
OvsCtEntryDelete(entry);
}
}
@@ -894,6 +941,7 @@ OvsCtFlush(UINT16 zone)
}
NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
+ OvsNatFlush(zone);
return NDIS_STATUS_SUCCESS;
}
diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
index 875c434..584c8ff 100644
--- a/datapath-windows/ovsext/Conntrack.h
+++ b/datapath-windows/ovsext/Conntrack.h
@@ -20,6 +20,7 @@
#include "precomp.h"
#include "Flow.h"
#include "Debug.h"
+#include "Actions.h"
#include <stddef.h>
#ifdef OVS_DBG_MOD
@@ -86,6 +87,14 @@ typedef struct _OVS_CT_KEY {
UINT64 byteCount;
} OVS_CT_KEY, *POVS_CT_KEY;
+typedef struct _NAT_ACTION_INFO {
+ struct ct_addr minAddr;
+ struct ct_addr maxAddr;
+ uint16_t minPort;
+ uint16_t maxPort;
+ uint16_t natAction;
+} NAT_ACTION_INFO, *PNAT_ACTION_INFO;
+
typedef struct OVS_CT_ENTRY {
OVS_CT_KEY key;
OVS_CT_KEY rev_key;
@@ -94,6 +103,7 @@ typedef struct OVS_CT_ENTRY {
UINT32 mark;
UINT64 timestampStart;
struct ovs_key_ct_labels labels;
+ NAT_ACTION_INFO natInfo;
PVOID parent; /* Points to main connection */
} OVS_CT_ENTRY, *POVS_CT_ENTRY;
@@ -118,14 +128,6 @@ typedef struct OvsConntrackKeyLookupCtx {
BOOLEAN related;
} OvsConntrackKeyLookupCtx;
-typedef struct _NAT_ACTION_INFO {
- struct ct_addr minAddr;
- struct ct_addr maxAddr;
- uint16_t minPort;
- uint16_t maxPort;
- uint16_t natAction;
-} NAT_ACTION_INFO, *PNAT_ACTION_INFO;
-
#define CT_HASH_TABLE_SIZE ((UINT32)1 << 10)
#define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1)
#define CT_INTERVAL_SEC 10000000LL //1s
@@ -172,7 +174,7 @@ OvsGetTcpPayloadLength(PNET_BUFFER_LIST nbl)
VOID OvsCleanupConntrack(VOID);
NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context);
-NDIS_STATUS OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
+NDIS_STATUS OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
OVS_PACKET_HDR_INFO *layers,
OvsFlowKey *key,
const PNL_ATTR a);
@@ -225,4 +227,9 @@ NDIS_STATUS OvsCtHandleFtp(PNET_BUFFER_LIST curNbl,
POVS_CT_ENTRY entry,
BOOLEAN request);
+UINT32 OvsHashCtKey(const OVS_CT_KEY *key);
+BOOLEAN OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey);
+POVS_CT_ENTRY OvsCtLookup(OvsConntrackKeyLookupCtx *ctx);
+
+
#endif /* __OVS_CONNTRACK_H_ */
diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
index 44aea19..75754fb 100644
--- a/datapath-windows/ovsext/ovsext.vcxproj
+++ b/datapath-windows/ovsext/ovsext.vcxproj
@@ -103,6 +103,7 @@
<ClInclude Include="Actions.h" />
<ClInclude Include="Atomic.h" />
<ClInclude Include="BufferMgmt.h" />
+ <ClInclude Include="Conntrack-nat.h" />
<ClInclude Include="Conntrack.h" />
<ClInclude Include="Datapath.h" />
<ClInclude Include="Debug.h" />
@@ -256,6 +257,7 @@
<ItemGroup>
<ClCompile Include="Actions.c" />
<ClCompile Include="BufferMgmt.c" />
+ <ClCompile Include="Conntrack-nat.c" />
<ClCompile Include="Conntrack-related.c" />
<ClCompile Include="Conntrack-ftp.c" />
<ClCompile Include="Conntrack-icmp.c" />
--
2.10.2.windows.1
More information about the dev
mailing list