[ovs-dev] [PATCH v7 2/4] datapath-windows: Add NAT module in conntrack

Yin Lin linyi at vmware.com
Wed May 17 00:11:16 UTC 2017


Thanks Sai for the review! I fixed most of them and explained the remaining ones in the inline comments.

-----Original Message-----
From: Sairam Venugopal 
Sent: Tuesday, May 16, 2017 4:50 PM
To: Yin Lin <linyi at vmware.com>; dev at openvswitch.org
Subject: Re: [ovs-dev] [PATCH v7 2/4] datapath-windows: Add NAT module in conntrack

Hi Yin,

Thanks for the patch. Please find my comments inline.

Thanks,
Sairam




On 5/9/17, 3:59 PM, "ovs-dev-bounces at openvswitch.org on behalf of Yin Lin" <ovs-dev-bounces at openvswitch.org on behalf of linyi at vmware.com> wrote:

>Signed-off-by: Yin Lin <linyi at vmware.com>
>---
> datapath-windows/automake.mk            |   2 +
> datapath-windows/ovsext/Conntrack-nat.c | 424 ++++++++++++++++++++++++++++++++
> datapath-windows/ovsext/Conntrack-nat.h |  39 +++
> 3 files changed, 465 insertions(+)
> create mode 100644 datapath-windows/ovsext/Conntrack-nat.c
> create mode 100644 datapath-windows/ovsext/Conntrack-nat.h
>
>diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
>index 4f7b55a..7177630 100644
>--- a/datapath-windows/automake.mk
>+++ b/datapath-windows/automake.mk
>@@ -16,7 +16,9 @@ EXTRA_DIST += \
> 	datapath-windows/ovsext/Conntrack-icmp.c \
> 	datapath-windows/ovsext/Conntrack-other.c \
> 	datapath-windows/ovsext/Conntrack-related.c \
>+    datapath-windows/ovsext/Conntrack-nat.c \
> 	datapath-windows/ovsext/Conntrack-tcp.c \
>+    datapath-windows/ovsext/Conntrack-nat.h \
> 	datapath-windows/ovsext/Conntrack.c \
> 	datapath-windows/ovsext/Conntrack.h \
> 	datapath-windows/ovsext/Datapath.c \
>diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c
>new file mode 100644
>index 0000000..edf5f8f
>--- /dev/null
>+++ b/datapath-windows/ovsext/Conntrack-nat.c
>@@ -0,0 +1,424 @@
>+#include "Conntrack-nat.h"
>+#include "Jhash.h"
>+
>+PLIST_ENTRY ovsNatTable = NULL;
>+PLIST_ENTRY ovsUnNatTable = NULL;
>+
>+/*
>+ *---------------------------------------------------------------------------
>+ * OvsHashNatKey
>+ *     Hash NAT related fields in a Conntrack key.
>+ *---------------------------------------------------------------------------
>+ */
>+static __inline UINT32
>+OvsHashNatKey(const OVS_CT_KEY *key)
>+{
>+    UINT32 hash = 0;
>+#define HASH_ADD(field) \
>+    hash = OvsJhashBytes(&key->field, sizeof(key->field), hash)
>+
>+    HASH_ADD(src.addr.ipv4_aligned);
>+    HASH_ADD(dst.addr.ipv4_aligned);
>+    HASH_ADD(src.port);
>+    HASH_ADD(dst.port);
>+    HASH_ADD(zone);
>+#undef HASH_ADD
>+    return hash;
>+}
>+
>+/*
>+ *---------------------------------------------------------------------------
>+ * OvsNatKeyAreSame
>+ *     Compare NAT related fields in a Conntrack key.
>+ *---------------------------------------------------------------------------
>+ */
>+static __inline BOOLEAN
>+OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2)
>+{
>+    // XXX: Compare IPv6 key as well
>+#define FIELD_COMPARE(field) \

[Sai]: Nit: move return statement to next line. 
[Yin]: This is a MACRO and I don't want to spread it over lines. (Note that I don't even add a semicolon at the end to make it look like a function)

>+    if (key1->field != key2->field) return FALSE
>+
>+    FIELD_COMPARE(src.addr.ipv4_aligned);
>+    FIELD_COMPARE(dst.addr.ipv4_aligned);
>+    FIELD_COMPARE(src.port);
>+    FIELD_COMPARE(dst.port);
>+    FIELD_COMPARE(zone);
>+    return TRUE;
>+#undef FIELD_COMPARE
>+}
>+
>+/*
>+ *---------------------------------------------------------------------------

[Sai]: Nit - OvsNatGetBucket
[Yin]: Fixed

>+ * OvsNaGetBucket
>+ *     Returns the row of NAT table that has the same hash as the given NAT
>+ *     hash key. If isReverse is TRUE, returns the row of reverse NAT table
>+ *     instead.
>+ *---------------------------------------------------------------------------
>+ */
>+static __inline PLIST_ENTRY
>+OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse)
>+{
>+    uint32_t hash = OvsHashNatKey(key);
>+    if (isReverse) {
>+        return &ovsUnNatTable[hash & NAT_HASH_TABLE_MASK];
>+    } else {
>+        return &ovsNatTable[hash & NAT_HASH_TABLE_MASK];
>+    }
>+}
>+
>+/*
>+ *---------------------------------------------------------------------------
>+ * OvsNatInit
>+ *     Initialize NAT related resources.
>+ *---------------------------------------------------------------------------
>+ */
>+NTSTATUS OvsNatInit()
>+{
>+    ASSERT(ovsNatTable == NULL);
>+
>+    /* Init the Hash Buffer */
>+    ovsNatTable = OvsAllocateMemoryWithTag(
>+        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
>+        OVS_CT_POOL_TAG);
>+    if (ovsNatTable == NULL) {
>+        goto failNoMem;
>+    }
>+
>+    ovsUnNatTable = OvsAllocateMemoryWithTag(
>+        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
>+        OVS_CT_POOL_TAG);
>+    if (ovsUnNatTable == NULL) {
>+        goto freeNatTable;
>+    }
>+
>+    for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
>+        InitializeListHead(&ovsNatTable[i]);
>+        InitializeListHead(&ovsUnNatTable[i]);
>+    }
>+    return STATUS_SUCCESS;
>+
>+freeNatTable:
>+    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
>+failNoMem:
>+    return STATUS_INSUFFICIENT_RESOURCES;
>+}
>+
>+/*
>+ *----------------------------------------------------------------------------
>+ * OvsNatFlush
>+ *     Flushes out all NAT entries that match the given zone.
>+ *----------------------------------------------------------------------------
>+ */
>+VOID OvsNatFlush(UINT16 zone)
>+{
>+    PLIST_ENTRY link, next;
>+    for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
>+        LIST_FORALL_SAFE(&ovsNatTable[i], link, next) {
>+            POVS_NAT_ENTRY entry =
>+                CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
>+            /* zone is a non-zero value */
>+            if (!zone || zone == entry->key.zone) {
>+                OvsNatDeleteEntry(entry);
>+            }
>+        }
>+    }
>+}
>+
>+/*
>+ *----------------------------------------------------------------------------
>+ * OvsNatCleanup
>+ *     Releases all NAT related resources.
>+ *----------------------------------------------------------------------------
>+ */
>+VOID OvsNatCleanup()
>+{

[Sai]: Nit: move return statement to next line. 
[Yin] Fixed and also added brackets around it according to coding standard.


>+    if (ovsNatTable == NULL) return;
>+    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
>+    OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG);
>+    ovsNatTable = NULL;
>+    ovsUnNatTable = NULL;
>+}
>+
>+/*
>+ *----------------------------------------------------------------------------
>+ * OvsNatPacket
>+ *     Performs NAT operation on the packet by replacing the source/destinaton
>+ *     address/port based on natAction. If reverse is TRUE, perform unNAT
>+ *     instead.
>+ *----------------------------------------------------------------------------
>+ */
>+VOID
>+OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
>+             const OVS_CT_ENTRY *entry,
>+             UINT16 natAction,
>+             OvsFlowKey *key,
>+             BOOLEAN reverse)
>+{
>+    UINT32 natFlag;
>+    const struct ct_endpoint* endpoint;
>+    /* When it is NAT, only entry->rev_key contains NATTED address;
>+       When it is unNAT, only entry->key contains the UNNATTED address;*/
>+    const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key;
>+    BOOLEAN isSrcNat;
>+
>+    if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
>+        return;
>+    }
>+    isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||
>+                ((natAction & NAT_ACTION_DST) && reverse));
>+
>+    if (isSrcNat) {
>+        /* Flag is set to SNAT for SNAT case and the reverse DNAT case */
>+        natFlag = OVS_CS_F_SRC_NAT;
>+        /* Note that ctKey is the key in the other direction, so
>+           endpoint has to be reverted, i.e. ctKey->dst for SNAT
>+           and ctKey->src for DNAT */
>+        endpoint = &ctKey->dst;
>+    } else {
>+        natFlag = OVS_CS_F_DST_NAT;
>+        endpoint = &ctKey->src;
>+    }
>+    key->ct.state |= natFlag;
>+    if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) {
>+        OvsUpdateAddressAndPort(ovsFwdCtx,
>+                                endpoint->addr.ipv4_aligned,
>+                                endpoint->port, isSrcNat,
>+                                !reverse);
>+        if (isSrcNat) {
>+            key->ipKey.nwSrc = endpoint->addr.ipv4_aligned;
>+        } else {
>+            key->ipKey.nwDst = endpoint->addr.ipv4_aligned;
>+        }
>+    } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
>+        // XXX: IPv6 packet not supported yet.
>+        return;
>+    }
>+    if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
>+        if (isSrcNat) {
>+            if (key->ipKey.l4.tpSrc != 0) {
>+                key->ipKey.l4.tpSrc = endpoint->port;
>+            }
>+        } else {
>+            if (key->ipKey.l4.tpDst != 0) {
>+                key->ipKey.l4.tpDst = endpoint->port;
>+            }
>+        }
>+    }
>+}
>+
>+
>+/*
>+ *----------------------------------------------------------------------------
>+ * OvsNatHashRange
>+ *     Compute hash for a range of addresses specified in natInfo.
>+ *----------------------------------------------------------------------------
>+ */
>+static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis)
>+{
>+    UINT32 hash = basis;
>+#define HASH_ADD(field) \
>+    hash = OvsJhashBytes(&field, sizeof(field), hash)
>+
>+    HASH_ADD(entry->natInfo.minAddr);
>+    HASH_ADD(entry->natInfo.maxAddr);
>+    HASH_ADD(entry->key.dl_type);
>+    HASH_ADD(entry->key.nw_proto);
>+    HASH_ADD(entry->key.zone);
>+#undef HASH_ADD
>+    return hash;
>+}
>+
>+/*
>+ *----------------------------------------------------------------------------
>+ * OvsNatAddEntry
>+ *     Add an entry to the NAT table. Also updates the reverse NAT lookup
>+ *     table.
>+ *----------------------------------------------------------------------------
>+ */
>+VOID
>+OvsNatAddEntry(OVS_NAT_ENTRY* entry)
>+{
>+    InsertHeadList(OvsNatGetBucket(&entry->key, FALSE),
>+                   &entry->link);
>+    InsertHeadList(OvsNatGetBucket(&entry->value, TRUE),
>+                   &entry->reverseLink);
>+}
>+
>+/*
>+ *----------------------------------------------------------------------------
>+ * OvsNatCtEntry
>+ *     Update an Conntrack entry with NAT information. Translated address and
>+ *     port will be generated and write back to the conntrack entry as a
>+ *     result.
>+ *----------------------------------------------------------------------------
>+ */
[Sai] - Can you name this OvsNatUpdateCtEntry? Or something to imply what this function does.
[Yin] - Update is a little bit confusing as it doesn't explain what's the update is about. I intended to mean the function NATs a CtEntry and NAT is a verb here. But I see what you are coming from and understand that the name is very confusing. Let me rename it OvsNatTranslateCtEntry as a compromise.

>+BOOLEAN
>+OvsNatCtEntry(OVS_CT_ENTRY *entry)
>+{
>+    const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024;
>+    const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535;
>+
>+    uint16_t minPort;
>+    uint16_t maxPort;
>+    uint16_t firstPort;
>+
>+    uint32_t hash = OvsNatHashRange(entry, 0);
>+
>+    if ((entry->natInfo.natAction & NAT_ACTION_SRC) &&
>+        (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) {
>+        firstPort = minPort = maxPort = ntohs(entry->key.src.port);
>+    } else if ((entry->natInfo.natAction & NAT_ACTION_DST) &&
>+               (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) {
>+        firstPort = minPort = maxPort = ntohs(entry->key.dst.port);
>+    } else {
>+        uint16_t portDelta = entry->natInfo.maxPort - entry->natInfo.minPort;
>+        uint16_t portIndex = (uint16_t) hash % (portDelta + 1);
>+        firstPort = entry->natInfo.minPort + portIndex;
>+        minPort = entry->natInfo.minPort;
>+        maxPort = entry->natInfo.maxPort;
>+    }
>+

[Sai] - Can you move these definitions to the top? 
[Yin] Fixed


>+    uint32_t addrDelta = 0;
>+    uint32_t addrIndex;
>+    struct ct_addr ctAddr, maxCtAddr;
>+    memset(&ctAddr, 0, sizeof ctAddr);
>+    memset(&maxCtAddr, 0, sizeof maxCtAddr);
>+    maxCtAddr = entry->natInfo.maxAddr;
>+
>+    if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
>+        addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) -
>+                    ntohl(entry->natInfo.minAddr.ipv4_aligned);
>+        addrIndex = hash % (addrDelta + 1);
>+        ctAddr.ipv4_aligned = htonl(
>+            ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex);
>+    } else {
>+        // XXX: IPv6 not supported
>+        return FALSE;
>+    }

[Sai] - Can you move these definitions to the top?
[Yin] Fixed


>+

>+    uint16_t port = firstPort;
>+    BOOLEAN allPortsTried = FALSE;
>+    BOOLEAN originalPortsTried = FALSE;
>+    struct ct_addr firstAddr = ctAddr;
>+    for (;;) {
>+        if (entry->natInfo.natAction & NAT_ACTION_SRC) {
>+            entry->rev_key.dst.addr = ctAddr;
>+            entry->rev_key.dst.port = htons(port);
>+        } else {
>+            entry->rev_key.src.addr = ctAddr;
>+            entry->rev_key.src.port = htons(port);
>+        }
>+
>+        OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE);
>+
>+        if (!natEntry) {
>+            natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry),
>+                                                OVS_CT_POOL_TAG);

[Sai]: Need to check if natEntry is not NULL and handle the Insufficient resources case.
[Yin] Fixed.

nit: NdisMoveMemory instead of memcpy

>+            memcpy(&natEntry->key, &entry->key,
>+                   sizeof natEntry->key);
>+            memcpy(&natEntry->value, &entry->rev_key,
>+                   sizeof natEntry->value);
>+            natEntry->ctEntry = entry;
>+            OvsNatAddEntry(natEntry);
>+            return TRUE;
>+        } else if (!allPortsTried) {
>+            if (minPort == maxPort) {
>+                allPortsTried = TRUE;
>+            } else if (port == maxPort) {
>+                port = minPort;
>+            } else {
>+                port++;
>+            }
>+            if (port == firstPort) {
>+                allPortsTried = TRUE;
>+            }
>+        } else {
>+            if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) {
>+                if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
>+                    ctAddr.ipv4_aligned = htonl(
>+                        ntohl(ctAddr.ipv4_aligned) + 1);
>+                } else {
>+                    // XXX: IPv6 not supported

[Sai] - This can be done later. However, it will be good to return NDIS_STATUS

and return NDIS_STATUS_INVALID. At the very minimum, we should add a log msg.

[Yin] Shashank raised the similar concern earlier. We don't have IPv6 support anywhere and feels bad about
spamming error message whenever an address is concerned. We don't have log rate control so if there is a IPv6 rule then the log
is going to explode and prevent us from seeing the entries we are really interested in. If we have partial IPv6 support,
then I agree that we should add logs in areas where the support is not extended yet.

>+                    return FALSE;
>+                }
>+            } else {
>+                ctAddr = entry->natInfo.minAddr;
>+            }
>+            if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) {
>+                if (!originalPortsTried) {
>+                    originalPortsTried = TRUE;
>+                    ctAddr = entry->natInfo.minAddr;
>+                    minPort = MIN_NAT_EPHEMERAL_PORT;
>+                    maxPort = MAX_NAT_EPHEMERAL_PORT;
>+                } else {
>+                    break;
>+                }
>+            }
>+            firstPort = minPort;
>+            port = firstPort;
>+            allPortsTried = FALSE;
>+        }
>+    }
>+    return FALSE;
>+}
>+
>+/*
>+ *----------------------------------------------------------------------------
>+ * OvsNatLookup
>+ *     Look up a NAT entry with the given key in the NAT table.
>+ *     If reverse is TRUE, look up a NAT entry with the given value instead.
>+ *----------------------------------------------------------------------------
>+ */
>+POVS_NAT_ENTRY
>+OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse)
>+{
>+    PLIST_ENTRY link;
>+    POVS_NAT_ENTRY entry;
>+
>+    LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) {
>+        if (reverse) {
>+            entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink);
>+
>+            if (OvsNatKeyAreSame(ctKey, &entry->value)) {
>+                return entry;
>+            }
>+        } else {
>+            entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
>+
>+            if (OvsNatKeyAreSame(ctKey, &entry->key)) {
>+                return entry;
>+            }
>+        }
>+    }
>+    return NULL;
>+}
>+
>+/*
>+ *----------------------------------------------------------------------------
>+ * OvsNatDeleteEntry
>+ *     Delete a NAT entry.
>+ *----------------------------------------------------------------------------
>+ */
>+VOID
>+OvsNatDeleteEntry(POVS_NAT_ENTRY entry)
>+{
>+    if (entry == NULL) {
>+        return;
>+    }
>+    RemoveEntryList(&entry->link);
>+    RemoveEntryList(&entry->reverseLink);
>+    OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
>+}
>+
>+/*
>+ *----------------------------------------------------------------------------
>+ * OvsNatDeleteKey
>+ *     Delete a NAT entry with the given key.
>+ *----------------------------------------------------------------------------
>+ */
>+VOID
>+OvsNatDeleteKey(const OVS_CT_KEY *key)
>+{
>+    OvsNatDeleteEntry(OvsNatLookup(key, FALSE));
>+}
>diff --git a/datapath-windows/ovsext/Conntrack-nat.h b/datapath-windows/ovsext/Conntrack-nat.h
>new file mode 100644
>index 0000000..aaa8ceb
>--- /dev/null
>+++ b/datapath-windows/ovsext/Conntrack-nat.h
>@@ -0,0 +1,39 @@
>+#ifndef _CONNTRACK_NAT_H
>+#define _CONNTRACK_NAT_H
>+
>+#include "precomp.h"
>+#include "Flow.h"
>+#include "Debug.h"
>+#include <stddef.h>
>+#include "Conntrack.h"
>+
>+#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10)
>+#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1)
>+
>+typedef struct OVS_NAT_ENTRY {
>+    LIST_ENTRY link;
>+    LIST_ENTRY reverseLink;
>+    OVS_CT_KEY key;
>+    OVS_CT_KEY value;
>+    POVS_CT_ENTRY  ctEntry;
>+} OVS_NAT_ENTRY, *POVS_NAT_ENTRY;
>+
>+__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) {

[Sai] - What does the double !! do here?
[Yin] - This is a bitwise masking operation on integer and the return value is a Boolean, so it needs double !! to ensure return value is either 0 or 1.


>+    return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST));
>+}
>+
>+NTSTATUS OvsNatInit();
>+VOID OvsNatFlush(UINT16 zone);
>+
>+VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry);
>+
>+VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry);
>+VOID OvsNatDeleteKey(const OVS_CT_KEY *key);
>+VOID OvsNatCleanup();
>+
>+POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse);
>+BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry);
>+VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY *entry,
>+                  UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse);
>+
>+#endif
>-- 
>2.10.2.windows.1
>
>_______________________________________________
>dev mailing list
>dev at openvswitch.org
>https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=Z6vowHUOjP5ysP_g372c49Nqc1vEKqHKNBkR5Q5Z7uo&m=veEopphpitZ1dkD8nov8CuLArrdnUU8WDczrz9UClwo&s=U88QV-uHXUzVrccSWR_EYp7F7ySpFOMRBlwZy34qfzA&e= 


More information about the dev mailing list