[ovs-dev] [PATCH v3] datapath-windows: Add Connection Tracking Support

Sairam Venugopal vsairam at vmware.com
Wed Apr 13 18:54:03 UTC 2016


Enable support for Stateful Firewall in Hyper-V by adding a Connection
Tracking module. The module has been ported over from the userspace
implementation patch of a similar name.

The current version of the module supports the ct zone, mark and label
fields for TCP packets only. Support for other packet formats will be added
in subsequent patches.

The conntrack-tcp module is adapted from FreeBSD's pf subsystem and hence
the BSD license. It has been ported over to match OVS Hyper-V coding
style.

Signed-off-by: Sairam Venugopal <vsairam at vmware.com>
Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
Co-Authored-by: Daniele Di Proietto <diproiettod at vmware.com>
---
 NOTICE                                  |   5 +
 datapath-windows/automake.mk            |   3 +
 datapath-windows/ovsext/Actions.c       |  23 ++
 datapath-windows/ovsext/Conntrack-tcp.c | 532 ++++++++++++++++++++++++++++++++
 datapath-windows/ovsext/Conntrack.c     | 530 +++++++++++++++++++++++++++++++
 datapath-windows/ovsext/Conntrack.h     | 102 ++++++
 datapath-windows/ovsext/Debug.h         |   1 +
 datapath-windows/ovsext/DpInternal.h    |   7 +
 datapath-windows/ovsext/Flow.c          | 128 +++++++-
 datapath-windows/ovsext/Switch.c        |  10 +-
 datapath-windows/ovsext/Types.h         |   6 +
 datapath-windows/ovsext/Util.h          |   3 +-
 datapath-windows/ovsext/ovsext.vcxproj  |   3 +
 debian/copyright.in                     |   1 +
 14 files changed, 1350 insertions(+), 4 deletions(-)
 create mode 100644 datapath-windows/ovsext/Conntrack-tcp.c
 create mode 100644 datapath-windows/ovsext/Conntrack.c
 create mode 100644 datapath-windows/ovsext/Conntrack.h

diff --git a/NOTICE b/NOTICE
index 4a3e61c..6030b8b 100644
--- a/NOTICE
+++ b/NOTICE
@@ -38,3 +38,8 @@ Copyright (c) 2008, 2009, 2010 Sten Spans <sten at blinkenlights.nl>
 Auto Attach implementation
 Copyright (c) 2014, 2015 WindRiver, Inc
 Copyright (c) 2014, 2015 Avaya, Inc
+
+TCP connection tracker from FreeBSD pf, BSD licensed
+Copyright (c) 2001 Daniel Hartmeier
+Copyright (c) 2002 - 2008 Henning Brauer
+Copyright (c) 2012 Gleb Smirnoff <glebius at FreeBSD.org>
diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
index 04fc97f..c9af806 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -13,6 +13,9 @@ EXTRA_DIST += \
 	datapath-windows/ovsext/Atomic.h \
 	datapath-windows/ovsext/BufferMgmt.c \
 	datapath-windows/ovsext/BufferMgmt.h \
+	datapath-windows/ovsext/Conntrack-tcp.c \
+	datapath-windows/ovsext/Conntrack.c \
+	datapath-windows/ovsext/Conntrack.h \
 	datapath-windows/ovsext/Datapath.c \
 	datapath-windows/ovsext/Datapath.h \
 	datapath-windows/ovsext/Debug.c \
diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
index 3e5dac9..cf54ae2 100644
--- a/datapath-windows/ovsext/Actions.c
+++ b/datapath-windows/ovsext/Actions.c
@@ -17,6 +17,7 @@
 #include "precomp.h"
 
 #include "Actions.h"
+#include "Conntrack.h"
 #include "Debug.h"
 #include "Event.h"
 #include "Flow.h"
@@ -1786,6 +1787,28 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext,
             break;
         }
 
+        case OVS_ACTION_ATTR_CT:
+        {
+            if (ovsFwdCtx.destPortsSizeOut > 0
+                || ovsFwdCtx.tunnelTxNic != NULL
+                || ovsFwdCtx.tunnelRxNic != NULL) {
+                status = OvsOutputBeforeSetAction(&ovsFwdCtx);
+                if (status != NDIS_STATUS_SUCCESS) {
+                    dropReason = L"OVS-adding destination failed";
+                    goto dropit;
+                }
+            }
+
+            status = OvsExecuteConntrackAction(ovsFwdCtx.curNbl, layers,
+                                               key, (const PNL_ATTR)a);
+            if (status != NDIS_STATUS_SUCCESS) {
+                OVS_LOG_ERROR("CT Action failed");
+                dropReason = L"OVS-conntrack action failed";
+                goto dropit;
+            }
+            break;
+        }
+
         case OVS_ACTION_ATTR_RECIRC:
         {
             if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c
new file mode 100644
index 0000000..3e25ba5
--- /dev/null
+++ b/datapath-windows/ovsext/Conntrack-tcp.c
@@ -0,0 +1,532 @@
+/*-
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2002 - 2008 Henning Brauer
+ * Copyright (c) 2012 Gleb Smirnoff <glebius at FreeBSD.org>
+ * Copyright (c) 2015, 2016 VMware, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    - Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    - Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Effort sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F30602-01-2-0537.
+ *
+ *      $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $
+ */
+
+#include "Conntrack.h"
+#include <stddef.h>
+
+/* Sequence/window tracking state kept for one endpoint of a tracked TCP
+ * connection.  Each connection holds two of these, one per direction. */
+struct tcp_peer {
+    enum ct_dpif_tcp_state state;
+    uint32_t               seqlo;  /* Max sequence number sent     */
+    uint32_t               seqhi;  /* Max the other end ACKd + win */
+    uint16_t               max_win;/* largest window (pre scaling) */
+    uint8_t                wscale; /* window scaling factor        */
+};
+
+/* TCP-specific connection entry.  'up' is the generic entry that
+ * OvsNewTcpConntrack() hands back to the rest of the tracker; because
+ * OvsCtEntryDelete() frees the allocation through that generic pointer,
+ * 'up' has to remain the first member.  peer[0] is the sender of
+ * non-reply packets, peer[1] the sender of reply packets. */
+struct conn_tcp {
+    struct OVS_CT_ENTRY up;
+    struct tcp_peer peer[2];
+};
+
+/* TCP option kinds recognised by the option scanner (OvsTcpGetWscale).
+ * Every other kind is skipped using its length byte. */
+enum {
+    TCPOPT_EOL,
+    TCPOPT_NOP,
+    TCPOPT_WINDOW = 3,
+};
+
+/* Given POINTER, the address of the given MEMBER in a STRUCT object, returns
+   the STRUCT object.  No type checking is performed: POINTER is trusted to
+   really point at MEMBER inside a STRUCT. */
+#define CONTAINER_OF(POINTER, STRUCT, MEMBER)                           \
+        ((STRUCT *) (void *) ((char *) (POINTER) - \
+         offsetof (STRUCT, MEMBER)))
+
+
+/* TCP sequence numbers are 32 bit integers operated
+ * on with modular arithmetic.  These macros can be
+ * used to compare such integers.
+ *
+ * The comparison is done on the sign of the difference converted to 'int',
+ * so the operands are expected to be 32-bit unsigned values.  Arguments may
+ * be evaluated more than once (SEQ_MIN/SEQ_MAX), so they must be free of
+ * side effects. */
+#define SEQ_LT(a,b)     ((int)((a)-(b)) < 0)
+#define SEQ_LEQ(a,b)    ((int)((a)-(b)) <= 0)
+#define SEQ_GT(a,b)     ((int)((a)-(b)) > 0)
+#define SEQ_GEQ(a,b)    ((int)((a)-(b)) >= 0)
+
+#define SEQ_MIN(a, b)   ((SEQ_LT(a, b)) ? (a) : (b))
+#define SEQ_MAX(a, b)   ((SEQ_GT(a, b)) ? (a) : (b))
+
+/* Bit masks for the individual TCP header flags. */
+#define TCP_FIN 0x001
+#define TCP_SYN 0x002
+#define TCP_RST 0x004
+#define TCP_PSH 0x008
+#define TCP_ACK 0x010
+#define TCP_URG 0x020
+#define TCP_ECE 0x040
+#define TCP_CWR 0x080
+#define TCP_NS  0x100
+
+/* X-macro list of per-connection TCP flags.  The list is expanded twice
+ * below: the first expansion numbers the flags consecutively
+ * (<FLAG>_COUNT_), the second turns each number into a single-bit mask
+ * named CT_DPIF_TCPF_<FLAG>. */
+#define CT_DPIF_TCP_FLAGS \
+    CT_DPIF_TCP_FLAG(WINDOW_SCALE) \
+    CT_DPIF_TCP_FLAG(SACK_PERM) \
+    CT_DPIF_TCP_FLAG(CLOSE_INIT) \
+    CT_DPIF_TCP_FLAG(BE_LIBERAL) \
+    CT_DPIF_TCP_FLAG(DATA_UNACKNOWLEDGED) \
+    CT_DPIF_TCP_FLAG(MAXACK_SET) \

+/* Bit positions of the flags listed above. */
+enum ct_dpif_tcp_flags_count_ {
+#define CT_DPIF_TCP_FLAG(FLAG) FLAG##_COUNT_,
+    CT_DPIF_TCP_FLAGS
+#undef CT_DPIF_TCP_FLAG
+};
+
+/* Single-bit masks, one per flag listed above. */
+enum ct_dpif_tcp_flags {
+#define CT_DPIF_TCP_FLAG(FLAG) CT_DPIF_TCPF_##FLAG = (1 << \
+                                                      FLAG##_COUNT_),
+    CT_DPIF_TCP_FLAGS
+#undef CT_DPIF_TCP_FLAG
+};
+
+
+/* X-macro list of the TCP states tracked per peer; expanded below into
+ * enum ct_dpif_tcp_state with CT_DPIF_TCPS_<STATE> enumerators.
+ *
+ * The order of the list is significant: the state machine compares states
+ * with '<' and '>=' (for example "state < CT_DPIF_TCPS_SYN_SENT" or
+ * "state >= CT_DPIF_TCPS_FIN_WAIT_2"), so entries must not be reordered. */
+#define CT_DPIF_TCP_STATES \
+    CT_DPIF_TCP_STATE(CLOSED) \
+    CT_DPIF_TCP_STATE(LISTEN) \
+    CT_DPIF_TCP_STATE(SYN_SENT) \
+    CT_DPIF_TCP_STATE(SYN_RECV) \
+    CT_DPIF_TCP_STATE(ESTABLISHED) \
+    CT_DPIF_TCP_STATE(CLOSE_WAIT) \
+    CT_DPIF_TCP_STATE(FIN_WAIT_1) \
+    CT_DPIF_TCP_STATE(CLOSING) \
+    CT_DPIF_TCP_STATE(LAST_ACK) \
+    CT_DPIF_TCP_STATE(FIN_WAIT_2) \
+    CT_DPIF_TCP_STATE(TIME_WAIT)
+
+enum ct_dpif_tcp_state {
+#define CT_DPIF_TCP_STATE(STATE) CT_DPIF_TCPS_##STATE,
+    CT_DPIF_TCP_STATES
+#undef CT_DPIF_TCP_STATE
+};
+
+/* Largest window-scale shift accepted; larger advertised shifts are clamped
+ * to this value when the option is parsed. */
+#define TCP_MAX_WSCALE 14
+/* Marker bit stored in tcp_peer.wscale once a window-scale option was seen. */
+#define CT_WSCALE_FLAG 0x80
+/* Marker bit stored in tcp_peer.wscale when the connection was picked up
+ * from a non-SYN packet, i.e. the negotiated scale was never observed. */
+#define CT_WSCALE_UNKNOWN 0x40
+/* Low bits of tcp_peer.wscale that hold the shift count itself. */
+#define CT_WSCALE_MASK 0xf
+
+/* pf does this in pf_normalize_tcp(), and it is called only if scrub
+ * is enabled.  We're not scrubbing, but this check seems reasonable.
+ *
+ * NOTE: despite its name this helper reports a *problem*: it returns TRUE
+ * when the flag combination in 'tcp' is illegal and FALSE when the flags
+ * are acceptable.  Callers treat a TRUE result as "invalid packet". */
+static __inline BOOLEAN
+OvsConntrackValidateTcpFlags(const TCPHdr *tcp)
+{
+    /* SYN must not be combined with RST.  pf strips FIN from a SYN|FIN;
+     * here such a packet is simply reported as invalid. */
+    if (tcp->syn && (tcp->rst || tcp->fin)) {
+        return TRUE;
+    }
+
+    /* Illegal packet: without SYN, at least one of ACK or RST is required. */
+    if (!tcp->syn && !tcp->ack && !tcp->rst) {
+        return TRUE;
+    }
+
+    /* FIN, PSH and URG are only valid if ACK is set. */
+    if (!tcp->ack && (tcp->fin || tcp->psh || tcp->urg)) {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/* Scans the TCP options that follow 'tcp' for a window-scale option.
+ *
+ * Returns 0 when no window-scale option is found.  Otherwise returns the
+ * advertised shift, clamped to TCP_MAX_WSCALE, with CT_WSCALE_FLAG set.
+ *
+ * The scan is bounded by the header length taken from 'tcp->doff'; it stops
+ * early on an end-of-list option or on an option whose length byte would
+ * run past the end of the header.
+ *
+ * NOTE(review): assumes the option bytes are contiguous in memory right
+ * after the fixed header -- confirm against the callers. */
+static __inline uint8_t
+OvsTcpGetWscale(const TCPHdr *tcp)
+{
+    unsigned hdrLen = tcp->doff * 4;
+    unsigned len;
+    const uint8_t *opt = (const uint8_t *)(tcp + 1);
+    uint8_t wscale = 0;
+    uint8_t optlen;
+
+    /* A data offset that does not even cover the fixed header would make
+     * the unsigned subtraction below wrap around. */
+    if (hdrLen <= sizeof *tcp) {
+        return wscale;
+    }
+    len = hdrLen - (unsigned)sizeof *tcp;
+
+    while (len >= 3) {
+        if (*opt == TCPOPT_EOL) {
+            break;
+        }
+        switch (*opt) {
+        case TCPOPT_NOP:
+            /* Single-byte padding option: no length byte. */
+            opt++;
+            len--;
+            break;
+        case TCPOPT_WINDOW:
+            /* Layout is kind, length, shift: the shift is the third byte. */
+            wscale = MIN(opt[2], TCP_MAX_WSCALE);
+            wscale |= CT_WSCALE_FLAG;
+            /* fall through */
+        default:
+            /* The option length lives in the second byte (opt[1]). */
+            optlen = opt[1];
+            if (optlen < 2) {
+                optlen = 2;
+            }
+            if (optlen > len) {
+                /* Truncated/malformed option: stop instead of letting
+                 * 'len' wrap and walking past the header. */
+                return wscale;
+            }
+            len -= optlen;
+            opt += optlen;
+        }
+    }
+
+    return wscale;
+}
+
+/* Returns the number of bytes that follow the fixed-size TCP header in the
+ * first NET_BUFFER of 'nbl', computed from the IPv4 total length.
+ *
+ * Returns 0 when the IP header cannot be read or when the total length is
+ * too small to hold the IP header plus a fixed TCP header.
+ *
+ * NOTE(review): the IP header is read at the current data offset of the
+ * NET_BUFFER, i.e. the buffer is presumed to start at the IPv4 header --
+ * confirm against the callers.
+ * NOTE(review): only sizeof(TCPHdr) is subtracted, so any TCP options are
+ * counted as payload. */
+static __inline uint32_t
+OvsGetTcpPayloadLength(PNET_BUFFER_LIST nbl)
+{
+    IPHdr *ipHdr;
+    IPHdr ipStorage;    /* scratch space if the header is not contiguous */
+    PNET_BUFFER curNb;
+    uint32_t totLen;
+    uint32_t hdrLen;
+
+    curNb = NET_BUFFER_LIST_FIRST_NB(nbl);
+    ipHdr = NdisGetDataBuffer(curNb, sizeof *ipHdr, (PVOID) &ipStorage,
+                              1 /*no align*/, 0);
+    if (ipHdr == NULL) {
+        /* Fewer than sizeof(IPHdr) bytes available, or mapping failed. */
+        return 0;
+    }
+
+    totLen = (UINT16)ntohs(ipHdr->tot_len);
+    hdrLen = (uint32_t)(ipHdr->ihl * 4) + (uint32_t)sizeof(TCPHdr);
+
+    /* Do not let a bogus total length wrap the unsigned result. */
+    return totLen > hdrLen ? totLen - hdrLen : 0;
+}
+
+/* Re-arms the lifetime of 'conn': the entry's expiration becomes
+ * 'now' + 'interval'.  Both values use the unit of the expiration field. */
+static __inline void
+OvsConntrackUpdateExpiration(struct conn_tcp *conn,
+                             long long now,
+                             long long interval)
+{
+    long long deadline = now + interval;
+
+    conn->up.expiration = deadline;
+}
+
+/* Converts a pointer to the generic entry embedded in a struct conn_tcp
+ * (its 'up' member) back into a pointer to the enclosing struct conn_tcp. */
+static __inline struct conn_tcp*
+OvsCastConntrackEntryToTcpEntry(OVS_CT_ENTRY* conn)
+{
+    struct conn_tcp *tcpEntry = CONTAINER_OF(conn, struct conn_tcp, up);
+
+    return tcpEntry;
+}
+
+/*
+ * Runs one TCP segment through the pf-derived state machine of an existing
+ * connection entry.
+ *
+ *   conn_  - generic entry embedded in a struct conn_tcp.
+ *   tcp    - TCP header of the segment.
+ *   nbl    - NET_BUFFER_LIST holding the packet; used for the payload length.
+ *   reply  - TRUE when the segment was sent by peer[1], FALSE for peer[0].
+ *   now    - current time, in the unit of the entry's expiration field.
+ *
+ * Returns CT_UPDATE_VALID when the segment fits the tracked sequence
+ * windows, CT_UPDATE_INVALID when its flags are illegal or it falls outside
+ * those windows, and CT_UPDATE_NEW when a SYN arrives while both peers are
+ * at FIN_WAIT_2 or later.  In the CT_UPDATE_NEW case both peers are reset to
+ * CLOSED and the entry is meant to be replaced by the caller.
+ *
+ * The expiration is only refreshed on the fully validated path; the
+ * intervals are multiples of 10000000 per second.
+ */
+enum CT_UPDATE_RES
+OvsConntrackUpdateTcpEntry(struct OVS_CT_ENTRY* conn_,
+                           const TCPHdr *tcp,
+                           PNET_BUFFER_LIST nbl,
+                           BOOLEAN reply,
+                           UINT64 now)
+{
+    struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_);
+    /* The peer that sent 'pkt' */
+    struct tcp_peer *src = &conn->peer[reply ? 1 : 0];
+    /* The peer that should receive 'pkt' */
+    struct tcp_peer *dst = &conn->peer[reply ? 0 : 1];
+    uint8_t sws = 0, dws = 0;
+    uint16_t win = ntohs(tcp->window);
+    uint32_t ack, end, seq, orig_seq;
+    uint32_t p_len = OvsGetTcpPayloadLength(nbl);
+    int ackskew;
+
+    /* TRUE from the flag check means the combination is illegal. */
+    if (OvsConntrackValidateTcpFlags(tcp)) {
+        return CT_UPDATE_INVALID;
+    }
+
+    if ((tcp->syn) && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2 &&
+            src->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
+        src->state = dst->state = CT_DPIF_TCPS_CLOSED;
+        return CT_UPDATE_NEW;
+    }
+
+    if (src->wscale & CT_WSCALE_FLAG
+        && dst->wscale & CT_WSCALE_FLAG
+        && !(tcp->syn)) {
+
+        sws = src->wscale & CT_WSCALE_MASK;
+        dws = dst->wscale & CT_WSCALE_MASK;
+
+    } else if (src->wscale & CT_WSCALE_UNKNOWN
+        && dst->wscale & CT_WSCALE_UNKNOWN
+        && !(tcp->syn)) {
+
+        sws = TCP_MAX_WSCALE;
+        dws = TCP_MAX_WSCALE;
+    }
+
+    /*
+     * Sequence tracking algorithm from Guido van Rooij's paper:
+     *   http://www.madison-gurkha.com/publications/tcp_filtering/
+     *      tcp_filtering.ps
+     */
+
+    orig_seq = seq = ntohl(tcp->seq);
+    /* The acknowledgment *number* lives in 'ack_seq'; 'tcp->ack' is the
+     * one-bit ACK flag and must not be byte-swapped as a sequence number. */
+    ack = ntohl(tcp->ack_seq);
+
+    if (src->state < CT_DPIF_TCPS_SYN_SENT) {
+        /* First packet from this end. Set its state */
+
+        end = seq + p_len;
+        if (tcp->syn) {
+            end++;
+            if (dst->wscale & CT_WSCALE_FLAG) {
+                src->wscale = OvsTcpGetWscale(tcp);
+                if (src->wscale & CT_WSCALE_FLAG) {
+                    /* Remove scale factor from initial window */
+                    sws = src->wscale & CT_WSCALE_MASK;
+                    win = DIV_ROUND_UP((uint32_t) win, 1 << sws);
+                    dws = dst->wscale & CT_WSCALE_MASK;
+                } else {
+                    /* fixup other window */
+                    dst->max_win <<= dst->wscale &
+                        CT_WSCALE_MASK;
+                    /* in case of a retrans SYN|ACK */
+                    dst->wscale = 0;
+                }
+            }
+        }
+        if (tcp->fin) {
+            end++;
+        }
+
+        src->seqlo = seq;
+        src->state = CT_DPIF_TCPS_SYN_SENT;
+        /*
+         * May need to slide the window (seqhi may have been set by
+         * the crappy stack check or if we picked up the connection
+         * after establishment)
+         */
+        if (src->seqhi == 1 ||
+                SEQ_GEQ(end + MAX(1, dst->max_win << dws),
+                        src->seqhi)) {
+            src->seqhi = end + MAX(1, dst->max_win << dws);
+        }
+        if (win > src->max_win) {
+            src->max_win = win;
+        }
+
+    } else {
+        end = seq + p_len;
+        if (tcp->syn) {
+            end++;
+        }
+        if (tcp->fin) {
+            end++;
+        }
+    }
+
+    if ((tcp->ack) == 0) {
+        /* Let it pass through the ack skew check */
+        ack = dst->seqlo;
+    } else if (ack == 0 && tcp->ack && tcp->rst) {
+        /* broken tcp stacks do not set ack */
+        /* Many stacks (ours included) will set the ACK number in an
+         * FIN|ACK if the SYN times out -- no sequence to ACK. */
+        ack = dst->seqlo;
+    }
+
+    if (seq == end) {
+        /* Ease sequencing restrictions on no data packets */
+        seq = src->seqlo;
+        end = seq;
+    }
+
+    ackskew = dst->seqlo - ack;
+#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
+    if (SEQ_GEQ(src->seqhi, end)
+        /* Last octet inside other's window space */
+        && SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))
+        /* Retrans: not more than one window back */
+        && (ackskew >= -MAXACKWINDOW)
+        /* Acking not more than one reassembled fragment backwards */
+        && (ackskew <= (MAXACKWINDOW << sws))
+        /* Acking not more than one window forward */
+        && ((tcp->rst) == 0 || orig_seq == src->seqlo
+            || (orig_seq == src->seqlo + 1)
+            || (orig_seq + 1 == src->seqlo))) {
+        /* Require an exact/+1 sequence match on resets when possible */
+
+        /* update max window */
+        if (src->max_win < win) {
+            src->max_win = win;
+        }
+        /* synchronize sequencing */
+        if (SEQ_GT(end, src->seqlo)) {
+            src->seqlo = end;
+        }
+        /* slide the window of what the other end can send */
+        if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) {
+            dst->seqhi = ack + MAX((win << sws), 1);
+        }
+
+        /* update states */
+        if (tcp->syn && src->state < CT_DPIF_TCPS_SYN_SENT) {
+                src->state = CT_DPIF_TCPS_SYN_SENT;
+        }
+        if (tcp->fin && src->state < CT_DPIF_TCPS_CLOSING) {
+                src->state = CT_DPIF_TCPS_CLOSING;
+        }
+        if (tcp->ack) {
+            if (dst->state == CT_DPIF_TCPS_SYN_SENT) {
+                dst->state = CT_DPIF_TCPS_ESTABLISHED;
+            } else if (dst->state == CT_DPIF_TCPS_CLOSING) {
+                dst->state = CT_DPIF_TCPS_FIN_WAIT_2;
+            }
+        }
+        if (tcp->rst) {
+            src->state = dst->state = CT_DPIF_TCPS_TIME_WAIT;
+        }
+
+        /* Pick the lifetime from how far both peers have progressed. */
+        if (src->state >= CT_DPIF_TCPS_FIN_WAIT_2
+            && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
+            OvsConntrackUpdateExpiration(conn, now, 30 * 10000000LL);
+        } else if (src->state >= CT_DPIF_TCPS_CLOSING
+                   && dst->state >= CT_DPIF_TCPS_CLOSING) {
+            OvsConntrackUpdateExpiration(conn, now, 45 * 10000000LL);
+        } else if (src->state < CT_DPIF_TCPS_ESTABLISHED
+                   || dst->state < CT_DPIF_TCPS_ESTABLISHED) {
+            OvsConntrackUpdateExpiration(conn, now, 30 * 10000000LL);
+        } else if (src->state >= CT_DPIF_TCPS_CLOSING
+                   || dst->state >= CT_DPIF_TCPS_CLOSING) {
+            OvsConntrackUpdateExpiration(conn, now, 15 * 60 * 10000000LL);
+        } else {
+            OvsConntrackUpdateExpiration(conn, now, 24 * 60 * 60 * 10000000LL);
+        }
+    } else if ((dst->state < CT_DPIF_TCPS_SYN_SENT
+                || dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
+                || src->state >= CT_DPIF_TCPS_FIN_WAIT_2)
+               && SEQ_GEQ(src->seqhi + MAXACKWINDOW, end)
+               /* Within a window forward of the originating packet */
+               && SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
+               /* Within a window backward of the originating packet */
+
+        /*
+         * This currently handles three situations:
+         *  1) Stupid stacks will shotgun SYNs before their peer
+         *     replies.
+         *  2) When PF catches an already established stream (the
+         *     firewall rebooted, the state table was flushed, routes
+         *     changed...)
+         *  3) Packets get funky immediately after the connection
+         *     closes (this should catch Solaris spurious ACK|FINs
+         *     that web servers like to spew after a close)
+         *
+         * This must be a little more careful than the above code
+         * since packet floods will also be caught here. We don't
+         * update the TTL here to mitigate the damage of a packet
+         * flood and so the same code can handle awkward establishment
+         * and a loosened connection close.
+         * In the establishment case, a correct peer response will
+         * validate the connection, go through the normal state code
+         * and keep updating the state TTL.
+         */
+
+        /* update max window */
+        if (src->max_win < win) {
+            src->max_win = win;
+        }
+        /* synchronize sequencing */
+        if (SEQ_GT(end, src->seqlo)) {
+            src->seqlo = end;
+        }
+        /* slide the window of what the other end can send */
+        if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) {
+            dst->seqhi = ack + MAX((win << sws), 1);
+        }
+
+        /*
+         * Cannot set dst->seqhi here since this could be a shotgunned
+         * SYN and not an already established connection.
+         */
+
+        if (tcp->fin && src->state < CT_DPIF_TCPS_CLOSING) {
+            src->state = CT_DPIF_TCPS_CLOSING;
+        }
+
+        if (tcp->rst) {
+            src->state = dst->state = CT_DPIF_TCPS_TIME_WAIT;
+        }
+    } else {
+        return CT_UPDATE_INVALID;
+    }
+
+    return CT_UPDATE_VALID;
+}
+
+/* Decides whether the packet described by 'tcp' may create a new
+ * connection.  Returns FALSE when 'tcp' is NULL, when its flag combination
+ * is illegal, or when it is a SYN+ACK; returns TRUE otherwise. */
+BOOLEAN
+OvsConntrackValidateTcpPacket(const TCPHdr *tcp)
+{
+    BOOLEAN mayCreate = FALSE;
+
+    /* The flag check returns TRUE for an illegal combination. */
+    if (tcp != NULL && !OvsConntrackValidateTcpFlags(tcp)) {
+        /* A syn+ack is not allowed to create a connection.  We want to
+         * allow totally new connections (syn) or already established, not
+         * partially open (syn+ack). */
+        mayCreate = !((tcp->syn) && (tcp->ack));
+    }
+
+    return mayCreate;
+}
+
+/* Allocates and initialises a TCP connection entry from the first packet
+ * seen for the connection.
+ *
+ *   tcp  - TCP header of that packet (treated as sent by peer[0]).
+ *   nbl  - NET_BUFFER_LIST holding the packet; used for the payload length.
+ *   now  - current time; the entry expires CT_ENTRY_TIMEOUT after it.
+ *
+ * Returns a pointer to the generic entry embedded in the new allocation, or
+ * NULL when the allocation fails.  The allocation is tagged
+ * OVS_CT_POOL_TAG and is released through that generic pointer. */
+OVS_CT_ENTRY *
+OvsNewTcpConntrack(const TCPHdr *tcp,
+                   PNET_BUFFER_LIST nbl,
+                   UINT64 now)
+{
+    struct conn_tcp* newconn = NULL;
+    struct tcp_peer *src, *dst;
+
+    newconn = OvsAllocateMemoryWithTag(sizeof(struct conn_tcp),
+                                       OVS_CT_POOL_TAG);
+    if (newconn == NULL) {
+        return NULL;
+    }
+    /* Zero the whole object, not just 'up': several peer fields (for
+     * example dst->wscale and dst->seqlo) are not assigned below but are
+     * read when later packets update the entry. */
+    NdisZeroMemory(newconn, sizeof *newconn);
+    src = &newconn->peer[0];
+    dst = &newconn->peer[1];
+
+    src->seqlo = ntohl(tcp->seq);
+    src->seqhi = src->seqlo + OvsGetTcpPayloadLength(nbl) + 1;
+
+    if (tcp->syn) {
+        src->seqhi++;
+        src->wscale = OvsTcpGetWscale(tcp);
+    } else {
+        /* Picked up mid-stream: the negotiated scale was never seen. */
+        src->wscale = CT_WSCALE_UNKNOWN;
+        dst->wscale = CT_WSCALE_UNKNOWN;
+    }
+    src->max_win = MAX(ntohs(tcp->window), 1);
+    if (src->wscale & CT_WSCALE_MASK) {
+        /* Remove scale factor from initial window */
+        uint8_t sws = src->wscale & CT_WSCALE_MASK;
+        src->max_win = DIV_ROUND_UP((uint32_t) src->max_win,
+                                    1 << sws);
+    }
+    if (tcp->fin) {
+        src->seqhi++;
+    }
+    dst->seqhi = 1;
+    dst->max_win = 1;
+    src->state = CT_DPIF_TCPS_SYN_SENT;
+    dst->state = CT_DPIF_TCPS_CLOSED;
+
+    OvsConntrackUpdateExpiration(newconn, now, CT_ENTRY_TIMEOUT);
+
+    return &newconn->up;
+}
\ No newline at end of file
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
new file mode 100644
index 0000000..fbeb70c
--- /dev/null
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -0,0 +1,530 @@
+/*
+ * Copyright (c) 2015, 2016 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_CONTRK
+
+#include "Conntrack.h"
+#include "Jhash.h"
+#include "PacketParser.h"
+#include "Debug.h"
+
+/* Context handed to the conntrack cleaner thread when it is created. */
+typedef struct _OVS_CT_THREAD_CTX {
+    KEVENT      event;          /* Signalled at cleanup after 'exit' is set */
+    PVOID       threadObject;   /* Referenced thread object, waited on at
+                                 * cleanup */
+    UINT32      exit;           /* Set to 1 to ask the thread to terminate */
+} OVS_CT_THREAD_CTX, *POVS_CT_THREAD_CTX;
+
+/* Thread routine passed to PsCreateSystemThread() by OvsInitConntrack(). */
+KSTART_ROUTINE ovsConntrackEntryCleaner;
+/* Hash table of connection entries: an array of CT_HASH_TABLE_SIZE list
+ * heads, indexed by (hash & CT_HASH_TABLE_MASK). */
+static PLIST_ENTRY ovsConntrackTable;
+/* Context shared with the cleaner thread. */
+static OVS_CT_THREAD_CTX ctThreadCtx;
+/* Reader/writer lock of the module; OvsCleanupConntrack() takes it for
+ * writing while it asks the cleaner thread to exit.
+ * NOTE(review): presumably also guards ovsConntrackTable -- confirm against
+ * the lookup/insert callers. */
+static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsInitConntrack
+ *     Initialize the components used by Connection Tracking: the RW lock,
+ *     the hash table of list heads and the cleaner thread.
+ *
+ *     Returns STATUS_SUCCESS when everything was set up.  On any failure
+ *     every resource acquired so far is released again, the module globals
+ *     are reset to NULL and the failing status is returned.
+ *----------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsInitConntrack(POVS_SWITCH_CONTEXT context)
+{
+    NTSTATUS status;
+    HANDLE threadHandle = NULL;
+    LOCK_STATE_EX lockState;
+
+    /* Init the sync-lock */
+    ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
+    if (ovsConntrackLockObj == NULL) {
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    /* Init the Hash Buffer */
+    ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
+                                                 * CT_HASH_TABLE_SIZE,
+                                                 OVS_CT_POOL_TAG);
+    if (ovsConntrackTable == NULL) {
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        goto freeLock;
+    }
+
+    for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
+        InitializeListHead(&ovsConntrackTable[i]);
+    }
+
+    /* Init CT Cleaner Thread.  The context is static, so clear what a
+     * previous OvsCleanupConntrack() left behind; otherwise a re-created
+     * thread would start with its exit request already set. */
+    ctThreadCtx.exit = 0;
+    ctThreadCtx.threadObject = NULL;
+    KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE);
+    status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
+                                  NULL, ovsConntrackEntryCleaner,
+                                  &ctThreadCtx);
+
+    if (status != STATUS_SUCCESS) {
+        goto freeTable;
+    }
+
+    status = ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL,
+                                       KernelMode,
+                                       &ctThreadCtx.threadObject, NULL);
+    if (status != STATUS_SUCCESS) {
+        /* Without a thread object cleanup could not wait for the thread,
+         * so stop it right here, using the same exit request that
+         * OvsCleanupConntrack() issues, and wait on the handle instead. */
+        NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
+        ctThreadCtx.exit = 1;
+        KeSetEvent(&ctThreadCtx.event, 0, FALSE);
+        NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
+
+        ZwWaitForSingleObject(threadHandle, FALSE, NULL);
+        ZwClose(threadHandle);
+        ctThreadCtx.threadObject = NULL;
+        goto freeTable;
+    }
+
+    ZwClose(threadHandle);
+    threadHandle = NULL;
+    return STATUS_SUCCESS;
+
+freeTable:
+    OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
+    ovsConntrackTable = NULL;
+freeLock:
+    NdisFreeRWLock(ovsConntrackLockObj);
+    ovsConntrackLockObj = NULL;
+    return status;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsCleanupConntrack
+ *     Cleanup memory and thread that were spawned for Connection tracking.
+ *
+ *     Asks the cleaner thread to exit and waits for it, frees every entry
+ *     that is still linked in the hash table, then releases the table and
+ *     the lock.  Does nothing when the module was never initialised (or
+ *     its initialisation failed).
+ *----------------------------------------------------------------------------
+ */
+VOID
+OvsCleanupConntrack(VOID)
+{
+    LOCK_STATE_EX lockState;
+
+    /* A failed or missing OvsInitConntrack() leaves the lock NULL; there is
+     * then nothing to tear down and the lock must not be touched. */
+    if (ovsConntrackLockObj == NULL) {
+        return;
+    }
+
+    NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
+    ctThreadCtx.exit = 1;
+    KeSetEvent(&ctThreadCtx.event, 0, FALSE);
+    NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
+
+    if (ctThreadCtx.threadObject != NULL) {
+        KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
+                              KernelMode, FALSE, NULL);
+        ObDereferenceObject(ctThreadCtx.threadObject);
+        ctThreadCtx.threadObject = NULL;
+    }
+
+    if (ovsConntrackTable) {
+        /* Entries still linked in a bucket would become unreachable once
+         * the bucket array is freed, so release them first. */
+        NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
+        for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
+            PLIST_ENTRY head = &ovsConntrackTable[i];
+
+            while (head->Flink != head) {
+                PLIST_ENTRY link = head->Flink;
+                POVS_CT_ENTRY entry;
+
+                entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
+                RemoveEntryList(link);
+                OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
+            }
+        }
+        NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
+
+        OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
+        ovsConntrackTable = NULL;
+    }
+
+    NdisFreeRWLock(ovsConntrackLockObj);
+    ovsConntrackLockObj = NULL;
+}
+
+/* Swaps the source and destination endpoints of 'key' in place. */
+static __inline VOID
+OvsCtKeyReverse(OVS_CT_KEY *key)
+{
+    struct ct_endpoint origSrc = key->src;
+
+    key->src = key->dst;
+    key->dst = origSrc;
+}
+
+/* Writes the connection-tracking fields of flow key 'key'.
+ *
+ *   state  - ct_state bits; OVS_CS_F_TRACKED is always added.
+ *   zone   - ct zone.
+ *   mark   - ct mark.
+ *   labels - ct labels to copy, or NULL to clear the labels in 'key'. */
+static __inline VOID
+OvsCtUpdateFlowKey(struct OvsFlowKey *key,
+                   UINT32 state,
+                   UINT16 zone,
+                   UINT32 mark,
+                   struct ovs_key_ct_labels *labels)
+{
+    if (labels == NULL) {
+        memset(&key->ct.labels, 0,
+               sizeof(struct ovs_key_ct_labels));
+    } else {
+        NdisMoveMemory(&key->ct.labels, labels,
+                       sizeof(struct ovs_key_ct_labels));
+    }
+
+    key->ct.mark = mark;
+    key->ct.zone = zone;
+    key->ct.state = OVS_CS_F_TRACKED | state;
+}
+
+/* Classifies a packet for which no usable connection entry exists and
+ * optionally creates one.
+ *
+ * When the packet may not create a connection the flow key is marked
+ * OVS_CS_F_INVALID and NULL is returned.  Otherwise the flow key is marked
+ * OVS_CS_F_NEW and, if 'commit' is set, a new entry is allocated, keyed
+ * with ctx->key (and its reverse) and inserted into the hash bucket
+ * selected by ctx->hash.
+ *
+ * Returns the new entry, or NULL when the packet is invalid, when 'commit'
+ * is FALSE, or when the entry could not be allocated. */
+static __inline POVS_CT_ENTRY
+OvsCtEntryCreate(const TCPHdr *tcp,
+                 PNET_BUFFER_LIST curNbl,
+                 OvsConntrackKeyLookupCtx *ctx,
+                 OvsFlowKey *key,
+                 BOOLEAN commit,
+                 UINT64 currentTime)
+{
+    POVS_CT_ENTRY entry = NULL;
+    UINT32 state = 0;
+    if (!OvsConntrackValidateTcpPacket(tcp)) {
+        state |= OVS_CS_F_INVALID;
+        OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
+        return entry;
+    }
+
+    state |= OVS_CS_F_NEW;
+    if (commit) {
+        entry = OvsNewTcpConntrack(tcp, curNbl, currentTime);
+        /* Only touch and link the entry when one was actually created;
+         * a failed allocation must not be dereferenced. */
+        if (entry != NULL) {
+            NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY));
+            NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY));
+            OvsCtKeyReverse(&entry->rev_key);
+            InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
+                           &entry->link);
+        }
+    }
+
+    OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
+    return entry;
+}
+
+/* Unlinks 'entry' from the list it is on and frees its memory
+ * (pool tag OVS_CT_POOL_TAG). */
+static __inline VOID
+OvsCtEntryDelete(POVS_CT_ENTRY entry)
+{
+    PLIST_ENTRY node = &entry->link;
+
+    RemoveEntryList(node);
+    OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
+}
+
+/* Returns TRUE when 'entry' is NULL or its expiration lies before the
+ * current system time, FALSE otherwise. */
+static __inline BOOLEAN
+OvsCtEntryExpired(POVS_CT_ENTRY entry)
+{
+    UINT64 timeNow;
+
+    if (entry == NULL) {
+        return TRUE;
+    }
+
+    NdisGetCurrentSystemTime((LARGE_INTEGER *)&timeNow);
+    return entry->expiration < timeNow;
+}
+
+/* Tells whether the packet described by 'key' can be tracked.
+ * Currently we support only Unfragmented TCP packets over IPv4: returns
+ * NDIS_STATUS_SUCCESS for those and NDIS_STATUS_NOT_SUPPORTED for
+ * everything else (including IPv6). */
+static __inline NDIS_STATUS
+OvsDetectCtPacket(OvsFlowKey *key)
+{
+    if (ntohs(key->l2.dlType) != ETH_TYPE_IPV4) {
+        return NDIS_STATUS_NOT_SUPPORTED;
+    }
+
+    if (key->ipKey.nwFrag != OVS_FRAG_TYPE_NONE
+        || key->ipKey.nwProto != IPPROTO_TCP) {
+        return NDIS_STATUS_NOT_SUPPORTED;
+    }
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+/* Compares two connection keys field by field.  Only the ipv4 and
+ * ipv4_aligned members of each endpoint address are compared, together
+ * with the ports, dl_type, nw_proto and zone.  Returns TRUE when all of
+ * them match. */
+static __inline BOOLEAN
+OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
+{
+    if (ctxKey.src.addr.ipv4 != entryKey.src.addr.ipv4
+        || ctxKey.src.addr.ipv4_aligned != entryKey.src.addr.ipv4_aligned
+        || ctxKey.src.port != entryKey.src.port) {
+        return FALSE;
+    }
+
+    if (ctxKey.dst.addr.ipv4 != entryKey.dst.addr.ipv4
+        || ctxKey.dst.addr.ipv4_aligned != entryKey.dst.addr.ipv4_aligned
+        || ctxKey.dst.port != entryKey.dst.port) {
+        return FALSE;
+    }
+
+    if (ctxKey.dl_type != entryKey.dl_type
+        || ctxKey.nw_proto != entryKey.nw_proto) {
+        return FALSE;
+    }
+
+    return (ctxKey.zone == entryKey.zone);
+}
+
+/* Searches the hash bucket selected by ctx->hash for an entry whose key or
+ * reverse key equals ctx->key; the first match ends the search.
+ *
+ * A match that has already expired is treated as "not found".  For a live
+ * match ctx->reply records whether the reverse key matched; ctx->reply is
+ * left untouched when nothing usable is found.  The result is stored in
+ * ctx->entry and also returned (NULL when nothing usable was found). */
+static __inline POVS_CT_ENTRY
+OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
+{
+    PLIST_ENTRY link;
+    POVS_CT_ENTRY match = NULL;
+    BOOLEAN isReply = FALSE;
+
+    LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) {
+        POVS_CT_ENTRY candidate = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
+
+        if (OvsCtKeyAreSame(ctx->key,candidate->key)) {
+            match = candidate;
+            isReply = FALSE;
+            break;
+        }
+
+        if (OvsCtKeyAreSame(ctx->key,candidate->rev_key)) {
+            match = candidate;
+            isReply = TRUE;
+            break;
+        }
+    }
+
+    if (match != NULL && OvsCtEntryExpired(match)) {
+        match = NULL;
+    }
+    if (match != NULL) {
+        ctx->reply = isReply;
+    }
+
+    ctx->entry = match;
+    return match;
+}
+
+/* Fills the lookup context 'ctx' from flow key 'flowKey' and 'zone'.
+ *
+ * Copies addresses, ports and protocol for IPv4 or IPv6 flows (other
+ * ethertypes leave those fields untouched), clears ctx->related and
+ * computes ctx->hash.
+ *
+ * The hash XORs the hashes of the two endpoints, so a packet and its reply
+ * (endpoints swapped) hash identically, and then mixes in every byte of the
+ * key that follows 'dst' (the fields shared by both directions).
+ *
+ * NOTE(review): whole endpoint structures and the key tail are hashed,
+ * including bytes this function does not assign; 'ctx' is presumably
+ * zero-initialised by the caller -- confirm. */
+static __inline VOID
+OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
+                    UINT16 zone,
+                    OvsConntrackKeyLookupCtx *ctx)
+{
+    UINT32 hsrc, hdst, hash;
+    PCHAR tailStart, tailEnd;
+
+    ctx->key.zone = zone;
+    ctx->key.dl_type = flowKey->l2.dlType;
+
+    if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) {
+        ctx->key.src.addr.ipv4 = flowKey->ipKey.nwSrc;
+        ctx->key.dst.addr.ipv4 = flowKey->ipKey.nwDst;
+        ctx->key.nw_proto = flowKey->ipKey.nwProto;
+
+        ctx->key.src.port = flowKey->ipKey.l4.tpSrc;
+        ctx->key.dst.port = flowKey->ipKey.l4.tpDst;
+    } else if (flowKey->l2.dlType == htons(ETH_TYPE_IPV6)) {
+        ctx->key.src.addr.ipv6 = flowKey->ipv6Key.ipv6Src;
+        ctx->key.dst.addr.ipv6 = flowKey->ipv6Key.ipv6Dst;
+        ctx->key.nw_proto = flowKey->ipv6Key.nwProto;
+
+        ctx->key.src.port = flowKey->ipv6Key.l4.tpSrc;
+        ctx->key.dst.port = flowKey->ipv6Key.l4.tpDst;
+    }
+
+    /* Related bit is set for ICMP and FTP (Not supported)*/
+    ctx->related = FALSE;
+
+    hsrc = OvsJhashBytes((UINT32*) &ctx->key.src, sizeof(ctx->key.src), 0);
+    hdst = OvsJhashBytes((UINT32*) &ctx->key.dst, sizeof(ctx->key.dst), 0);
+    hash = hsrc ^ hdst; /* TO identify reverse traffic */
+
+    /* Hash the part of the key located after 'dst'.  The start must be the
+     * first byte past 'dst' (not 4 bytes into it) and the length must be
+     * given in bytes, as for the two calls above. */
+    tailStart = (PCHAR) (&ctx->key.dst + 1);
+    tailEnd = (PCHAR) (&ctx->key + 1);
+    ctx->hash = OvsJhashBytes((UINT32 *) tailStart,
+                              (size_t) (tailEnd - tailStart),
+                              hash);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsProcessConntrackEntry
+ *     Update the existing entry in ctx->entry for this packet, derive the
+ *     ct_state bits and copy state, zone, mark and labels into the flow key.
+ *
+ *     Returns the entry the packet belongs to after processing, or NULL when
+ *     the old entry was deleted and OvsCtEntryCreate produced no replacement.
+ *----------------------------------------------------------------------------
+ */
+static __inline POVS_CT_ENTRY
+OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
+                         const TCPHdr *tcp,
+                         OvsConntrackKeyLookupCtx *ctx,
+                         OvsFlowKey *key,
+                         UINT16 zone,
+                         BOOLEAN commit,
+                         UINT64 currentTime)
+{
+    POVS_CT_ENTRY entry = ctx->entry;
+    UINT32 state = 0;
+
+    if (ctx->related) {
+        /* Accumulate the flags: REPLY_DIR must not wipe out RELATED. */
+        state |= OVS_CS_F_RELATED;
+        if (ctx->reply) {
+            state |= OVS_CS_F_REPLY_DIR;
+        }
+    } else {
+        /* An entry was found: update it based on the TCP header. */
+        CT_UPDATE_RES result;
+        result = OvsConntrackUpdateTcpEntry(entry, tcp, curNbl,
+                                            ctx->reply, currentTime);
+        switch (result) {
+        case CT_UPDATE_VALID:
+            state |= OVS_CS_F_ESTABLISHED;
+            if (ctx->reply) {
+                state |= OVS_CS_F_REPLY_DIR;
+            }
+            break;
+        case CT_UPDATE_INVALID:
+            state |= OVS_CS_F_INVALID;
+            break;
+        case CT_UPDATE_NEW:
+            /* Drop the stale entry and create a fresh one for the packet. */
+            OvsCtEntryDelete(ctx->entry);
+            ctx->entry = NULL;
+            entry = OvsCtEntryCreate(tcp, curNbl, ctx, key,
+                                     commit, currentTime);
+            if (!entry) {
+                /* No entry to read mark/labels from; the caller tests the
+                 * returned pointer before using it. */
+                return NULL;
+            }
+            break;
+        }
+    }
+    /* Copy mark and label from entry into flowKey. If actions specify
+       different mark and label, update the flowKey. */
+    OvsCtUpdateFlowKey(key, state, zone, entry->mark, &entry->labels);
+    return entry;
+}
+
+/*
+ * Masked write of the conntrack mark: the bits of entry->mark selected by
+ * 'mask' are cleared and 'value' is OR-ed in as given (it is not masked
+ * here). The entry and the flow key are only written when the mark changes.
+ */
+static __inline VOID
+OvsConntrackSetMark(OvsFlowKey *key,
+                    POVS_CT_ENTRY entry,
+                    UINT32 value,
+                    UINT32 mask)
+{
+    const UINT32 keptBits = entry->mark & ~mask;
+    const UINT32 updated = value | keptBits;
+
+    if (updated == entry->mark) {
+        return;
+    }
+
+    entry->mark = updated;
+    key->ct.mark = updated;
+}
+
+/*
+ * Masked write of the conntrack labels: starting from the entry's current
+ * labels, the bits selected by 'mask' are cleared and 'val' is OR-ed in.
+ * The result is stored in both the entry and the flow key.
+ */
+static __inline void
+OvsConntrackSetLabels(OvsFlowKey *key,
+                      POVS_CT_ENTRY entry,
+                      struct ovs_key_ct_labels *val,
+                      struct ovs_key_ct_labels *mask)
+{
+    ovs_u128 v, m, pktMdLabel = { 0 };
+
+    memcpy(&v, val, sizeof v);
+    memcpy(&m, mask, sizeof m);
+    /* Seed with the existing labels; previously this local was read while
+     * still uninitialized, so unmasked bits ended up as stack garbage. */
+    memcpy(&pktMdLabel, &entry->labels, sizeof(struct ovs_key_ct_labels));
+
+    pktMdLabel.u64.lo = v.u64.lo | (pktMdLabel.u64.lo & ~(m.u64.lo));
+    pktMdLabel.u64.hi = v.u64.hi | (pktMdLabel.u64.hi & ~(m.u64.hi));
+
+    NdisMoveMemory(&entry->labels, &pktMdLabel,
+                   sizeof(struct ovs_key_ct_labels));
+    NdisMoveMemory(&key->ct.labels, &pktMdLabel,
+                   sizeof(struct ovs_key_ct_labels));
+}
+
+/*
+ * Run connection tracking for one packet: build the lookup key, then, with
+ * the conntrack lock held for write, either create a new entry or update the
+ * matching one, and finally apply the optional mark/labels writes.
+ * Always returns NDIS_STATUS_SUCCESS.
+ */
+static __inline NDIS_STATUS
+OvsCtExecute_(PNET_BUFFER_LIST curNbl,
+              OvsFlowKey *key,
+              OVS_PACKET_HDR_INFO *layers,
+              BOOLEAN commit,
+              UINT16 zone,
+              MD_MARK *mark,
+              MD_LABELS *labels)
+{
+    OvsConntrackKeyLookupCtx ctx = { 0 };
+    LOCK_STATE_EX lockState;
+    TCPHdr tcpStorage;
+    const TCPHdr *tcp;
+    UINT64 currentTime;
+    POVS_CT_ENTRY entry;
+
+    /* NOTE(review): tcp is passed on without a NULL check - confirm that
+     * OvsGetTcp cannot fail here or that the callees tolerate NULL. */
+    tcp = OvsGetTcp(curNbl, layers->l4Offset, &tcpStorage);
+    NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
+
+    /* Extract the conntrack key fields of the packet and hash them. */
+    OvsCtSetupLookupCtx(key, zone, &ctx);
+
+    NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
+
+    if (OvsCtLookup(&ctx)) {
+        /* Known connection: update the entry and the CT flags. */
+        entry = OvsProcessConntrackEntry(curNbl, tcp, &ctx, key,
+                                         zone, commit, currentTime);
+    } else {
+        /* Unknown connection: create an entry in the New state. */
+        entry = OvsCtEntryCreate(tcp, curNbl, &ctx,
+                                 key, commit, currentTime);
+    }
+
+    if (entry) {
+        if (mark) {
+            OvsConntrackSetMark(key, entry, mark->value, mark->mask);
+        }
+        if (labels) {
+            OvsConntrackSetLabels(key, entry,
+                                  &labels->value, &labels->mask);
+        }
+    }
+
+    NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ * OvsExecuteConntrackAction
+ *     Parses the nested OVS_CT_ATTR_* attributes of 'a' (zone, commit, mark,
+ *     labels) and runs connection tracking on the packet. Returns early with
+ *     the status of OvsDetectCtPacket when that check does not succeed.
+ *     XXX - Add more
+ *---------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
+                          OVS_PACKET_HDR_INFO *layers,
+                          OvsFlowKey *key,
+                          const PNL_ATTR a)
+{
+    PNL_ATTR zoneAttr, commitAttr, markAttr, labelsAttr;
+    UINT16 zone = 0;
+    BOOLEAN commit = FALSE;
+    MD_MARK *mark = NULL;
+    MD_LABELS *labels = NULL;
+    NDIS_STATUS status = OvsDetectCtPacket(key);
+
+    if (status != NDIS_STATUS_SUCCESS) {
+        return status;
+    }
+
+    /* Every attribute is optional; absent ones keep the defaults above. */
+    zoneAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE);
+    if (zoneAttr) {
+        zone = NlAttrGetU16(zoneAttr);
+    }
+
+    /* COMMIT is a flag: its presence alone enables committing. */
+    commitAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT);
+    if (commitAttr) {
+        commit = TRUE;
+    }
+
+    markAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK);
+    if (markAttr) {
+        mark = NlAttrGet(markAttr);
+    }
+
+    labelsAttr = NlAttrFindNested(a, OVS_CT_ATTR_LABELS);
+    if (labelsAttr) {
+        labels = NlAttrGet(labelsAttr);
+    }
+
+    return OvsCtExecute_(curNbl, key, layers,
+                         commit, zone, mark, labels);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * ovsConntrackEntryCleaner
+ *     Thread routine that periodically removes expired entries from every
+ *     bucket of ovsConntrackTable. 'data' is the POVS_CT_THREAD_CTX of the
+ *     thread; the loop ends once context->exit is observed, after which the
+ *     thread terminates itself.
+ *----------------------------------------------------------------------------
+ */
+VOID
+ovsConntrackEntryCleaner(PVOID data)
+{
+    POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data;
+
+    for (;;) {
+        LOCK_STATE_EX lockState;
+        UINT64 now, wakeupTime;
+        PLIST_ENTRY link, next;
+        POVS_CT_ENTRY entry;
+        UINT32 bucket;
+
+        NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
+        if (context->exit) {
+            /* Exit was requested: drop the lock and leave the loop. */
+            NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
+            break;
+        }
+
+        /* The next wake-up is one cleanup interval after this pass began. */
+        NdisGetCurrentSystemTime((LARGE_INTEGER *)&now);
+        wakeupTime = now + CT_CLEANUP_INTERVAL;
+
+        for (bucket = 0; bucket < CT_HASH_TABLE_SIZE; bucket++) {
+            /* _SAFE iteration: entries are deleted while walking the list. */
+            LIST_FORALL_SAFE(&ovsConntrackTable[bucket], link, next) {
+                entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
+                if (now > entry->expiration) {
+                    OvsCtEntryDelete(entry);
+                }
+            }
+        }
+
+        NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
+
+        /* Sleep until the event is signalled or the wake-up time passes. */
+        KeWaitForSingleObject(&context->event, Executive, KernelMode,
+                              FALSE, (LARGE_INTEGER *)&wakeupTime);
+    }
+
+    PsTerminateSystemThread(STATUS_SUCCESS);
+}
diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
new file mode 100644
index 0000000..3a73f21
--- /dev/null
+++ b/datapath-windows/ovsext/Conntrack.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2015, 2016 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_CONNTRACK_H_
+#define __OVS_CONNTRACK_H_ 1
+
+#include "precomp.h"
+#include "Flow.h"
+
+struct ct_addr {                      /* One L3 address of a connection. */
+    union {                           /* All members share the same storage. */
+        ovs_be32 ipv4;                /* IPv4 address, copied from the flow key. */
+        struct in6_addr ipv6;         /* IPv6 address, copied from the flow key. */
+        uint32_t ipv4_aligned;        /* Same storage viewed as a uint32_t. */
+        struct in6_addr ipv6_aligned; /* Second in6_addr view of the storage. */
+    };
+};
+
+struct ct_endpoint {       /* One side (source or destination) of a connection. */
+    struct ct_addr addr;   /* L3 address. */
+    ovs_be16 port;         /* L4 port, copied from the flow key's tpSrc/tpDst. */
+    UINT16 pad;            /* Explicit padding; hashed together with addr/port. */
+};
+
+typedef enum CT_UPDATE_RES {  /* Result of OvsConntrackUpdateTcpEntry. */
+    CT_UPDATE_INVALID,        /* Caller sets OVS_CS_F_INVALID in ct_state. */
+    CT_UPDATE_VALID,          /* Caller sets OVS_CS_F_ESTABLISHED in ct_state. */
+    CT_UPDATE_NEW,            /* Caller deletes the entry and creates a new one. */
+} CT_UPDATE_RES;
+
+/* Masked write to the conntrack mark: new = value | (old & ~mask). */
+typedef struct MD_MARK {
+    UINT32 value;   /* Bits OR-ed into the mark (not masked by the setter). */
+    UINT32 mask;    /* Bits of the old mark that are cleared before the OR. */
+} MD_MARK;
+
+/* Masked write to the conntrack labels; labels counterpart of MD_MARK. */
+typedef struct MD_LABELS {
+    struct ovs_key_ct_labels value;   /* Label bits to OR in. */
+    struct ovs_key_ct_labels mask;    /* Label bits cleared before the OR. */
+} MD_LABELS;
+
+typedef struct _OVS_CT_KEY {   /* Identifies one direction of a connection. */
+    struct ct_endpoint src;    /* Source address and port. */
+    struct ct_endpoint dst;    /* Destination address and port. */
+    UINT16 dl_type;            /* Ethertype, copied as-is from flowKey->l2.dlType. */
+    UINT8 nw_proto;            /* IP protocol number from the flow key. */
+    UINT16 zone;               /* Conntrack zone given with the ct action. */
+} OVS_CT_KEY, *POVS_CT_KEY;
+
+typedef struct OVS_CT_ENTRY {   /* One tracked connection. */
+    OVS_CT_KEY  key;            /* Key matched by original-direction packets. */
+    OVS_CT_KEY  rev_key;        /* Key matched by reply-direction packets. */
+    UINT64      expiration;     /* Compared against NdisGetCurrentSystemTime. */
+    LIST_ENTRY  link;           /* Linkage in an ovsConntrackTable bucket. */
+    UINT32      mark;           /* Connection mark (see MD_MARK). */
+    struct ovs_key_ct_labels labels;   /* Connection labels (see MD_LABELS). */
+} OVS_CT_ENTRY, *POVS_CT_ENTRY;
+
+typedef struct OvsConntrackKeyLookupCtx {   /* Per-packet lookup state. */
+    OVS_CT_KEY      key;       /* Key built from the packet's flow key. */
+    POVS_CT_ENTRY   entry;     /* Lookup result; NULL if none or expired. */
+    UINT32          hash;      /* Key hash; masked to pick the table bucket. */
+    BOOLEAN         reply;     /* TRUE when the packet matched entry->rev_key. */
+    BOOLEAN         related;   /* Currently always FALSE (ICMP/FTP unsupported). */
+} OvsConntrackKeyLookupCtx;
+
+#define CT_HASH_TABLE_SIZE ((UINT32)1 << 10) /* Number of buckets (1024). */
+#define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1) /* hash -> bucket index. */
+#define CT_ENTRY_TIMEOUT (2 * 600000000)   // 2m, presumably 100 ns units as below - TODO confirm
+#define CT_CLEANUP_INTERVAL (2 * 600000000) // 2m; added to the system time, which is in 100 ns units
+
+VOID OvsCleanupConntrack(VOID);   /* Called from OvsExtDetach. */
+NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context); /* From OvsCreateSwitch. */
+
+NDIS_STATUS OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
+                                      OVS_PACKET_HDR_INFO *layers,
+                                      OvsFlowKey *key,
+                                      const PNL_ATTR a); /* a: nested OVS_CT_ATTR_* */
+BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr *tcp);
+OVS_CT_ENTRY * OvsNewTcpConntrack(const TCPHdr *tcp,
+                                  PNET_BUFFER_LIST nbl,
+                                  UINT64 now);
+enum CT_UPDATE_RES OvsConntrackUpdateTcpEntry(struct OVS_CT_ENTRY* conn_,
+                                              const TCPHdr *tcp,
+                                              PNET_BUFFER_LIST nbl,
+                                              BOOLEAN reply,
+                                              UINT64 now); /* Result drives ct_state. */
+#endif /* __OVS_CONNTRACK_H_ */
\ No newline at end of file
diff --git a/datapath-windows/ovsext/Debug.h b/datapath-windows/ovsext/Debug.h
index 45f3c49..e5ed963 100644
--- a/datapath-windows/ovsext/Debug.h
+++ b/datapath-windows/ovsext/Debug.h
@@ -40,6 +40,7 @@
 #define OVS_DBG_NETLINK  BIT32(20)
 #define OVS_DBG_TUNFLT   BIT32(21)
 #define OVS_DBG_STT      BIT32(22)
+#define OVS_DBG_CONTRK   BIT32(23)
 
 #define OVS_DBG_RESERVED BIT32(31)
 //Please add above OVS_DBG_RESERVED.
diff --git a/datapath-windows/ovsext/DpInternal.h b/datapath-windows/ovsext/DpInternal.h
index d26833f..a3ce311 100644
--- a/datapath-windows/ovsext/DpInternal.h
+++ b/datapath-windows/ovsext/DpInternal.h
@@ -167,6 +167,13 @@ typedef __declspec(align(8)) struct OvsFlowKey {
     };
     UINT32 recircId;             /* Recirculation ID.  */
     UINT32 dpHash;               /* Datapath calculated hash value. */
+    struct {
+        /* Connection tracking fields. */
+        UINT16 zone;
+        UINT32 mark;
+        UINT32 state;
+        struct ovs_key_ct_labels labels;
+    } ct;                        /* Connection Tracking Flags */
 } OvsFlowKey;
 
 #define OVS_WIN_TUNNEL_KEY_SIZE (sizeof (OvsIPv4TunnelKey))
diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c
index f74ce12..a7e9bd2 100644
--- a/datapath-windows/ovsext/Flow.c
+++ b/datapath-windows/ovsext/Flow.c
@@ -172,7 +172,17 @@ const NL_POLICY nlFlowKeyPolicy[] = {
                               .maxLen = 4, .optional = TRUE},
     [OVS_KEY_ATTR_RECIRC_ID] = {.type = NL_A_UNSPEC, .minLen = 4,
                                 .maxLen = 4, .optional = TRUE},
-    [OVS_KEY_ATTR_MPLS] = {.type = NL_A_VAR_LEN, .optional = TRUE}
+    [OVS_KEY_ATTR_MPLS] = {.type = NL_A_VAR_LEN, .optional = TRUE},
+    [OVS_KEY_ATTR_CT_STATE] = {.type = NL_A_UNSPEC, .minLen = 4,
+                               .maxLen = 4, .optional = TRUE},
+    [OVS_KEY_ATTR_CT_ZONE] = {.type = NL_A_UNSPEC, .minLen = 2,
+                              .maxLen = 2, .optional = TRUE},
+    [OVS_KEY_ATTR_CT_MARK] = {.type = NL_A_UNSPEC, .minLen = 4,
+                              .maxLen = 4, .optional = TRUE},
+    [OVS_KEY_ATTR_CT_LABELS] = {.type = NL_A_UNSPEC,
+                                .minLen = sizeof(struct ovs_key_ct_labels),
+                                .maxLen = sizeof(struct ovs_key_ct_labels),
+                                .optional = TRUE}
 };
 const UINT32 nlFlowKeyPolicyLen = ARRAY_SIZE(nlFlowKeyPolicy);
 
@@ -229,7 +239,8 @@ const NL_POLICY nlFlowActionPolicy[] = {
                               .maxLen = sizeof(struct ovs_action_hash),
                               .optional = TRUE},
     [OVS_ACTION_ATTR_SET] = {.type = NL_A_VAR_LEN, .optional = TRUE},
-    [OVS_ACTION_ATTR_SAMPLE] = {.type = NL_A_VAR_LEN, .optional = TRUE}
+    [OVS_ACTION_ATTR_SAMPLE] = {.type = NL_A_VAR_LEN, .optional = TRUE},
+    [OVS_ACTION_ATTR_CT] = {.type = NL_A_VAR_LEN, .optional = TRUE}
 };
 
 /*
@@ -850,6 +861,28 @@ MapFlowKeyToNlKey(PNL_BUFFER nlBuf,
         goto done;
     }
 
+    if (!NlMsgPutTailU32(nlBuf, OVS_KEY_ATTR_CT_STATE,
+                         flowKey->ct.state)) {
+        rc = STATUS_UNSUCCESSFUL;
+        goto done;
+    }
+    if (!NlMsgPutTailU16(nlBuf, OVS_KEY_ATTR_CT_ZONE,
+                         flowKey->ct.zone)) {
+        rc = STATUS_UNSUCCESSFUL;
+        goto done;
+    }
+    if (!NlMsgPutTailU32(nlBuf, OVS_KEY_ATTR_CT_MARK,
+                         flowKey->ct.mark)) {
+        rc = STATUS_UNSUCCESSFUL;
+        goto done;
+    }
+    if (!NlMsgPutTailUnspec(nlBuf, OVS_KEY_ATTR_CT_LABELS,
+                            (PCHAR)(&flowKey->ct.labels),
+                            sizeof(struct ovs_key_ct_labels))) {
+        rc = STATUS_UNSUCCESSFUL;
+        goto done;
+    }
+
     if (flowKey->dpHash) {
         if (!NlMsgPutTailU32(nlBuf, OVS_KEY_ATTR_DP_HASH,
                              flowKey->dpHash)) {
@@ -1386,6 +1419,24 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs,
         destKey->dpHash = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_DP_HASH]);
     }
 
+    if (keyAttrs[OVS_KEY_ATTR_CT_STATE]) {
+        destKey->ct.state = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_STATE]));
+    }
+
+    if (keyAttrs[OVS_KEY_ATTR_CT_ZONE]) {
+        destKey->ct.zone = (NlAttrGetU16(keyAttrs[OVS_KEY_ATTR_CT_ZONE]));
+    }
+
+    if (keyAttrs[OVS_KEY_ATTR_CT_MARK]) {
+        destKey->ct.mark = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_MARK]));
+    }
+
+    if (keyAttrs[OVS_KEY_ATTR_CT_LABELS]) {
+        const struct ovs_key_ct_labels *ct_labels;
+        ct_labels = NlAttrGet(keyAttrs[OVS_KEY_ATTR_CT_LABELS]);
+        RtlCopyMemory(&destKey->ct.labels, ct_labels, sizeof(struct ovs_key_ct_labels));
+    }
+
     /* ===== L2 headers ===== */
     destKey->l2.inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
 
@@ -1774,6 +1825,24 @@ OvsGetFlowMetadata(OvsFlowKey *key,
         key->dpHash = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_DP_HASH]);
     }
 
+    if (keyAttrs[OVS_KEY_ATTR_CT_STATE]) {
+        key->ct.state = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_STATE]));
+    }
+
+    if (keyAttrs[OVS_KEY_ATTR_CT_ZONE]) {
+        key->ct.zone = (NlAttrGetU16(keyAttrs[OVS_KEY_ATTR_CT_ZONE]));
+    }
+
+    if (keyAttrs[OVS_KEY_ATTR_CT_MARK]) {
+        key->ct.mark = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_MARK]));
+    }
+
+    if (keyAttrs[OVS_KEY_ATTR_CT_LABELS]) {
+        const struct ovs_key_ct_labels *ct_labels;
+        ct_labels = NlAttrGet(keyAttrs[OVS_KEY_ATTR_CT_LABELS]);
+        RtlCopyMemory(&key->ct.labels, ct_labels, sizeof(struct ovs_key_ct_labels));
+    }
+
     return status;
 }
 
@@ -2059,6 +2128,11 @@ FlowEqual(OvsFlow *srcFlow,
             srcFlow->key.l2.val == dstKey->l2.val &&
             srcFlow->key.recircId == dstKey->recircId &&
             srcFlow->key.dpHash == dstKey->dpHash &&
+            srcFlow->key.ct.state == dstKey->ct.state &&
+            srcFlow->key.ct.zone == dstKey->ct.zone &&
+            srcFlow->key.ct.mark == dstKey->ct.mark &&
+            !memcmp(&srcFlow->key.ct.labels, &dstKey->ct.labels,
+                    sizeof(struct ovs_key_ct_labels)) &&
             FlowMemoryEqual((UINT64 *)((UINT8 *)&srcFlow->key + offset),
                             (UINT64 *) dstStart,
                             size));
@@ -2156,6 +2230,21 @@ OvsLookupFlow(OVS_DATAPATH *datapath,
         if (key->dpHash) {
             *hash = OvsJhashWords((UINT32*)hash, 1, key->dpHash);
         }
+        if (key->ct.state) {
+            *hash = OvsJhashWords((UINT32*)hash, 1, key->ct.state);
+        }
+        if (key->ct.zone) {
+            *hash = OvsJhashWords((UINT32*)hash, 1, key->ct.zone);
+        }
+        if (key->ct.mark) {
+            *hash = OvsJhashWords((UINT32*)hash, 1, key->ct.mark);
+        }
+        if (key->ct.labels.ct_labels) {
+            UINT32 lblHash = OvsJhashBytes(&key->ct.labels,
+                                           sizeof(struct ovs_key_ct_labels),
+                                           0);
+            *hash = OvsJhashWords((UINT32*)hash, 1, lblHash);
+        }
     }
 
     head = &datapath->flowTable[HASH_BUCKET(*hash)];
@@ -2322,6 +2411,12 @@ ReportFlowInfo(OvsFlow *flow,
 
     info->key.recircId = flow->key.recircId;
     info->key.dpHash = flow->key.dpHash;
+    info->key.ct.state = flow->key.ct.state;
+    info->key.ct.zone = flow->key.ct.zone;
+    info->key.ct.mark = flow->key.ct.mark;
+    NdisMoveMemory(&info->key.ct.labels,
+                   &flow->key.ct.labels,
+                   sizeof(struct ovs_key_ct_labels));
 
     return status;
 }
@@ -2578,6 +2673,10 @@ OvsFlowKeyAttrSize(void)
          + NlAttrTotalSize(4)   /* OVS_KEY_ATTR_SKB_MARK */
          + NlAttrTotalSize(4)   /* OVS_KEY_ATTR_DP_HASH */
          + NlAttrTotalSize(4)   /* OVS_KEY_ATTR_RECIRC_ID */
+         + NlAttrTotalSize(4)   /* OVS_KEY_ATTR_CT_STATE */
+         + NlAttrTotalSize(2)   /* OVS_KEY_ATTR_CT_ZONE */
+         + NlAttrTotalSize(4)   /* OVS_KEY_ATTR_CT_MARK */
+         + NlAttrTotalSize(16)  /* OVS_KEY_ATTR_CT_LABELS */
          + NlAttrTotalSize(12)  /* OVS_KEY_ATTR_ETHERNET */
          + NlAttrTotalSize(2)   /* OVS_KEY_ATTR_ETHERTYPE */
          + NlAttrTotalSize(4)   /* OVS_KEY_ATTR_VLAN */
@@ -2657,6 +2756,31 @@ OvsProbeSupportedFeature(POVS_MESSAGE msgIn,
             OVS_LOG_ERROR("Invalid recirculation ID.");
             status = STATUS_INVALID_PARAMETER;
         }
+    } else if (keyAttrs[OVS_KEY_ATTR_CT_STATE]) {
+        UINT32 state = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_STATE]);
+        if (state & OVS_CS_F_DST_NAT || state & OVS_CS_F_SRC_NAT) {
+            status = STATUS_INVALID_PARAMETER;
+            OVS_LOG_ERROR("Contrack NAT is not supported:%d", state);
+        }
+    } else if (keyAttrs[OVS_KEY_ATTR_CT_ZONE]) {
+        UINT16 zone = (NlAttrGetU16(keyAttrs[OVS_KEY_ATTR_CT_ZONE]));
+        if (!zone) {
+            OVS_LOG_ERROR("Invalid zone specified.");
+            status = STATUS_INVALID_PARAMETER;
+        }
+    } else if (keyAttrs[OVS_KEY_ATTR_CT_MARK]) {
+        UINT32 mark = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_MARK]));
+        if (!mark) {
+            OVS_LOG_ERROR("Invalid ct mark specified.");
+            status = STATUS_INVALID_PARAMETER;
+        }
+    } else if (keyAttrs[OVS_KEY_ATTR_CT_LABELS]) {
+        const struct ovs_key_ct_labels *ct_labels;
+        ct_labels = NlAttrGet(keyAttrs[OVS_KEY_ATTR_CT_LABELS]);
+        if (!ct_labels->ct_labels) {
+            OVS_LOG_ERROR("Invalid ct label specified.");
+            status = STATUS_INVALID_PARAMETER;
+        }
     } else {
         OVS_LOG_ERROR("Feature not supported.");
         status = STATUS_INVALID_PARAMETER;
diff --git a/datapath-windows/ovsext/Switch.c b/datapath-windows/ovsext/Switch.c
index 77bafb4..7ad2e98 100644
--- a/datapath-windows/ovsext/Switch.c
+++ b/datapath-windows/ovsext/Switch.c
@@ -20,7 +20,7 @@
  */
 
 #include "precomp.h"
-
+#include "Conntrack.h"
 #include "Switch.h"
 #include "Vport.h"
 #include "Event.h"
@@ -218,6 +218,13 @@ OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
         goto create_switch_done;
     }
 
+    status = OvsInitConntrack(switchContext);
+    if (status != STATUS_SUCCESS) {
+        OvsUninitSwitchContext(switchContext);
+        OVS_LOG_ERROR("Exit: Failed to initialize Connection tracking");
+        goto create_switch_done;
+    }
+
     *switchContextOut = switchContext;
 
 create_switch_done:
@@ -249,6 +256,7 @@ OvsExtDetach(NDIS_HANDLE filterModuleContext)
     OvsDeleteSwitch(switchContext);
     OvsCleanupIpHelper();
     OvsCleanupSttDefragmentation();
+    OvsCleanupConntrack();
     /* This completes the cleanup, and a new attach can be handled now. */
 
     OVS_LOG_TRACE("Exit: OvsDetach Successfully");
diff --git a/datapath-windows/ovsext/Types.h b/datapath-windows/ovsext/Types.h
index 5d2744b..022c65b 100644
--- a/datapath-windows/ovsext/Types.h
+++ b/datapath-windows/ovsext/Types.h
@@ -28,6 +28,12 @@ typedef uint64 __u64, __be64;
 typedef uint32 __u32, __be32;
 typedef uint16 __u16, __be16;
 typedef uint8 __u8;
+typedef union ovs_u128 {    /* 128-bit value with two overlapping views. */
+    uint32_t u32[4];        /* View as four 32-bit words. */
+    struct {
+        uint64_t lo, hi;    /* View as two 64-bit halves. */
+    } u64;
+} ovs_u128;
 
 /* Defines the  userspace specific data types for file
  * included within kernel only. */
diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h
index 038754d..288ea51 100644
--- a/datapath-windows/ovsext/Util.h
+++ b/datapath-windows/ovsext/Util.h
@@ -37,6 +37,7 @@
 #define OVS_GRE_POOL_TAG                'GSVO'
 #define OVS_TUNFLT_POOL_TAG             'WSVO'
 #define OVS_RECIRC_POOL_TAG             'CSVO'
+#define OVS_CT_POOL_TAG                 'CTVO'
 
 VOID *OvsAllocateMemory(size_t size);
 VOID *OvsAllocateMemoryWithTag(size_t size, ULONG tag);
@@ -68,7 +69,7 @@ VOID OvsFreeAlignedMemory(VOID *ptr);
 
 VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src);
 
-
+#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
 #define MIN(_a, _b) ((_a) > (_b) ? (_b) : (_a))
 #define ARRAY_SIZE(_x)  ((sizeof(_x))/sizeof (_x)[0])
 #define OVS_SWITCH_PORT_ID_INVALID  (NDIS_SWITCH_PORT_ID)(-1)
diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
index af718f7..0356ddf 100644
--- a/datapath-windows/ovsext/ovsext.vcxproj
+++ b/datapath-windows/ovsext/ovsext.vcxproj
@@ -74,6 +74,7 @@
     <ClInclude Include="Actions.h" />
     <ClInclude Include="Atomic.h" />
     <ClInclude Include="BufferMgmt.h" />
+    <ClInclude Include="Conntrack.h" />
     <ClInclude Include="Datapath.h" />
     <ClInclude Include="Debug.h" />
     <ClInclude Include="DpInternal.h" />
@@ -175,6 +176,8 @@
   <ItemGroup>
     <ClCompile Include="Actions.c" />
     <ClCompile Include="BufferMgmt.c" />
+    <ClCompile Include="Conntrack-tcp.c" />
+    <ClCompile Include="Conntrack.c" />
     <ClCompile Include="Debug.c" />
     <ClCompile Include="Driver.c" />
     <ClCompile Include="Event.c" />
diff --git a/debian/copyright.in b/debian/copyright.in
index bfac3c6..57d007a 100644
--- a/debian/copyright.in
+++ b/debian/copyright.in
@@ -89,6 +89,7 @@ License:
 
 	lib/getopt_long.c
 	include/windows/getopt.h
+	datapath-windows/ovsext/Conntrack-tcp.c
 
 * The following files are licensed under the 3-clause BSD-license
 
-- 
2.5.0.windows.1




More information about the dev mailing list