[ovs-dev] [PATCH 3/3] datapath-windows: Add GRE TEB support for windows datapath

Alin Serdean aserdean at cloudbasesolutions.com
Wed Dec 2 17:50:50 UTC 2015


This patch introduces support for GRE TEB (transparent Ethernet bridging)
in the Windows datapath.

The GRE support is based on http://tools.ietf.org/html/rfc2890 and supports
only the GRE protocol type 0x6558 (transparent Ethernet bridging), like its
Linux counterpart.

Util.h: define the GRE pool tag
Vport.c/h: sort the includes alphabetically
           add the function OvsFindTunnelVportByPortType which searches the
           tunnelVportsArray for a given port type
Actions.c : sort the includes alphabetically
            call the GRE encapsulation / decapsulation functions when needed
Gre.c/h : add GRE type defines
          add initialization/cleanup functions
          add encapsulation / decapsulation functions with software offloads
          (hardware offloads will be added in a separate patch) with LSO(TSO)
          support

Tested using: PSPING
              (https://technet.microsoft.com/en-us/sysinternals/psping.aspx)
              (ICMP, TCP, UDP) with various packet lengths
              IPERF3
              (https://iperf.fr/iperf-download.php)
              (TCP, UDP) with various options

Signed-off-by: Alin Gabriel Serdean <aserdean at cloudbasesolutions.com>
---
 datapath-windows/ovsext/Actions.c      |  71 +++--
 datapath-windows/ovsext/Gre.c          | 456 +++++++++++++++++++++++++++++++++
 datapath-windows/ovsext/Gre.h          | 113 ++++++++
 datapath-windows/ovsext/Util.h         |   1 +
 datapath-windows/ovsext/Vport.c        |  43 +++-
 datapath-windows/ovsext/Vport.h        |  14 +-
 datapath-windows/ovsext/ovsext.vcxproj |   2 +
 7 files changed, 665 insertions(+), 35 deletions(-)
 create mode 100644 datapath-windows/ovsext/Gre.c
 create mode 100644 datapath-windows/ovsext/Gre.h

diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
index e902983..6b2a191 100644
--- a/datapath-windows/ovsext/Actions.c
+++ b/datapath-windows/ovsext/Actions.c
@@ -16,16 +16,17 @@
 
 #include "precomp.h"
 
-#include "Switch.h"
-#include "Vport.h"
+#include "Checksum.h"
 #include "Event.h"
-#include "User.h"
-#include "NetProto.h"
 #include "Flow.h"
-#include "Vxlan.h"
-#include "Stt.h"
-#include "Checksum.h"
+#include "Gre.h"
+#include "NetProto.h"
 #include "PacketIO.h"
+#include "Stt.h"
+#include "Switch.h"
+#include "User.h"
+#include "Vport.h"
+#include "Vxlan.h"
 
 #ifdef OVS_DBG_MOD
 #undef OVS_DBG_MOD
@@ -34,6 +35,8 @@
 #include "Debug.h"
 
 typedef struct _OVS_ACTION_STATS {
+    UINT64 rxGre;
+    UINT64 txGre;
     UINT64 rxVxlan;
     UINT64 txVxlan;
     UINT64 rxStt;
@@ -205,27 +208,35 @@ OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx,
     /* XXX: we should also check for the length of the UDP payload to pick
      * packets only if they are at least VXLAN header size.
      */
-    if (!flowKey->ipKey.nwFrag &&
-        flowKey->ipKey.nwProto == IPPROTO_UDP) {
-        UINT16 dstPort = ntohs(flowKey->ipKey.l4.tpDst);
-        tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
-                                                  dstPort,
-                                                  OVS_VPORT_TYPE_VXLAN);
-        if (tunnelVport) {
-            ovsActionStats.rxVxlan++;
-        }
-    } else if (!flowKey->ipKey.nwFrag &&
-                flowKey->ipKey.nwProto == IPPROTO_TCP) {
+    if (!flowKey->ipKey.nwFrag) {
         UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst);
-        tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
-                                                  dstPort,
-                                                  OVS_VPORT_TYPE_STT);
-        if (tunnelVport) {
-            ovsActionStats.rxStt++;
+        switch (flowKey->ipKey.nwProto) {
+        case IPPROTO_GRE:
+            tunnelVport = OvsFindTunnelVportByPortType(ovsFwdCtx->switchContext,
+                                                       OVS_VPORT_TYPE_GRE);
+            if (tunnelVport) {
+                ovsActionStats.rxGre++;
+            }
+            break;
+        case IPPROTO_TCP:
+            tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
+                                                      dstPort,
+                                                      OVS_VPORT_TYPE_STT);
+            if (tunnelVport) {
+                ovsActionStats.rxStt++;
+            }
+            break;
+        case IPPROTO_UDP:
+            tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
+                                                      dstPort,
+                                                      OVS_VPORT_TYPE_VXLAN);
+            if (tunnelVport) {
+                ovsActionStats.rxVxlan++;
+            }
+            break;
         }
     }
 
-
     // We might get tunnel packets even before the tunnel gets initialized.
     if (tunnelVport) {
         ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
@@ -306,6 +317,9 @@ OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx,
         /* Tunnel the packet only if tunnel context is set. */
         if (ovsFwdCtx->tunKey.dst != 0) {
             switch(dstVport->ovsType) {
+            case OVS_VPORT_TYPE_GRE:
+                ovsActionStats.txGre++;
+                break;
             case OVS_VPORT_TYPE_VXLAN:
                 ovsActionStats.txVxlan++;
                 break;
@@ -652,6 +666,11 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
 
     /* Do the encap. Encap function does not consume the NBL. */
     switch(ovsFwdCtx->tunnelTxNic->ovsType) {
+    case OVS_VPORT_TYPE_GRE:
+        status = OvsEncapGre(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
+                             &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
+                             &ovsFwdCtx->layers, &newNbl);
+        break;
     case OVS_VPORT_TYPE_VXLAN:
         status = OvsEncapVxlan(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
                                &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
@@ -724,6 +743,10 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
      */
 
     switch(tunnelRxVport->ovsType) {
+    case OVS_VPORT_TYPE_GRE:
+        status = OvsDecapGre(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
+                             &ovsFwdCtx->tunKey, &newNbl);
+        break;
     case OVS_VPORT_TYPE_VXLAN:
         status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
                                &ovsFwdCtx->tunKey, &newNbl);
diff --git a/datapath-windows/ovsext/Gre.c b/datapath-windows/ovsext/Gre.c
new file mode 100644
index 0000000..de914be
--- /dev/null
+++ b/datapath-windows/ovsext/Gre.c
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2015 Cloudbase Solutions Srl
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+
+#include "Atomic.h"
+#include "Checksum.h"
+#include "Flow.h"
+#include "Gre.h"
+#include "IpHelper.h"
+#include "NetProto.h"
+#include "PacketIO.h"
+#include "PacketParser.h"
+#include "Switch.h"
+#include "User.h"
+#include "Util.h"
+#include "Vport.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_GRE
+#include "Debug.h"
+
+static NDIS_STATUS
+OvsDoEncapGre(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl,
+              const OvsIPv4TunnelKey *tunKey,
+              const POVS_FWD_INFO fwdInfo,
+              POVS_PACKET_HDR_INFO layers,
+              POVS_SWITCH_CONTEXT switchContext,
+              PNET_BUFFER_LIST *newNbl);
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsInitGreTunnel --
+ *    Allocates zeroed per-port GRE state and attaches it to vport->priv.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsInitGreTunnel(POVS_VPORT_ENTRY vport,
+                 UINT16 udpDestPort)
+{
+    POVS_GRE_VPORT grePort;
+
+    grePort = (POVS_GRE_VPORT)OvsAllocateMemoryWithTag(sizeof(*grePort),
+                                                       OVS_GRE_POOL_TAG);
+    if (!grePort) {
+        OVS_LOG_ERROR("Insufficient memory, can't allocate OVS_GRE_VPORT");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    RtlZeroMemory(grePort, sizeof(*grePort));
+    grePort->dstPort = udpDestPort; /* only field set; the rest stay zero */
+    vport->priv = (PVOID)grePort;   /* released by OvsCleanupGreTunnel() */
+    return STATUS_SUCCESS;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsCleanupGreTunnel --
+ *    Frees the GRE state in vport->priv; no-op for non-GRE or unset ports.
+ * --------------------------------------------------------------------------
+ */
+void
+OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport)
+{
+    if (vport->ovsType != OVS_VPORT_TYPE_GRE ||
+        vport->priv == NULL) {
+        return;
+    }
+
+    OvsFreeMemoryWithTag(vport->priv, OVS_GRE_POOL_TAG);
+    vport->priv = NULL; /* a repeated call then returns at the guard above */
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsEncapGre --
+ *     Encapsulates a packet with a GRE header. Fails with
+ *     NDIS_STATUS_FAILURE when no forwarding info exists for tunKey->dst.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsEncapGre(POVS_VPORT_ENTRY vport,
+            PNET_BUFFER_LIST curNbl,
+            OvsIPv4TunnelKey *tunKey,
+            POVS_SWITCH_CONTEXT switchContext,
+            POVS_PACKET_HDR_INFO layers,
+            PNET_BUFFER_LIST *newNbl)
+{
+    OVS_FWD_INFO fwdInfo;
+    NDIS_STATUS status;
+
+    status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
+    if (status != STATUS_SUCCESS) {
+        /* Lookup failed: hand tunKey to the IP helper, fail this packet. */
+        OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
+        return NDIS_STATUS_FAILURE;
+    }
+
+    return OvsDoEncapGre(vport, curNbl, tunKey, &fwdInfo, layers,
+                         switchContext, newNbl);
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsDoEncapGre --
+ *    Does the GRE encap: copies/segments curNbl into *newNbl, adds headers.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsDoEncapGre(POVS_VPORT_ENTRY vport,
+              PNET_BUFFER_LIST curNbl,
+              const OvsIPv4TunnelKey *tunKey,
+              const POVS_FWD_INFO fwdInfo,
+              POVS_PACKET_HDR_INFO layers,
+              POVS_SWITCH_CONTEXT switchContext,
+              PNET_BUFFER_LIST *newNbl)
+{
+    NDIS_STATUS status;
+    PNET_BUFFER curNb;
+    PMDL curMdl;
+    PUINT8 bufferStart;
+    EthHdr *ethHdr;
+    IPHdr *ipHdr;
+    PGREHdr greHdr;
+    POVS_GRE_VPORT vportGre;
+    UINT32 headRoom = GreTunHdrSize(tunKey->flags);
+#if DBG
+    UINT32 counterHeadRoom;
+#endif
+    UINT32 packetLength;
+
+    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+    packetLength = NET_BUFFER_DATA_LENGTH(curNb);
+
+    if (layers->isTcp) {
+        NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
+
+        tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
+                                             TcpLargeSendNetBufferListInfo);
+        OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS,
+                      packetLength);
+        if (tsoInfo.LsoV1Transmit.MSS) {
+            OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
+            *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
+                                       tsoInfo.LsoV1Transmit.MSS, headRoom);
+            if (*newNbl == NULL) {
+                OVS_LOG_ERROR("Unable to segment NBL");
+                return NDIS_STATUS_FAILURE;
+            }
+            /* Clear out LSO flags after this point */
+            NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) = 0;
+        }
+    }
+
+    vportGre = (POVS_GRE_VPORT)GetOvsVportPriv(vport);
+    ASSERT(vportGre);
+
+    /* If we didn't split the packet above, make a copy now */
+    if (*newNbl == NULL) {
+        *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
+                                    FALSE /*NBL info*/);
+        if (*newNbl == NULL) {
+            OVS_LOG_ERROR("Unable to copy NBL");
+            return NDIS_STATUS_FAILURE;
+        }
+        /*
+         * To this point we do not have GRE offloading.
+         * Apply defined checksums
+         */
+        curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
+        curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+        bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
+                                                           LowPagePriority);
+        if (!bufferStart) {
+            status = NDIS_STATUS_RESOURCES;
+            goto ret_error;
+        }
+
+        NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+        csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
+                                              TcpIpChecksumNetBufferListInfo);
+
+        bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+
+        if (layers->isIPv4) {
+            IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
+
+            if (csumInfo.Transmit.IpHeaderChecksum) {
+                ip->check = 0;
+                ip->check = IPChecksum((UINT8 *)ip, 4 * ip->ihl, 0);
+            }
+
+            if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
+                tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
+                                              IPPROTO_TCP, csumLength);
+                tcp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                UDPHdr *udp = (UDPHdr *)(bufferStart + layers->l4Offset);
+                udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
+                                              IPPROTO_UDP, csumLength);
+                udp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            }
+        } else if (layers->isIPv6) {
+            IPv6Hdr *ip = (IPv6Hdr *)(bufferStart + layers->l3Offset);
+
+            if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
+                tcp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
+                                                (UINT32 *) &ip->daddr,
+                                                IPPROTO_TCP, csumLength);
+                tcp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                UDPHdr *udp = (UDPHdr *)(bufferStart + layers->l4Offset);
+                udp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
+                                                (UINT32 *) &ip->daddr,
+                                                IPPROTO_UDP, csumLength);
+                udp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            }
+        }
+        /* Clear out TcpIpChecksumNetBufferListInfo flag */
+        NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = 0;
+    }
+
+    curNbl = *newNbl;
+    for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL;
+         curNb = curNb->Next) {
+#if DBG
+        counterHeadRoom = headRoom;
+#endif
+        status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
+        if (status != NDIS_STATUS_SUCCESS) {
+            goto ret_error;
+        }
+
+        curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+        bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
+                                                           LowPagePriority);
+        if (!bufferStart) {
+            status = NDIS_STATUS_RESOURCES;
+            goto ret_error;
+        }
+
+        bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+        if (NET_BUFFER_NEXT_NB(curNb)) {
+            OVS_LOG_TRACE("nb length %u next %u",
+                          NET_BUFFER_DATA_LENGTH(curNb),
+                          NET_BUFFER_DATA_LENGTH(curNb->Next));
+        }
+
+        /* L2 header */
+        ethHdr = (EthHdr *)bufferStart;
+        ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
+               (PCHAR)&fwdInfo->srcMacAddr);
+        /* One copy fills both MACs: dst and src are adjacent (asserted). */
+        NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
+                       sizeof ethHdr->Destination + sizeof ethHdr->Source);
+        ethHdr->Type = htons(ETH_TYPE_IPV4);
+#if DBG
+        counterHeadRoom -= sizeof *ethHdr;
+#endif
+
+        /* IP header */
+        ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
+
+        ipHdr->ihl = sizeof *ipHdr / 4;
+        ipHdr->version = IPPROTO_IPV4;
+        ipHdr->tos = tunKey->tos;
+        ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
+        ipHdr->id = (uint16)atomic_add64(&vportGre->ipId,
+                                         NET_BUFFER_DATA_LENGTH(curNb));
+        ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
+                          IP_DF_NBO : 0;
+        ipHdr->ttl = tunKey->ttl ? tunKey->ttl : 64; /* 64 if unset */
+        ipHdr->protocol = IPPROTO_GRE;
+        ASSERT(tunKey->dst == fwdInfo->dstIpAddr);
+        ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0);
+        ipHdr->saddr = fwdInfo->srcIpAddr;
+        ipHdr->daddr = fwdInfo->dstIpAddr;
+
+        ipHdr->check = 0;
+        ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0);
+#if DBG
+        counterHeadRoom -= sizeof *ipHdr;
+#endif
+
+        /* GRE header */
+        greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
+        greHdr->flags = OvsTunnelFlagsToGreFlags(tunKey->flags);
+        greHdr->protocolType = GRE_NET_TEB;
+#if DBG
+        counterHeadRoom -= sizeof *greHdr;
+#endif
+
+        PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr;
+
+        if (tunKey->flags & OVS_TNL_F_CSUM) {
+            RtlZeroMemory(currentOffset, 4); /* XXX: checksum not computed */
+            currentOffset += 4;
+#if DBG
+            counterHeadRoom -= 4;
+#endif
+        }
+
+        if (tunKey->flags & OVS_TNL_F_KEY) {
+            RtlZeroMemory(currentOffset, 4);
+            UINT32 key = (tunKey->tunnelId >> 32);
+            RtlCopyMemory(currentOffset, &key, sizeof key);
+            currentOffset += 4;
+#if DBG
+            counterHeadRoom -= 4;
+#endif
+        }
+
+        if (tunKey->flags & OVS_TNL_F_SEQ) {
+            RtlZeroMemory(currentOffset, 4); /* XXX: sequence number not set */
+            currentOffset += 4;
+#if DBG
+            counterHeadRoom -= 4;
+#endif
+        }
+
+#if DBG
+        ASSERT(counterHeadRoom == 0);
+#endif
+    }
+    return STATUS_SUCCESS;
+
+ret_error:
+    OvsCompleteNBL(switchContext, *newNbl, TRUE);
+    *newNbl = NULL;
+    return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsDecapGre --
+ *    Strips the outer Ethernet/IPv4/GRE headers into *newNbl, fills tunKey.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsDecapGre(POVS_SWITCH_CONTEXT switchContext,
+            PNET_BUFFER_LIST curNbl,
+            OvsIPv4TunnelKey *tunKey,
+            PNET_BUFFER_LIST *newNbl)
+{
+    PNET_BUFFER curNb;
+    PMDL curMdl;
+    EthHdr *ethHdr;
+    IPHdr *ipHdr;
+    GREHdr *greHdr;
+    UINT32 tunnelSize = 0, packetLength = 0, headRoom = 0;
+    PUINT8 bufferStart;
+    NDIS_STATUS status;
+
+    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+    packetLength = NET_BUFFER_DATA_LENGTH(curNb);
+    tunnelSize = GreTunHdrSize(tunKey->flags);
+    if (packetLength <= tunnelSize) {
+        return NDIS_STATUS_INVALID_LENGTH;
+    }
+
+    /* Copy the NBL so that all the headers end up in one MDL. */
+    *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
+                                tunnelSize + OVS_DEFAULT_COPY_SIZE, 0,
+                                TRUE /*copy NBL info */);
+    if (*newNbl == NULL) {
+        return NDIS_STATUS_RESOURCES;
+    }
+
+    curNbl = *newNbl;
+    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+    curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+    bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
+    if (!bufferStart) {
+        status = NDIS_STATUS_RESOURCES;
+        goto dropNbl;
+    }
+    bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); /* after NULL check */
+
+    ethHdr = (EthHdr *)bufferStart;
+    headRoom += sizeof *ethHdr;
+
+    ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
+    tunKey->src = ipHdr->saddr;
+    tunKey->dst = ipHdr->daddr;
+    tunKey->tos = ipHdr->tos;
+    tunKey->ttl = ipHdr->ttl;
+    tunKey->pad = 0;
+    headRoom += sizeof *ipHdr;
+
+    greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
+    headRoom += sizeof *greHdr;
+
+    /* Only Transparent Ethernet Bridging payloads are accepted. */
+    if (greHdr->protocolType != GRE_NET_TEB) {
+        status = STATUS_NDIS_INVALID_PACKET;
+        goto dropNbl;
+    }
+
+    PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr;
+    if (greHdr->flags & GRE_CSUM) {
+        tunKey->flags |= OVS_TNL_F_CSUM;
+        currentOffset += 4;
+        headRoom += 4;
+    }
+
+    if (greHdr->flags & GRE_KEY) {
+        tunKey->flags |= OVS_TNL_F_KEY;
+        UINT32 key = 0;
+        RtlCopyMemory(&key, currentOffset, 4);
+        tunKey->tunnelId = (UINT64)key << 32;
+        currentOffset += 4;
+        headRoom += 4;
+    }
+
+    if (greHdr->flags & GRE_SEQ) {
+        tunKey->flags |= OVS_TNL_F_SEQ;
+        currentOffset += 4;
+        headRoom += 4;
+    }
+
+    /* Clear out the receive flag for the inner packet. */
+    NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
+    NdisAdvanceNetBufferDataStart(curNb, headRoom, FALSE, NULL);
+    ASSERT(headRoom == GreTunHdrSize(tunKey->flags));
+    return NDIS_STATUS_SUCCESS;
+
+dropNbl:
+    OvsCompleteNBL(switchContext, *newNbl, TRUE);
+    *newNbl = NULL;
+    return status;
+}
diff --git a/datapath-windows/ovsext/Gre.h b/datapath-windows/ovsext/Gre.h
new file mode 100644
index 0000000..71ff05e
--- /dev/null
+++ b/datapath-windows/ovsext/Gre.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015 Cloudbase Solutions Srl
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GRE_H_
+#define __GRE_H_ 1
+
+#include "NetProto.h"
+#include "Flow.h"
+
+typedef struct _OVS_GRE_VPORT {
+    UINT16 dstPort;      /* value passed to OvsInitGreTunnel() */
+    UINT64 inPkts;
+    UINT64 outPkts;
+    UINT64 slowInPkts;
+    UINT64 slowOutPkts;
+    UINT64 filterID;
+    UINT64 ipId;         /* counter feeding the outer IPv4 'id' on encap */
+    /*
+    * To be filled; inPkts through filterID are not yet used by Gre.c.
+    */
+} OVS_GRE_VPORT, *POVS_GRE_VPORT;
+
+
+/* GRE RFC 2890 header based on http://tools.ietf.org/html/rfc2890
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |C| |K|S| Reserved0       | Ver |         Protocol Type         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |      Checksum (optional)      |       Reserved1 (Optional)    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         Key (optional)                        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                 Sequence Number (Optional)                    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+typedef struct GREHdr {
+    UINT16 flags;         /* first 16 bits: C/K/S bits, Reserved0, Ver */
+    UINT16 protocolType;  /* compared against GRE_NET_TEB on receive */
+} GREHdr, *PGREHdr;
+
+/* Transparent Ethernet Bridging: 0x6558 byte-swapped, used without ntohs() */
+#define GRE_NET_TEB     0x5865
+/* GRE flags, byte-swapped like GRE_NET_TEB (C = 0x8000 on the wire, ...) */
+#define GRE_CSUM    0x0080
+#define GRE_KEY     0x0020
+#define GRE_SEQ     0x0010
+
+/* Allocates the per-port GRE state and stores it in vport->priv. */
+NTSTATUS OvsInitGreTunnel(POVS_VPORT_ENTRY vport,
+                          UINT16 udpDestPort);
+
+/* Frees the state set up by OvsInitGreTunnel(); does nothing for a non-GRE
+ * vport or when vport->priv is NULL. */
+VOID OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport);
+
+NDIS_STATUS OvsEncapGre(POVS_VPORT_ENTRY vport,
+                        PNET_BUFFER_LIST curNbl,
+                        OvsIPv4TunnelKey *tunKey,
+                        POVS_SWITCH_CONTEXT switchContext,
+                        POVS_PACKET_HDR_INFO layers,
+                        PNET_BUFFER_LIST *newNbl);
+
+NDIS_STATUS OvsDecapGre(POVS_SWITCH_CONTEXT switchContext,
+                        PNET_BUFFER_LIST curNbl,
+                        OvsIPv4TunnelKey *tunKey,
+                        PNET_BUFFER_LIST *newNbl);
+
+/* Maps the OVS_TNL_F_CSUM/KEY/SEQ tunnel flags onto the GRE_* header bits. */
+static __inline UINT16
+OvsTunnelFlagsToGreFlags(UINT16 tunnelflags)
+{
+    UINT16 greFlags = 0;
+    if (tunnelflags & OVS_TNL_F_CSUM) {
+        greFlags |= GRE_CSUM;
+    }
+    if (tunnelflags & OVS_TNL_F_KEY) {
+        greFlags |= GRE_KEY;
+    }
+    if (tunnelflags & OVS_TNL_F_SEQ) {
+        greFlags |= GRE_SEQ;
+    }
+    return greFlags;
+}
+
+/* Returns the outer header size in bytes: EthHdr + IPHdr + GREHdr, plus
+ * 4 bytes for each optional field (checksum, key, sequence number) that
+ * is selected by the OVS_TNL_F_* bits in 'flags'. */
+static __inline UINT32
+GreTunHdrSize(UINT16 flags)
+{
+    UINT32 size = sizeof(EthHdr) + sizeof(IPHdr) + sizeof(GREHdr);
+
+    size += (flags & OVS_TNL_F_CSUM) ? 4 : 0;
+    size += (flags & OVS_TNL_F_KEY) ? 4 : 0;
+    size += (flags & OVS_TNL_F_SEQ) ? 4 : 0;
+    return size;
+}
+
+#endif /*__GRE_H_ */
diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h
index e5ba72b..a81c723 100644
--- a/datapath-windows/ovsext/Util.h
+++ b/datapath-windows/ovsext/Util.h
@@ -34,6 +34,7 @@
 #define OVS_USER_POOL_TAG               'USVO'
 #define OVS_VPORT_POOL_TAG              'PSVO'
 #define OVS_STT_POOL_TAG                'RSVO'
+#define OVS_GRE_POOL_TAG                'GSVO'
 #define OVS_TUNFLT_POOL_TAG             'WSVO'
 
 VOID *OvsAllocateMemory(size_t size);
diff --git a/datapath-windows/ovsext/Vport.c b/datapath-windows/ovsext/Vport.c
index a7576d3..11737a8 100644
--- a/datapath-windows/ovsext/Vport.c
+++ b/datapath-windows/ovsext/Vport.c
@@ -15,16 +15,18 @@
  */
 
 #include "precomp.h"
+
+#include "Datapath.h"
+#include "Event.h"
+#include "Gre.h"
+#include "IpHelper.h"
 #include "Jhash.h"
+#include "Oid.h"
+#include "Stt.h"
 #include "Switch.h"
-#include "Vport.h"
-#include "Event.h"
 #include "User.h"
+#include "Vport.h"
 #include "Vxlan.h"
-#include "Stt.h"
-#include "IpHelper.h"
-#include "Oid.h"
-#include "Datapath.h"
 
 #ifdef OVS_DBG_MOD
 #undef OVS_DBG_MOD
@@ -700,6 +702,26 @@ OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext,
     return NULL;
 }
 
+/* Returns the first vport of type 'ovsPortType' found in the tunnel-vport
+ * bucket that destination port 0 hashes to, or NULL if there is none. */
+POVS_VPORT_ENTRY
+OvsFindTunnelVportByPortType(POVS_SWITCH_CONTEXT switchContext,
+                             OVS_VPORT_TYPE ovsPortType)
+{
+    POVS_VPORT_ENTRY vport;
+    PLIST_ENTRY head, link;
+    UINT16 dstPort = 0; /* presumably how GRE vports are hashed - confirm */
+    UINT32 hash = OvsJhashBytes((const VOID *)&dstPort, sizeof(dstPort),
+                                OVS_HASH_BASIS);
+    head = &(switchContext->tunnelVportsArray[hash & OVS_VPORT_MASK]);
+    LIST_FORALL(head, link) {
+        vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, tunnelVportLink);
+        if (vport->ovsType == ovsPortType) {
+            return vport;
+        }
+    }
+    return NULL;
+}
 
 POVS_VPORT_ENTRY
 OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext,
@@ -983,6 +1005,7 @@ OvsInitTunnelVport(PVOID userContext,
     vport->ovsState = OVS_STATE_PORT_CREATED;
     switch (ovsType) {
     case OVS_VPORT_TYPE_GRE:
+        status = OvsInitGreTunnel(vport, dstPort);
         break;
     case OVS_VPORT_TYPE_VXLAN:
     {
@@ -1153,6 +1176,7 @@ InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext,
     UINT32 hash;
 
     switch(vport->ovsType) {
+    case OVS_VPORT_TYPE_GRE:
     case OVS_VPORT_TYPE_VXLAN:
     case OVS_VPORT_TYPE_STT:
     {
@@ -1242,6 +1266,7 @@ OvsRemoveAndDeleteVport(PVOID usrParamsContext,
         OvsCleanupSttTunnel(vport);
         break;
     case OVS_VPORT_TYPE_GRE:
+        OvsCleanupGreTunnel(vport);
         break;
     case OVS_VPORT_TYPE_NETDEV:
         if (vport->isExternal) {
@@ -1299,7 +1324,8 @@ OvsRemoveAndDeleteVport(PVOID usrParamsContext,
         RemoveEntryList(&vport->portNoLink);
         InitializeListHead(&vport->portNoLink);
         if (OVS_VPORT_TYPE_VXLAN == vport->ovsType ||
-            OVS_VPORT_TYPE_STT == vport->ovsType) {
+            OVS_VPORT_TYPE_STT == vport->ovsType   ||
+            OVS_VPORT_TYPE_GRE == vport->ovsType) {
             RemoveEntryList(&vport->tunnelVportLink);
             InitializeListHead(&vport->tunnelVportLink);
         }
@@ -2190,6 +2216,9 @@ OvsNewVportCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
             UINT16 transportPortDest = 0;
 
             switch (portType) {
+            case OVS_VPORT_TYPE_GRE:
+                OvsCleanupGreTunnel(vport);
+                break;
             case OVS_VPORT_TYPE_VXLAN:
                 transportPortDest = VXLAN_UDP_PORT;
                 break;
diff --git a/datapath-windows/ovsext/Vport.h b/datapath-windows/ovsext/Vport.h
index e9f3b03..b11cf79 100644
--- a/datapath-windows/ovsext/Vport.h
+++ b/datapath-windows/ovsext/Vport.h
@@ -17,9 +17,10 @@
 #ifndef __VPORT_H_
 #define __VPORT_H_ 1
 
+#include "Gre.h"
+#include "Stt.h"
 #include "Switch.h"
 #include "VxLan.h"
-#include "Stt.h"
 
 #define OVS_MAX_DPPORTS             MAXUINT16
 #define OVS_DPPORT_NUMBER_INVALID   OVS_MAX_DPPORTS
@@ -147,6 +148,8 @@ POVS_VPORT_ENTRY OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchConte
 POVS_VPORT_ENTRY OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext,
                                              UINT16 dstPort,
                                              OVS_VPORT_TYPE ovsVportType);
+POVS_VPORT_ENTRY OvsFindTunnelVportByPortType(POVS_SWITCH_CONTEXT switchContext,
+                                              OVS_VPORT_TYPE ovsPortType);
 
 NDIS_STATUS OvsAddConfiguredSwitchPorts(struct _OVS_SWITCH_CONTEXT *switchContext);
 NDIS_STATUS OvsInitConfiguredSwitchNics(struct _OVS_SWITCH_CONTEXT *switchContext);
@@ -256,16 +259,19 @@ GetPortFromPriv(POVS_VPORT_ENTRY vport)
     /* XXX would better to have a commom tunnel "parent" structure */
     ASSERT(vportPriv);
     switch(vport->ovsType) {
-    case OVS_VPORT_TYPE_VXLAN:
-        dstPort = ((POVS_VXLAN_VPORT)vportPriv)->dstPort;
+    case OVS_VPORT_TYPE_GRE:
+        dstPort = ((POVS_GRE_VPORT)vportPriv)->dstPort;
         break;
     case OVS_VPORT_TYPE_STT:
         dstPort = ((POVS_STT_VPORT)vportPriv)->dstPort;
         break;
+    case OVS_VPORT_TYPE_VXLAN:
+        dstPort = ((POVS_VXLAN_VPORT)vportPriv)->dstPort;
+        break;
     default:
         ASSERT(! "Port is not a tunnel port");
     }
-    ASSERT(dstPort);
+    ASSERT(dstPort || vport->ovsType == OVS_VPORT_TYPE_GRE);
     return dstPort;
 }
 
diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
index 616f688..231ac83 100644
--- a/datapath-windows/ovsext/ovsext.vcxproj
+++ b/datapath-windows/ovsext/ovsext.vcxproj
@@ -80,6 +80,7 @@
     <ClInclude Include="Ethernet.h" />
     <ClInclude Include="Event.h" />
     <ClInclude Include="Flow.h" />
+    <ClInclude Include="Gre.h" />
     <ClInclude Include="IpHelper.h" />
     <ClInclude Include="Jhash.h" />
     <ClInclude Include="Netlink/Netlink.h" />
@@ -172,6 +173,7 @@
     <ClCompile Include="Driver.c" />
     <ClCompile Include="Event.c" />
     <ClCompile Include="Flow.c" />
+    <ClCompile Include="Gre.c" />
     <ClCompile Include="IpHelper.c" />
     <ClCompile Include="Jhash.c" />
     <ClCompile Include="Netlink/Netlink.c" />
-- 
1.9.5.msysgit.0



More information about the dev mailing list