[ovs-dev] [PATCH] datapath-windows: Stateless TCP Tunnelling protocol - Initial implementation

Nithin Raju nithin at vmware.com
Fri May 22 20:46:13 UTC 2015


hi Eitan,
Thanks for this patch. Looks good overall. I had a few minor comments which I have inlined.

Also, please update the instructions in INSTALL.Windows.md to cover the configuration steps for setting up STT tunnels and testing them.

thanks,
-- Nithin



> On May 7, 2015, at 6:08 PM, Eitan Eliahu <eliahue at vmware.com> wrote:
> 
> This change includes an initial implementation of STT.
> The following should be added:
> [1] Checksum offload (SW and HW)
> [2] LSO (SW and HW)
> [3] IP layer WFP callout for IP segments
> 
> Testing: link layer connection through ping works. File transfer.
> 
> Signed-off-by: Eitan Eliahu <eliahue at vmware.com>
> Co-authored-by: Saurabh Shah <ssaurabh at vmware.com>
> Signed-off-by: Saurabh Shah <ssaurabh at vmware.com>
> ---
> datapath-windows/automake.mk           |   2 +
> datapath-windows/ovsext/Actions.c      |  13 ++
> datapath-windows/ovsext/Debug.h        |   1 +
> datapath-windows/ovsext/Stt.c          | 363 +++++++++++++++++++++++++++++++++
> datapath-windows/ovsext/Stt.h          |  87 ++++++++
> datapath-windows/ovsext/Switch.h       |   3 +-
> datapath-windows/ovsext/Tunnel.c       |   3 +-
> datapath-windows/ovsext/Util.h         |   1 +
> datapath-windows/ovsext/Vport.c        |  18 ++
> datapath-windows/ovsext/Vport.h        |  22 ++
> datapath-windows/ovsext/Vxlan.c        |   4 +-
> datapath-windows/ovsext/ovsext.vcxproj |   4 +-
> 12 files changed, 515 insertions(+), 6 deletions(-)
> create mode 100644 datapath-windows/ovsext/Stt.c
> create mode 100644 datapath-windows/ovsext/Stt.h
> 
> diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
> index 9324b3c..a4f5a57 100644
> --- a/datapath-windows/automake.mk
> +++ b/datapath-windows/automake.mk
> @@ -56,6 +56,8 @@ EXTRA_DIST += \
> 	datapath-windows/ovsext/Vport.c \
> 	datapath-windows/ovsext/Vport.h \
> 	datapath-windows/ovsext/Vxlan.c \
> +	datapath-windows/ovsext/Stt.h \
> +	datapath-windows/ovsext/Stt.c \
> 	datapath-windows/ovsext/Vxlan.h \
> 	datapath-windows/ovsext/ovsext.inf \
> 	datapath-windows/ovsext/ovsext.rc \
> diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
> index a93fe03..39f36b4 100644
> --- a/datapath-windows/ovsext/Actions.c
> +++ b/datapath-windows/ovsext/Actions.c
> @@ -23,6 +23,7 @@
> #include "NetProto.h"
> #include "Flow.h"
> #include "Vxlan.h"
> +#include "Stt.h"
> #include "Checksum.h"
> #include "PacketIO.h"
> 
> @@ -207,6 +208,10 @@ OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx,
>         flowKey->ipKey.l4.tpDst == VXLAN_UDP_PORT_NBO) {
>         tunnelVport = ovsFwdCtx->switchContext->vxlanVport;
>         ovsActionStats.rxVxlan++;
> +    } else if (!flowKey->ipKey.nwFrag &&
> +                flowKey->ipKey.nwProto == IPPROTO_TCP &&
> +                flowKey->ipKey.l4.tpDst == STT_DST_PORT_NBO) {
> +        tunnelVport =ovsFwdCtx->switchContext->sttVport;

minor: need a space after '='.

>     }
> 
>     // We might get tunnel packets even before the tunnel gets initialized.
> @@ -632,6 +637,10 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
>                                ovsFwdCtx->switchContext,
>                                (VOID *)ovsFwdCtx->completionList,
>                                &ovsFwdCtx->layers, &newNbl);
> +        break;
> +    case OVS_VPORT_TYPE_STT:
> +        status = OvsEncapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
> +                             &ovsFwdCtx->tunKey, &ovsFwdCtx->layers, &newNbl);

minor: it would be nice if the order of the parameters to OvsEncapVxlan() and OvsEncapStt() were the same.

>         break;
>     default:
>         ASSERT(! "Tx: Unhandled tunnel type");
> @@ -697,6 +706,10 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
>         status = OvsDoDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
>                                                 &ovsFwdCtx->tunKey, &newNbl);
>         break;
> +    case OVS_VPORT_TYPE_STT:
> +        status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
> +                             &ovsFwdCtx->tunKey, &newNbl);
> +        break;

minor: it would be nice if the names of the decap functions are similar for VXLAN and STT. I think we can get rid of the ‘Do’ in OvsDoDecapVxlan(). Also, the comment above OvsDoDecapVxlan() about the expectation of ‘newNbl’ is applicable for OvsDecapStt(). So, the comment can be made generic.


>     default:
>         OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
>                       tunnelRxVport->ovsType);
> diff --git a/datapath-windows/ovsext/Debug.h b/datapath-windows/ovsext/Debug.h
> index a0da5eb..4b7b526 100644
> --- a/datapath-windows/ovsext/Debug.h
> +++ b/datapath-windows/ovsext/Debug.h
> @@ -40,6 +40,7 @@
> #define OVS_DBG_OTHERS   BIT32(21)
> #define OVS_DBG_NETLINK  BIT32(22)
> #define OVS_DBG_TUNFLT   BIT32(23)
> +#define OVS_DBG_STT      BIT32(24)
> 
> #define OVS_DBG_RESERVED BIT32(31)
> //Please add above OVS_DBG_RESERVED.
> diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c
> new file mode 100644
> index 0000000..a1dae21
> --- /dev/null
> +++ b/datapath-windows/ovsext/Stt.c
> @@ -0,0 +1,363 @@
> +/*
> + * Copyright (c) 2015 VMware, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include "precomp.h"
> +#include "NetProto.h"
> +#include "Switch.h"
> +#include "Vport.h"
> +#include "Flow.h"
> +#include “stt.h"

minor: stt.h => Stt.h. I know Windows supports both :)

> +#include "IpHelper.h"
> +#include "Checksum.h"
> +#include "User.h"
> +#include "PacketIO.h"
> +#include "Flow.h"
> +#include "PacketParser.h"
> +#include "Atomic.h"
> +
> +#ifdef OVS_DBG_MOD
> +#undef OVS_DBG_MOD
> +#endif
> +#define OVS_DBG_MOD OVS_DBG_STT
> +#include "Debug.h"
> +
> +static NDIS_STATUS
> +OvsDoEncapStt(PNET_BUFFER_LIST curNbl, const OvsIPv4TunnelKey *tunKey,
> +              const POVS_FWD_INFO fwdInfo,
> +              POVS_PACKET_HDR_INFO layers,
> +              POVS_SWITCH_CONTEXT switchContext,
> +              PNET_BUFFER_LIST *newNbl);
> +
> +/*
> + *
> + * OvsInitSttTunnel --
> + *    Initialize STT tunnel module.
> + *
> + */

minor: please add a " *----------------------------------------------------------------------------" line to make this look like a function description header, for this function as well as the other functions in this file.

> +NTSTATUS
> +OvsInitSttTunnel(POVS_VPORT_ENTRY vport,
> +                 UINT16 tcpDestPort)
> +{
> +    POVS_STT_VPORT sttPort;
> +
> +    sttPort = (POVS_STT_VPORT) OvsAllocateMemoryWithTag(sizeof(*sttPort),
> +                                                        OVS_STT_POOL_TAG);
> +    if (!sttPort) {
> +        OVS_LOG_ERROR("Insufficient memory, can't allocate STT_VPORT");
> +        return STATUS_INSUFFICIENT_RESOURCES;
> +    }
> +
> +    RtlZeroMemory(sttPort, sizeof(*sttPort));
> +    sttPort->dstPort = tcpDestPort;
> +    vport->priv = (PVOID) sttPort;
> +    return STATUS_SUCCESS;
> +}
> +
> +/*
> + *
> + * OvsCleanupSttTunnel --
> + *    Cleanup STT Tunnel module.
> + *
> + */

minor: there are extra blank comment lines before " * OvsCleanupSttTunnel --" and after " *    Cleanup STT Tunnel module.", in this function and in the functions below. You can replace them with " *----------------------------------------------------------------------------".

> +void
> +OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport)
> +{
> +    if (vport->ovsType != OVS_VPORT_TYPE_STT ||
> +        vport->priv == NULL) {
> +        return;
> +    }
> +
> +    OvsFreeMemoryWithTag(vport->priv, OVS_STT_POOL_TAG);
> +    vport->priv = NULL;
> +}
> +
> +/*
> + *
> + * OvsEncapStt --
> + *     Encapsulates a packet with an STT header.
> + *
> + */
> +NDIS_STATUS
> +OvsEncapStt(POVS_SWITCH_CONTEXT switchContext,
> +            PNET_BUFFER_LIST curNbl,
> +            OvsIPv4TunnelKey *tunKey,
> +            POVS_PACKET_HDR_INFO layers,
> +            PNET_BUFFER_LIST *newNbl)
> +{
> +    OVS_FWD_INFO fwdInfo;
> +    NDIS_STATUS status;
> +
> +    UNREFERENCED_PARAMETER(switchContext);
> +    status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
> +    if (status != STATUS_SUCCESS) {
> +        OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
> +        /*
> +         * XXX This case where the ARP table is not populated is
> +         * currently not handled
> +         */
> +        return NDIS_STATUS_FAILURE;
> +    }
> +
> +    status = OvsDoEncapStt(curNbl, tunKey, &fwdInfo, layers, switchContext,
> +                           newNbl);
> +    return status;
> +}
> +
> +/*
> + *
> + * OvsDoEncapStt --
> + *    Internal utility function which actually does the STT encap.
> + *
> + */
> +NDIS_STATUS
> +OvsDoEncapStt(PNET_BUFFER_LIST curNbl,
> +              const OvsIPv4TunnelKey *tunKey,
> +              const POVS_FWD_INFO fwdInfo,
> +              POVS_PACKET_HDR_INFO layers,
> +              POVS_SWITCH_CONTEXT switchContext,
> +              PNET_BUFFER_LIST *newNbl)
> +{
> +    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
> +    PMDL curMdl = NULL;
> +    PNET_BUFFER curNb;
> +    PUINT8 buf = NULL;
> +    EthHdr *outerEthHdr;
> +    IPHdr *outerIpHdr;
> +    TCPHdr *outerTcpHdr;
> +    SttHdr *sttHdr;
> +    UINT32 innerFrameLen, ipTotalLen;
> +    POVS_STT_VPORT vportStt;
> +    UINT32 headRoom = OvsGetSttTunHdrSize();
> +    UINT32 tcpChksumLen;
> +    POVS_VPORT_ENTRY ovsVport;
> +
> +    UNREFERENCED_PARAMETER(layers);
> +
> +    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
> +    if (layers->isTcp) {
> +        NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
> +
> +        lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
> +                TcpLargeSendNetBufferListInfo);
> +        if (lsoInfo.LsoV1Transmit.MSS) {
> +            /* XXX We don't handle LSO yet */
> +            OVS_LOG_ERROR("Unable to segment NBL”);

minor: A better error message might help. “LSO on STT is not supported”.

> +            return NDIS_STATUS_FAILURE;
> +        }
> +    }
> +
> +    ovsVport = OvsGetTunnelVport(switchContext, OVS_VPORT_TYPE_STT);
> +    ASSERT(ovsVport);
> +    vportStt = (POVS_STT_VPORT) GetOvsVportPriv(ovsVport);
> +    ASSERT(vportStt);
> +
> +    *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
> +                                FALSE /*copy NblInfo*/);
> +    if (*newNbl == NULL) {
> +        OVS_LOG_ERROR("Unable to copy NBL");
> +        return NDIS_STATUS_FAILURE;
> +    }
> +
> +    curNbl = *newNbl;
> +    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
> +    /* NB Chain should be splitted before */

minor: splitted => split.

> +    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
> +
> +    innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
> +    if (innerFrameLen > OvsGetExternalMtu(switchContext) - headRoom) {
> +        OVS_LOG_ERROR("Packet too large (size %d, mtu %d). Can't encapsulate",
> +                innerFrameLen, OvsGetExternalMtu(switchContext));
> +        status = NDIS_STATUS_FAILURE;
> +        goto ret_error;
> +    }
> +
> +    status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
> +    if (status != NDIS_STATUS_SUCCESS) {
> +        ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)");
> +        OVS_LOG_ERROR("Unable to NdisRetreatNetBufferDataStart(headroom)");
> +        goto ret_error;
> +    }
> +
> +    /*
> +     * Make sure that the headroom for the tunnel header is continguous in
> +     * memory.
> +     */
> +    curMdl = NET_BUFFER_CURRENT_MDL(curNb);
> +    ASSERT((int) (MmGetMdlByteCount(curMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(curNb))
> +                >= (short) headRoom);

minor: are the typecasts necessary? If yes, can we cast both sides of the comparison to 'int' instead of mixing 'int' and 'short'? Not that it makes a difference at runtime.

> +
> +    buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
> +    if (!buf) {
> +        ASSERT(!"MmGetSystemAddressForMdlSafe failed");
> +        OVS_LOG_ERROR("MmGetSystemAddressForMdlSafe failed");
> +        status = NDIS_STATUS_RESOURCES;
> +        goto ret_error;
> +    }
> +
> +    buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
> +    outerEthHdr = (EthHdr *)buf;
> +    outerIpHdr = (IPHdr *) (outerEthHdr + 1);
> +    outerTcpHdr = (TCPHdr *) (outerIpHdr + 1);
> +    sttHdr = (SttHdr *) (outerTcpHdr + 1);
> +
> +    /* L2 header */
> +    ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
> +            (PCHAR)&fwdInfo->srcMacAddr);
> +    NdisMoveMemory(outerEthHdr->Destination, fwdInfo->dstMacAddr,
> +                    sizeof outerEthHdr->Destination + sizeof outerEthHdr->Source);
> +    outerEthHdr->Type = htons(ETH_TYPE_IPV4);
> +
> +    /* L3 header */
> +    outerIpHdr->ihl = sizeof(IPHdr) >> 2;
> +    outerIpHdr->version = IPPROTO_IPV4;
> +    outerIpHdr->tos = tunKey->tos;
> +
> +    ipTotalLen = sizeof(IPHdr) + sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen;
> +    outerIpHdr->tot_len = htons(ipTotalLen);
> +    ASSERT(ipTotalLen < 65536);
> +
> +    outerIpHdr->id = (uint16) atomic_add64(&vportStt->ipId, innerFrameLen);
> +    outerIpHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
> +                                                                IP_DF_NBO : 0;

minor: indentation.

> +    outerIpHdr->ttl = tunKey->ttl? tunKey->ttl : 64;
> +    outerIpHdr->protocol = IPPROTO_TCP;
> +    outerIpHdr->check = 0;
> +    outerIpHdr->saddr = fwdInfo->srcIpAddr;
> +    outerIpHdr->daddr = tunKey->dst;
> +    outerIpHdr->check = IPChecksum((uint8 *)outerIpHdr, sizeof *outerIpHdr, 0);
> +
> +    /* L4 header */
> +    RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr);
> +    outerTcpHdr->source = htons(tunKey->flow_hash | 32768);
> +    outerTcpHdr->dest = STT_DST_PORT_NBO;
> +    outerTcpHdr->seq = htonl((STT_HDR_LEN + innerFrameLen) <<
> +                                                        STT_SEQ_LEN_SHIFT);

minor: indentation

> +    outerTcpHdr->ack_seq = htonl(atomic_inc64(&vportStt->ackNo));
> +    outerTcpHdr->doff = sizeof(TCPHdr) >> 2;
> +    outerTcpHdr->psh = 1;
> +    outerTcpHdr->ack = 1;
> +    outerTcpHdr->window = (uint16) ~0;
> +
> +    /* Calculate pseudo header chksum */
> +    tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen;
> +    ASSERT(tcpChksumLen < 65535);
> +    outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) &tunKey->dst,
> +                                          IPPROTO_TCP, (uint16) tcpChksumLen);
> +    sttHdr->version = 0;
> +
> +    /* XXX need to peek into the inner packet, hard code for now */
> +    sttHdr->flags = STT_PROTO_IPV4;
> +    sttHdr->l4Offset = 0;
> +
> +    sttHdr->reserved = 0;
> +    /* XXX Used for large TCP packets.Not sure how it is used, clarify */
> +    sttHdr->mss = 0;
> +    sttHdr->vlanTCI = 0;
> +    sttHdr->key = tunKey->tunnelId;
> +    /* Zero out stt padding */
> +    *(uint16 *)(sttHdr + 1) = 0; 
> +
> +    /* Calculate software tcp checksum */
> +    outerTcpHdr->check = CalculateChecksumNB(curNb, (uint16) tcpChksumLen,
> +                                             sizeof(EthHdr) + sizeof(IPHdr));
> +    if (outerTcpHdr->check == 0) {
> +        status = NDIS_STATUS_FAILURE;
> +        goto ret_error;
> +    }
> +
> +    return STATUS_SUCCESS;
> +
> +ret_error:
> +    OvsCompleteNBL(switchContext, *newNbl, TRUE);
> +    *newNbl = NULL;
> +    return status;
> +}
> +
> +/*
> + *
> + * OvsDecapStt --
> + *     Decapsulates an STT packet.
> + *
> + */
> +NDIS_STATUS
> +OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
> +            PNET_BUFFER_LIST curNbl,
> +            OvsIPv4TunnelKey *tunKey,
> +            PNET_BUFFER_LIST *newNbl)
> +{
> +    NDIS_STATUS status = NDIS_STATUS_FAILURE;
> +    PNET_BUFFER curNb;
> +    IPHdr *ipHdr;
> +    char *ipBuf[sizeof(IPHdr)];
> +    SttHdr *sttHdr;
> +    char *sttBuf[STT_HDR_LEN];
> +    UINT32 advanceCnt, hdrLen;
> +
> +    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
> +    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
> +
> +    if (NET_BUFFER_DATA_LENGTH(curNb) < OvsGetSttTunHdrSize()) {
> +        OVS_LOG_ERROR("Packet lenght received is less than the tunnel header:"
> +            " %d<%d\n", NET_BUFFER_DATA_LENGTH(curNb), OvsGetSttTunHdrSize());
> +        return NDIS_STATUS_INVALID_LENGTH;
> +    }

minor: lenght => length
Also, isn't it very restrictive to expect the L2 + L3 + L4 + STT headers to fall into one contiguous buffer? It is reasonable to expect physical NICs to chop up the packet into multiple MDLs after some initial parsing; Intel's NICs support a "packet split descriptor", for example. The VXLAN code handles this by creating a copy of the NBL up to the STT header. Maybe we can do the same here. It can be an improvement and need not be done as part of this patch.

> +
> +    /* Skip Eth header */
> +    hdrLen = sizeof(EthHdr);
> +    NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
> +    advanceCnt = hdrLen;
> +
> +    ipHdr = NdisGetDataBuffer(curNb, sizeof *ipHdr, (PVOID) &ipBuf,
> +                                                    1 /*no align*/, 0);
> +    ASSERT(ipHdr);
> +
> +    /* Skip IP & TCP headers */
> +    hdrLen = sizeof(IPHdr) + sizeof(TCPHdr),
> +    NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
> +    advanceCnt += hdrLen;
> +
> +    /* STT Header */
> +    sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, (PVOID) &sttBuf,
> +                                                    1 /*no align*/, 0);
> +    ASSERT(sttHdr);
> +
> +    /* Initialize the tunnel key */
> +    tunKey->dst = ipHdr->daddr;
> +    tunKey->src = ipHdr->saddr;
> +    tunKey->tunnelId = sttHdr->key;
> +    tunKey->flags = (OVS_TNL_F_CSUM | OVS_TNL_F_KEY);
> +    tunKey->tos = ipHdr->tos;
> +    tunKey->ttl = ipHdr->ttl;
> +    tunKey->pad = 0;
> +
> +    /* Skip stt header, DataOffset points to inner pkt now. */
> +    hdrLen = STT_HDR_LEN;
> +    NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
> +    advanceCnt += hdrLen;
> +
> +    *newNbl = OvsPartialCopyNBL(switchContext, curNbl, OVS_DEFAULT_COPY_SIZE,
> +                                0, FALSE /*copy NBL info*/);
> +
> +    ASSERT(advanceCnt == OvsGetSttTunHdrSize());
> +    status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
> +
> +    if (*newNbl == NULL) {
> +        OVS_LOG_ERROR("OvsDecapStt: Unable to allocate a new cloned NBL");
> +        status = NDIS_STATUS_RESOURCES;
> +    }
> +
> +    return status;
> +}
> diff --git a/datapath-windows/ovsext/Stt.h b/datapath-windows/ovsext/Stt.h
> new file mode 100644
> index 0000000..8ef9f64
> --- /dev/null
> +++ b/datapath-windows/ovsext/Stt.h
> @@ -0,0 +1,87 @@
> +/*
> + * Copyright (c) 2015 VMware, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef __OVS_STT_H_
> +#define __OVS_STT_H_ 1
> +
> +#define STT_DST_PORT 7471
> +#define STT_DST_PORT_NBO 0x2f1d
> +
> +#define MAX_IP_TOTAL_LEN 65535
> +
> +// STT defines.
> +#define STT_SEQ_LEN_SHIFT 16
> +#define STT_SEQ_OFFSET_MASK ((1 << STT_SEQ_LEN_SHIFT) - 1)
> +#define STT_FRAME_LEN(seq) ((seq) >> STT_SEQ_LEN_SHIFT)
> +#define STT_SEGMENT_OFF(seq) ((seq) & STT_SEQ_OFFSET_MASK)
> +
> +#define STT_CSUM_VERIFIED   (1 << 0)
> +#define STT_CSUM_PARTIAL    (1 << 1)
> +#define STT_PROTO_IPV4      (1 << 2)
> +#define STT_PROTO_TCP       (1 << 3)
> +#define STT_PROTO_TYPES     (STT_PROTO_IPV4 | STT_PROTO_TCP)
> +
> +#define STT_ETH_PAD 2
> +typedef struct SttHdr {
> +    UINT8    version;
> +    UINT8    flags;
> +    UINT8    l4Offset;
> +    UINT8    reserved;
> +    UINT16   mss;
> +    UINT16   vlanTCI;
> +    UINT64   key;
> +} SttHdr, *PSttHdr;
> +
> +#define STT_HDR_LEN (sizeof(SttHdr) + STT_ETH_PAD)
> +
> +typedef struct _OVS_STT_VPORT {
> +    UINT32 dstPort;
> +    UINT64 ackNo;
> +    UINT64 ipId;
> +
> +    UINT64 inPkts;
> +    UINT64 outPkts;
> +    UINT64 slowInPkts;
> +    UINT64 slowOutPkts;
> +} OVS_STT_VPORT, *POVS_STT_VPORT;
> +
> +NTSTATUS OvsInitSttTunnel(POVS_VPORT_ENTRY vport,
> +                          UINT16 udpDestPort);
> +
> +VOID OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport);
> +
> +
> +void OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport);
> +
> +NDIS_STATUS OvsEncapStt(POVS_SWITCH_CONTEXT switchContext,
> +                        PNET_BUFFER_LIST curNbl,
> +                        OvsIPv4TunnelKey *tunKey,
> +                        POVS_PACKET_HDR_INFO layers,
> +                        PNET_BUFFER_LIST *newNbl);
> +
> +NDIS_STATUS OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
> +                        PNET_BUFFER_LIST curNbl,
> +                        OvsIPv4TunnelKey *tunKey,
> +                        PNET_BUFFER_LIST *newNbl);
> +
> +static __inline UINT32
> +OvsGetSttTunHdrSize(VOID)
> +{
> +    return sizeof (EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr) +
> +                STT_HDR_LEN;

minor: indentation.

> +}
> +
> +#endif /*__OVS_STT_H_ */
> diff --git a/datapath-windows/ovsext/Switch.h b/datapath-windows/ovsext/Switch.h
> index 6ec34e1..ee43ccb 100644
> --- a/datapath-windows/ovsext/Switch.h
> +++ b/datapath-windows/ovsext/Switch.h
> @@ -133,6 +133,7 @@ typedef struct _OVS_SWITCH_CONTEXT
>     POVS_VPORT_ENTRY        internalVport;
> 
>     POVS_VPORT_ENTRY        vxlanVport;
> +    POVS_VPORT_ENTRY        sttVport;
> 
>     /*
>      * 'portIdHashArray' ONLY contains ports that exist on the Hyper-V switch,
> @@ -216,6 +217,4 @@ OvsAcquireSwitchContext(VOID);
> VOID
> OvsReleaseSwitchContext(POVS_SWITCH_CONTEXT switchContext);
> 
> -PVOID OvsGetExternalVport();
> -
> #endif /* __SWITCH_H_ */
> diff --git a/datapath-windows/ovsext/Tunnel.c b/datapath-windows/ovsext/Tunnel.c
> index fed58f1..a4bf7ed 100644
> --- a/datapath-windows/ovsext/Tunnel.c
> +++ b/datapath-windows/ovsext/Tunnel.c
> @@ -292,7 +292,8 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
>             goto unlockAndDrop;
>         }
> 
> -        ASSERT(vport->ovsType == OVS_VPORT_TYPE_VXLAN);
> +        ASSERT(vport->ovsType == OVS_VPORT_TYPE_VXLAN ||
> +               vport->ovsType == OVS_VPORT_TYPE_STT);
> 
>         portNo = vport->portNo;
> 
> diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h
> index 9a01242..ee676fa 100644
> --- a/datapath-windows/ovsext/Util.h
> +++ b/datapath-windows/ovsext/Util.h
> @@ -33,6 +33,7 @@
> #define OVS_SWITCH_POOL_TAG             'SSVO'
> #define OVS_USER_POOL_TAG               'USVO'
> #define OVS_VPORT_POOL_TAG              'PSVO'
> +#define OVS_STT_POOL_TAG                'TSVO'
> 
> VOID *OvsAllocateMemory(size_t size);
> VOID *OvsAllocateMemoryWithTag(size_t size, ULONG tag);
> diff --git a/datapath-windows/ovsext/Vport.c b/datapath-windows/ovsext/Vport.c
> index 1423ace..f5651eb 100644
> --- a/datapath-windows/ovsext/Vport.c
> +++ b/datapath-windows/ovsext/Vport.c
> @@ -21,6 +21,7 @@
> #include "Event.h"
> #include "User.h"
> #include "Vxlan.h"
> +#include "Stt.h"
> #include "IpHelper.h"
> #include "Oid.h"
> #include "Datapath.h"
> @@ -867,6 +868,9 @@ OvsInitTunnelVport(POVS_VPORT_ENTRY vport,
>     case OVS_VPORT_TYPE_VXLAN:
>         status = OvsInitVxlanTunnel(vport, dstPort);
>         break;
> +    case OVS_VPORT_TYPE_STT:
> +        status = OvsInitSttTunnel(vport, dstPort);
> +        break;
>     default:
>         ASSERT(0);
>     }
> @@ -1016,6 +1020,11 @@ InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext,
>         switchContext->vxlanVport = vport;
>         switchContext->numNonHvVports++;
>         break;
> +    case OVS_VPORT_TYPE_STT:
> +        ASSERT(switchContext->sttVport == NULL);
> +        switchContext->sttVport = vport;
> +        switchContext->numNonHvVports++;
> +        break;
>     case OVS_VPORT_TYPE_INTERNAL:
>         if (vport->isBridgeInternal) {
>             switchContext->numNonHvVports++;
> @@ -1099,6 +1108,10 @@ OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext,
>         OvsCleanupVxlanTunnel(vport);
>         switchContext->vxlanVport = NULL;
>         break;
> +    case OVS_VPORT_TYPE_STT:
> +        OvsCleanupSttTunnel(vport);
> +        switchContext->sttVport = NULL;
> +        break;
>     case OVS_VPORT_TYPE_GRE:
>     case OVS_VPORT_TYPE_GRE64:
>         break;
> @@ -2113,7 +2126,12 @@ Cleanup:
>         if (vport && vportAllocated == TRUE) {
>             if (vportInitialized == TRUE) {
>                 if (OvsIsTunnelVportType(portType)) {
> +                  if (vport->ovsType == OVS_VPORT_TYPE_VXLAN)
>                     OvsCleanupVxlanTunnel(vport);
> +                  else if (vport->ovsType == OVS_VPORT_TYPE_STT)
> +                    OvsCleanupSttTunnel(vport);

minor: missing braces around the blocks of code under 'if' / 'else if' / 'else'. Also, indentation needs fixing.


> +                  else
> +                    ASSERT(!"Invalid tunnel port type");
>                 }
>             }
>             OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG);
> diff --git a/datapath-windows/ovsext/Vport.h b/datapath-windows/ovsext/Vport.h
> index 348fbfd..d74ff59 100644
> --- a/datapath-windows/ovsext/Vport.h
> +++ b/datapath-windows/ovsext/Vport.h
> @@ -176,6 +176,7 @@ static __inline BOOLEAN
> OvsIsTunnelVportType(OVS_VPORT_TYPE ovsType)
> {
>     return ovsType == OVS_VPORT_TYPE_VXLAN ||
> +           ovsType == OVS_VPORT_TYPE_STT ||
>            ovsType == OVS_VPORT_TYPE_GRE ||
>            ovsType == OVS_VPORT_TYPE_GRE64;
> }
> @@ -187,11 +188,19 @@ OvsGetTunnelVport(POVS_SWITCH_CONTEXT switchContext,
>     switch(ovsType) {
>     case OVS_VPORT_TYPE_VXLAN:
>         return switchContext->vxlanVport;
> +    case OVS_VPORT_TYPE_STT:
> +        return switchContext->sttVport;
>     default:
>         return NULL;
>     }
> }
> 
> +static __inline PVOID
> +GetOvsVportPriv(POVS_VPORT_ENTRY ovsVport)
> +{
> +    return ovsVport->priv;
> +}
> +
> static __inline BOOLEAN
> OvsIsInternalVportType(OVS_VPORT_TYPE ovsType)
> {
> @@ -207,6 +216,19 @@ OvsIsBridgeInternalVport(POVS_VPORT_ENTRY vport)
>     return vport->isBridgeInternal == TRUE;
> }
> 
> +static __inline POVS_VPORT_ENTRY
> +OvsGetExternalVport(POVS_SWITCH_CONTEXT switchContext)
> +{
> +    return switchContext->virtualExternalVport;
> +}
> +
> +static __inline UINT32
> +OvsGetExternalMtu(POVS_SWITCH_CONTEXT switchContext)
> +{
> +    ASSERT(OvsGetExternalVport(switchContext));
> +    return ((POVS_VPORT_ENTRY) OvsGetExternalVport(switchContext))->mtu;
> +}

Instead of ASSERTing, can we return -1/0 if there’s no external vport and handle it in the caller? The reason is, during testing, we might just temporarily disconnect a physical NIC from the Hyper-V switch, and if a VM sends out a packet, we’ll crash immediately. For VXLAN, we let the packet get forwarded to the PIF bridge, and then get dropped during actions execute for that flow on the PIF bridge.

> +
> VOID OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext,
>                              POVS_VPORT_ENTRY vport,
>                              BOOLEAN hvDelete, BOOLEAN ovsDelete,
> diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c
> index 8c57185..125306f 100644
> --- a/datapath-windows/ovsext/Vxlan.c
> +++ b/datapath-windows/ovsext/Vxlan.c
> @@ -173,10 +173,10 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
> 
>         /* L2 header */
>         ethHdr = (EthHdr *)bufferStart;
> -        NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
> -                       sizeof ethHdr->Destination + sizeof ethHdr->Source);
>         ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
>                (PCHAR)&fwdInfo->srcMacAddr);
> +        NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
> +                       sizeof ethHdr->Destination + sizeof ethHdr->Source);
>         ethHdr->Type = htons(ETH_TYPE_IPV4);
> 
>         // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such,
> diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
> index 693bc50..7050015 100644
> --- a/datapath-windows/ovsext/ovsext.vcxproj
> +++ b/datapath-windows/ovsext/ovsext.vcxproj
> @@ -90,6 +90,7 @@
>     <ClInclude Include="PacketIO.h" />
>     <ClInclude Include="PacketParser.h" />
>     <ClInclude Include="precomp.h" />
> +    <ClInclude Include="Stt.h" />
>     <ClInclude Include="Switch.h" />
>     <ClInclude Include="Tunnel.h" />
>     <ClInclude Include="TunnelIntf.h" />
> @@ -183,6 +184,7 @@
>       <PreCompiledHeader>Create</PreCompiledHeader>
>       <PreCompiledHeaderOutputFile>$(IntDir)\precomp.h.pch</PreCompiledHeaderOutputFile>
>     </ClCompile>
> +    <ClCompile Include="Stt.c" />
>     <ClCompile Include="Switch.c" />
>     <ClCompile Include="Tunnel.c" />
>     <ClCompile Include="TunnelFilter.c" />
> @@ -202,4 +204,4 @@
>     <None Exclude="@(None)" Include="*.def;*.bat;*.hpj;*.asmx" />
>   </ItemGroup>
>   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
> -</Project>
> +</Project>
> \ No newline at end of file
> -- 
> 1.9.4.msysgit.2
> 
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev



More information about the dev mailing list