[ovs-dev] [PATCH v2 3/3] datapath-windows: STT - Enable support for TCP Segmentation offloads

Gurucharan Shetty shettyg at nicira.com
Tue Oct 27 20:49:52 UTC 2015


> Acked-by: Nithin Raju <nithin at vmware.com>

Series applied.

>
> -----Original Message-----
> From: Sairam Venugopal <vsairam at vmware.com>
> Date: Tuesday, October 27, 2015 at 10:20 AM
> To: Nithin Raju <nithin at vmware.com>
> Subject: Fw: [PATCH v2 3/3] datapath-windows: STT - Enable support for TCP
> Segmentation offloads
>
>>
>>________________________________________
>>From: Sairam Venugopal <vsairam at vmware.com>
>>Sent: Monday, October 26, 2015 4:48 PM
>>To: dev at openvswitch.org
>>Cc: Sairam Venugopal
>>Subject: [PATCH v2 3/3] datapath-windows: STT - Enable support for TCP
>>Segmentation offloads
>>
>>Add support to STT - Encap and Decap functions to reassemble the packet
>>fragments. Also add support to offload the packet to NDIS.
>>
>>Signed-off-by: Sairam Venugopal <vsairam at vmware.com>
>>---
>> datapath-windows/ovsext/Actions.c |  40 ++--
>> datapath-windows/ovsext/Stt.c     | 398
>>+++++++++++++++++++++++++++++---------
>> 2 files changed, 329 insertions(+), 109 deletions(-)
>>
>>diff --git a/datapath-windows/ovsext/Actions.c
>>b/datapath-windows/ovsext/Actions.c
>>index b4644a7..ce592b3 100644
>>--- a/datapath-windows/ovsext/Actions.c
>>+++ b/datapath-windows/ovsext/Actions.c
>>@@ -594,7 +594,7 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
>>         InitializeListHead(&missedPackets);
>>         status = OvsCreateAndAddPackets(NULL, 0, OVS_PACKET_CMD_MISS,
>>vport,
>>                           &key,ovsFwdCtx->curNbl,
>>-                          ovsFwdCtx->tunnelRxNic != NULL,
>>&ovsFwdCtx->layers,
>>+                          FALSE, &ovsFwdCtx->layers,
>>                           ovsFwdCtx->switchContext, &missedPackets,
>>&num);
>>         if (num) {
>>             OvsQueuePackets(&missedPackets, num);
>>@@ -709,6 +709,7 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
>>     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
>>     PNET_BUFFER_LIST newNbl = NULL;
>>     POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
>>+    PCWSTR dropReason = L"OVS-dropped due to new decap packet";
>>
>>     if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
>>             != NDIS_STATUS_SUCCESS) {
>>@@ -730,6 +731,10 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
>>     case OVS_VPORT_TYPE_STT:
>>         status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
>>                              &ovsFwdCtx->tunKey, &newNbl);
>>+        if (status == NDIS_STATUS_SUCCESS && newNbl == NULL) {
>>+            /* This was an STT-LSO Fragment */
>>+            dropReason = L"OVS-STT segment is cached";
>>+        }
>>         break;
>>     default:
>>         OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
>>@@ -747,25 +752,26 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
>>      * tunnelRxNic and other fields will be cleared, re-init the context
>>      * before usage.
>>       */
>>-    OvsCompleteNBLForwardingCtx(ovsFwdCtx,
>>-                                L"OVS-dropped due to new decap packet");
>>+    OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
>>
>>-    /* Decapsulated packet is in a new NBL */
>>-    ovsFwdCtx->tunnelRxNic = tunnelRxVport;
>>-    OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
>>-                         newNbl, tunnelRxVport->portNo, 0,
>>-
>>NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
>>-                         ovsFwdCtx->completionList,
>>-                         &ovsFwdCtx->layers, FALSE);
>>+    if (newNbl) {
>>+        /* Decapsulated packet is in a new NBL */
>>+        ovsFwdCtx->tunnelRxNic = tunnelRxVport;
>>+        OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
>>+                             newNbl, tunnelRxVport->portNo, 0,
>>+
>>NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
>>+                             ovsFwdCtx->completionList,
>>+                             &ovsFwdCtx->layers, FALSE);
>>
>>-    /*
>>-     * Set the NBL's SourcePortId and SourceNicIndex to default values to
>>-     * keep NDIS happy when we forward the packet.
>>-     */
>>-    ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
>>-    ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
>>+        /*
>>+         * Set the NBL's SourcePortId and SourceNicIndex to default
>>values to
>>+         * keep NDIS happy when we forward the packet.
>>+         */
>>+        ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
>>+        ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
>>
>>-    status = OvsDoFlowLookupOutput(ovsFwdCtx);
>>+        status = OvsDoFlowLookupOutput(ovsFwdCtx);
>>+    }
>>     ASSERT(ovsFwdCtx->curNbl == NULL);
>>     OvsClearTunRxCtx(ovsFwdCtx);
>>
>>diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c
>>index b78ef95..ef44d23 100644
>>--- a/datapath-windows/ovsext/Stt.c
>>+++ b/datapath-windows/ovsext/Stt.c
>>@@ -34,6 +34,7 @@
>> #endif
>> #define OVS_DBG_MOD OVS_DBG_STT
>> #include "Debug.h"
>>+#include "Jhash.h"
>>
>> KSTART_ROUTINE OvsSttDefragCleaner;
>> static PLIST_ENTRY OvsSttPktFragHash;
>>@@ -152,8 +153,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
>>     UINT32 headRoom = OvsGetSttTunHdrSize();
>>     UINT32 tcpChksumLen;
>>     PUINT8 bufferStart;
>>-
>>-    UNREFERENCED_PARAMETER(layers);
>>+    ULONG mss = 0;
>>+    NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
>>
>>     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
>>
>>@@ -162,14 +163,20 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
>>     BOOLEAN innerPartialChecksum = FALSE;
>>
>>     if (layers->isTcp) {
>>-        NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
>>-
>>         lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
>>                 TcpLargeSendNetBufferListInfo);
>>-        if (lsoInfo.LsoV1Transmit.MSS) {
>>-            /* XXX We don't handle LSO yet */
>>-            OVS_LOG_ERROR("LSO on STT is not supported");
>>-            return NDIS_STATUS_FAILURE;
>>+
>>+        switch (lsoInfo.Transmit.Type) {
>>+            case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
>>+                mss = lsoInfo.LsoV1Transmit.MSS;
>>+                break;
>>+            case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
>>+                mss = lsoInfo.LsoV2Transmit.MSS;
>>+                break;
>>+            default:
>>+                OVS_LOG_ERROR("Unknown LSO transmit type:%d",
>>+                              lsoInfo.Transmit.Type);
>>+                return NDIS_STATUS_FAILURE;
>>         }
>>     }
>>
>>@@ -186,21 +193,36 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
>>         return NDIS_STATUS_FAILURE;
>>     }
>>
>>-    curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
>>+    curNbl = *newNbl;
>>+    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
>>     curMdl = NET_BUFFER_CURRENT_MDL(curNb);
>>+    /* NB Chain should be split before */
>>+    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
>>+    innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
>>+
>>     bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
>>                                                        LowPagePriority);
>>     bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
>>
>>-    if (layers->isIPv4 && csumInfo.Transmit.IpHeaderChecksum) {
>>+    if (layers->isIPv4) {
>>         IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
>>-        ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
>>+        if (!ip->tot_len) {
>>+            ip->tot_len = htons(innerFrameLen - sizeof(EthHdr));
>>+        }
>>+        if (!ip->check) {
>>+            ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
>>+        }
>>     }
>>+
>>     if (layers->isTcp) {
>>-        if(!csumInfo.Transmit.TcpChecksum) {
>>-            innerChecksumVerified = TRUE;
>>-        } else {
>>+        if (mss) {
>>             innerPartialChecksum = TRUE;
>>+        } else {
>>+            if (!csumInfo.Transmit.TcpChecksum) {
>>+                innerChecksumVerified = TRUE;
>>+            } else {
>>+                innerPartialChecksum = TRUE;
>>+            }
>>         }
>>     } else if (layers->isUdp) {
>>         if(!csumInfo.Transmit.UdpChecksum) {
>>@@ -210,24 +232,6 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
>>         }
>>     }
>>
>>-    curNbl = *newNbl;
>>-    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
>>-    /* NB Chain should be split before */
>>-    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
>>-
>>-    innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
>>-    /*
>>-     * External port can't be removed as we hold the dispatch lock
>>-     * We also check if the external port was removed beforecalling
>>-     * port encapsulation functions
>>-     */
>>-    if (innerFrameLen > OvsGetExternalMtu(switchContext) - headRoom) {
>>-        OVS_LOG_ERROR("Packet too large (size %d, mtu %d). Can't
>>encapsulate",
>>-                innerFrameLen, OvsGetExternalMtu(switchContext));
>>-        status = NDIS_STATUS_FAILURE;
>>-        goto ret_error;
>>-    }
>>-
>>     status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
>>     if (status != NDIS_STATUS_SUCCESS) {
>>         ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)");
>>@@ -301,33 +305,52 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
>>                                           IPPROTO_TCP, (uint16)
>>tcpChksumLen);
>>     sttHdr->version = 0;
>>
>>-    /* XXX need to peek into the inner packet, hard code for now */
>>-    sttHdr->flags = STT_PROTO_IPV4;
>>-    if (innerChecksumVerified) {
>>-        sttHdr->flags |= STT_CSUM_VERIFIED;
>>-    } else if (innerPartialChecksum) {
>>+    /* Set STT Header */
>>+    sttHdr->flags = 0;
>>+    if (innerPartialChecksum) {
>>         sttHdr->flags |= STT_CSUM_PARTIAL;
>>+        if (layers->isIPv4) {
>>+            sttHdr->flags |= STT_PROTO_IPV4;
>>+        }
>>+        if (layers->isTcp) {
>>+            sttHdr->flags |= STT_PROTO_TCP;
>>+        }
>>+        sttHdr->l4Offset = (UINT8) layers->l4Offset;
>>+        sttHdr->mss = (UINT16) htons(mss);
>>+    } else if (innerChecksumVerified) {
>>+        sttHdr->flags = STT_CSUM_VERIFIED;
>>+        sttHdr->l4Offset = 0;
>>+        sttHdr->mss = 0;
>>     }
>>-    sttHdr->l4Offset = 0;
>>
>>     sttHdr->reserved = 0;
>>-    /* XXX Used for large TCP packets.Not sure how it is used, clarify */
>>-    sttHdr->mss = 0;
>>     sttHdr->vlanTCI = 0;
>>     sttHdr->key = tunKey->tunnelId;
>>     /* Zero out stt padding */
>>     *(uint16 *)(sttHdr + 1) = 0;
>>
>>     /* Offload IP and TCP checksum */
>>+    ULONG tcpHeaderOffset = sizeof *outerEthHdr +
>>+                        outerIpHdr->ihl * 4;
>>     csumInfo.Value = 0;
>>     csumInfo.Transmit.IpHeaderChecksum = 1;
>>     csumInfo.Transmit.TcpChecksum = 1;
>>     csumInfo.Transmit.IsIPv4 = 1;
>>-    csumInfo.Transmit.TcpHeaderOffset = sizeof *outerEthHdr +
>>-                                        outerIpHdr->ihl * 4;
>>+    csumInfo.Transmit.TcpHeaderOffset = tcpHeaderOffset;
>>     NET_BUFFER_LIST_INFO(curNbl,
>>                          TcpIpChecksumNetBufferListInfo) =
>>csumInfo.Value;
>>
>>+    UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) -
>>sizeof(TCPHdr);
>>+    if (ipTotalLen > encapMss) {
>>+        lsoInfo.Value = 0;
>>+        lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset;
>>+        lsoInfo.LsoV2Transmit.MSS = encapMss;
>>+        lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
>>+        lsoInfo.LsoV2Transmit.IPVersion =
>>NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
>>+        NET_BUFFER_LIST_INFO(curNbl,
>>+                             TcpLargeSendNetBufferListInfo) =
>>lsoInfo.Value;
>>+    }
>>+
>>     return STATUS_SUCCESS;
>>
>> ret_error:
>>@@ -338,16 +361,22 @@ ret_error:
>>
>> /*
>>
>>*-------------------------------------------------------------------------
>>---
>>- * OvsCalculateTCPChecksum
>>- *     Calculate TCP checksum
>>+ * OvsValidateTCPChecksum
>>+ *     Validate TCP checksum
>>
>>*-------------------------------------------------------------------------
>>---
>>  */
>> static __inline NDIS_STATUS
>>-OvsCalculateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
>>+OvsValidateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
>> {
>>     NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
>>     csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
>>
>>TcpIpChecksumNetBufferListInfo);
>>+
>>+    /* Check if NIC has indicated TCP checksum failure */
>>+    if (csumInfo.Receive.TcpChecksumFailed) {
>>+        return NDIS_STATUS_INVALID_PACKET;
>>+    }
>>+
>>     UINT16 checkSum;
>>
>>     /* Check if TCP Checksum has been calculated by NIC */
>>@@ -399,10 +428,9 @@ OvsInitSttDefragmentation()
>>     NdisAllocateSpinLock(&OvsSttSpinLock);
>>
>>     /* Init the Hash Buffer */
>>-    OvsSttPktFragHash = (PLIST_ENTRY) OvsAllocateMemoryWithTag(
>>-                                                sizeof(LIST_ENTRY)
>>-                                                * STT_HASH_TABLE_SIZE,
>>-                                                OVS_STT_POOL_TAG);
>>+    OvsSttPktFragHash = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
>>+                                                 * STT_HASH_TABLE_SIZE,
>>+                                                 OVS_STT_POOL_TAG);
>>     if (OvsSttPktFragHash == NULL) {
>>         NdisFreeSpinLock(&OvsSttSpinLock);
>>         return STATUS_INSUFFICIENT_RESOURCES;
>>@@ -487,6 +515,7 @@ OvsSttDefragCleaner(PVOID data)
>>                 entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link);
>>                 if (entry->timeout < currentTime) {
>>                     RemoveEntryList(&entry->link);
>>+                    OvsFreeMemoryWithTag(entry->packetBuf,
>>OVS_STT_POOL_TAG);
>>                     OvsFreeMemoryWithTag(entry, OVS_STT_POOL_TAG);
>>                 }
>>             }
>>@@ -500,6 +529,158 @@ OvsSttDefragCleaner(PVOID data)
>>     PsTerminateSystemThread(STATUS_SUCCESS);
>> }
>>
>>+static OVS_STT_PKT_KEY
>>+OvsGeneratePacketKey(IPHdr *ipHdr, TCPHdr *tcpHdr)
>>+{
>>+    OVS_STT_PKT_KEY key;
>>+    key.sAddr = ipHdr->saddr;
>>+    key.dAddr = ipHdr->daddr;
>>+    key.ackSeq = ntohl(tcpHdr->ack_seq);
>>+    return key;
>>+}
>>+
>>+static UINT32
>>+OvsSttGetPktHash(OVS_STT_PKT_KEY *pktKey)
>>+{
>>+    UINT32 arr[3];
>>+    arr[0] = pktKey->ackSeq;
>>+    arr[1] = pktKey->dAddr;
>>+    arr[2] = pktKey->sAddr;
>>+    return OvsJhashWords(arr, 3, OVS_HASH_BASIS);
>>+}
>>+
>>+static VOID *
>>+OvsLookupPktFrag(OVS_STT_PKT_KEY *pktKey, UINT32 hash)
>>+{
>>+    PLIST_ENTRY link;
>>+    POVS_STT_PKT_ENTRY entry;
>>+
>>+    LIST_FORALL(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK], link) {
>>+        entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link);
>>+        if (entry->ovsPktKey.ackSeq == pktKey->ackSeq &&
>>+            entry->ovsPktKey.dAddr == pktKey->dAddr &&
>>+            entry->ovsPktKey.sAddr == pktKey->sAddr) {
>>+            return entry;
>>+        }
>>+    }
>>+    return NULL;
>>+}
>>+
>>+/*
>>+*
>>+-------------------------------------------------------------------------
>>-
>>+* OvsSttReassemble --
>>+*     Reassemble an LSO packet from multiple STT-Fragments.
>>+*
>>+-------------------------------------------------------------------------
>>-
>>+*/
>>+PNET_BUFFER_LIST
>>+OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
>>+                 PNET_BUFFER_LIST curNbl,
>>+                 IPHdr *ipHdr,
>>+                 TCPHdr *tcp,
>>+                 SttHdr *newSttHdr,
>>+                 UINT16 payloadLen)
>>+{
>>+    UINT32 seq = ntohl(tcp->seq);
>>+    UINT32 innerPacketLen = (seq >> STT_SEQ_LEN_SHIFT) - STT_HDR_LEN;
>>+    UINT32 segOffset = STT_SEGMENT_OFF(seq);
>>+    UINT32 offset = segOffset == 0 ? 0 : segOffset - STT_HDR_LEN;
>>+    UINT32 startOffset = 0;
>>+    OVS_STT_PKT_ENTRY *pktFragEntry;
>>+    PNET_BUFFER_LIST targetPNbl = NULL;
>>+    BOOLEAN lastPacket = FALSE;
>>+    PNET_BUFFER sourceNb;
>>+    UINT32 fragmentLength = payloadLen;
>>+    SttHdr stt;
>>+    SttHdr *sttHdr = NULL;
>>+    sourceNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
>>+
>>+    /* XXX optimize this lock */
>>+    NdisAcquireSpinLock(&OvsSttSpinLock);
>>+
>>+    /* If this is the first fragment, copy the STT header */
>>+    if (segOffset == 0) {
>>+        sttHdr = NdisGetDataBuffer(sourceNb, sizeof(SttHdr), &stt, 1, 0);
>>+        if (sttHdr == NULL) {
>>+            OVS_LOG_ERROR("Unable to retrieve STT header");
>>+            return NULL;
>>+        }
>>+        fragmentLength = fragmentLength - STT_HDR_LEN;
>>+        startOffset = startOffset + STT_HDR_LEN;
>>+    }
>>+
>>+    /* Lookup fragment */
>>+    OVS_STT_PKT_KEY pktKey = OvsGeneratePacketKey(ipHdr, tcp);
>>+    UINT32 hash = OvsSttGetPktHash(&pktKey);
>>+    pktFragEntry = OvsLookupPktFrag(&pktKey, hash);
>>+
>>+    if (pktFragEntry == NULL) {
>>+        /* Create a new Packet Entry */
>>+        POVS_STT_PKT_ENTRY entry;
>>+        entry = OvsAllocateMemoryWithTag(sizeof(OVS_STT_PKT_ENTRY),
>>+                                         OVS_STT_POOL_TAG);
>>+        RtlZeroMemory(entry, sizeof (OVS_STT_PKT_ENTRY));
>>+
>>+        /* Update Key, timestamp and recvdLen */
>>+        NdisMoveMemory(&entry->ovsPktKey, &pktKey, sizeof
>>(OVS_STT_PKT_KEY));
>>+
>>+        entry->recvdLen = fragmentLength;
>>+
>>+        UINT64 currentTime;
>>+        NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
>>+        entry->timeout = currentTime + STT_ENTRY_TIMEOUT;
>>+
>>+        if (segOffset == 0) {
>>+            entry->sttHdr = *sttHdr;
>>+        }
>>+
>>+        /* Copy the data from Source to new buffer */
>>+        entry->packetBuf = OvsAllocateMemoryWithTag(innerPacketLen,
>>+                                                    OVS_STT_POOL_TAG);
>>+        if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
>>+                              entry->packetBuf + offset) == NULL) {
>>+            OVS_LOG_ERROR("Error when obtaining bytes from Packet");
>>+            goto handle_error;
>>+        }
>>+
>>+        /* Insert the entry in the Static Buffer */
>>+        InsertHeadList(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK],
>>+                       &entry->link);
>>+    } else {
>>+        /* Add to received length to identify if this is the last
>>fragment */
>>+        pktFragEntry->recvdLen += fragmentLength;
>>+        lastPacket = (pktFragEntry->recvdLen == innerPacketLen);
>>+
>>+        if (segOffset == 0) {
>>+            pktFragEntry->sttHdr = *sttHdr;
>>+        }
>>+
>>+        /* Copy the fragment data from Source to existing buffer */
>>+        if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
>>+                              pktFragEntry->packetBuf + offset) == NULL)
>>{
>>+            OVS_LOG_ERROR("Error when obtaining bytes from Packet");
>>+            goto handle_error;
>>+        }
>>+    }
>>+
>>+handle_error:
>>+    if (lastPacket) {
>>+        /* Retrieve the original STT header */
>>+        NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof
>>(SttHdr));
>>+        targetPNbl = OvsAllocateNBLFromBuffer(switchContext,
>>pktFragEntry->packetBuf,
>>+                                              innerPacketLen);
>>+
>>+        /* Delete this entry and free up the memory */
>>+        RemoveEntryList(&pktFragEntry->link);
>>+        OvsFreeMemoryWithTag(pktFragEntry->packetBuf, OVS_STT_POOL_TAG);
>>+        OvsFreeMemoryWithTag(pktFragEntry, OVS_STT_POOL_TAG);
>>+    }
>>+
>>+    NdisReleaseSpinLock(&OvsSttSpinLock);
>>+    return lastPacket ? targetPNbl : NULL;
>>+}
>>+
>> /*
>>  *
>>--------------------------------------------------------------------------
>>  * OvsDecapStt --
>>@@ -513,34 +694,20 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
>>             PNET_BUFFER_LIST *newNbl)
>> {
>>     NDIS_STATUS status = NDIS_STATUS_FAILURE;
>>-    PNET_BUFFER curNb;
>>+    PNET_BUFFER curNb, newNb;
>>     IPHdr *ipHdr;
>>     char *ipBuf[sizeof(IPHdr)];
>>+    SttHdr stt;
>>     SttHdr *sttHdr;
>>     char *sttBuf[STT_HDR_LEN];
>>     UINT32 advanceCnt, hdrLen;
>>-    NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
>>+    BOOLEAN isLsoPacket = FALSE;
>>
>>     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
>>     ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
>>
>>-    if (NET_BUFFER_DATA_LENGTH(curNb) < OvsGetSttTunHdrSize()) {
>>-        OVS_LOG_ERROR("Packet length received is less than the tunnel
>>header:"
>>-            " %d<%d\n", NET_BUFFER_DATA_LENGTH(curNb),
>>OvsGetSttTunHdrSize());
>>-        return NDIS_STATUS_INVALID_LENGTH;
>>-    }
>>-
>>-    /* Verify outer TCP Checksum */
>>-    csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
>>-
>>TcpIpChecksumNetBufferListInfo);
>>-
>>-    /* Check if NIC has indicated TCP checksum failure */
>>-    if (csumInfo.Receive.TcpChecksumFailed) {
>>-        return NDIS_STATUS_INVALID_PACKET;
>>-    }
>>-
>>-    /* Calculate the TCP Checksum */
>>-    status = OvsCalculateTCPChecksum(curNbl, curNb);
>>+    /* Validate the TCP Checksum */
>>+    status = OvsValidateTCPChecksum(curNbl, curNb);
>>     if (status != NDIS_STATUS_SUCCESS) {
>>         return status;
>>     }
>>@@ -554,34 +721,73 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
>>                                                     1 /*no align*/, 0);
>>     ASSERT(ipHdr);
>>
>>+    TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
>>+
>>     /* Skip IP & TCP headers */
>>     hdrLen = sizeof(IPHdr) + sizeof(TCPHdr),
>>     NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
>>     advanceCnt += hdrLen;
>>
>>-    /* STT Header */
>>-    sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, (PVOID) &sttBuf,
>>-                                                    1 /*no align*/, 0);
>>+    UINT32 seq = ntohl(tcp->seq);
>>+    UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT);
>>+    UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len)
>>+                        - (ipHdr->ihl * 4)
>>+                        - (sizeof * tcp);
>>+
>>+    /* Check if incoming packet requires reassembly */
>>+    if (totalLen != payloadLen) {
>>+        sttHdr = &stt;
>>+        PNET_BUFFER_LIST pNbl = OvsSttReassemble(switchContext, curNbl,
>>+                                                 ipHdr, tcp, sttHdr,
>>+                                                 payloadLen);
>>+        if (pNbl == NULL) {
>>+            return NDIS_STATUS_SUCCESS;
>>+        }
>>+
>>+        *newNbl = pNbl;
>>+        isLsoPacket = TRUE;
>>+    } else {
>>+        /* STT Header */
>>+        sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr,
>>+                                   (PVOID) &sttBuf, 1 /*no align*/, 0);
>>+        /* Skip stt header, DataOffset points to inner pkt now. */
>>+        hdrLen = STT_HDR_LEN;
>>+        NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
>>+        advanceCnt += hdrLen;
>>+
>>+        *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0,
>>+                                    0, FALSE /*copy NBL info*/);
>>+    }
>>+
>>+    if (*newNbl == NULL) {
>>+        OVS_LOG_ERROR("Unable to allocate a new cloned NBL");
>>+        return NDIS_STATUS_RESOURCES;
>>+    }
>>+
>>+    status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
>>+    if (status != NDIS_STATUS_SUCCESS) {
>>+        OvsCompleteNBL(switchContext, *newNbl, TRUE);
>>+        return NDIS_STATUS_FAILURE;
>>+    }
>>+    newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
>>+
>>     ASSERT(sttHdr);
>>
>>     /* Initialize the tunnel key */
>>     tunKey->dst = ipHdr->daddr;
>>     tunKey->src = ipHdr->saddr;
>>     tunKey->tunnelId = sttHdr->key;
>>-    tunKey->flags = (OVS_TNL_F_CSUM | OVS_TNL_F_KEY);
>>+    tunKey->flags = OVS_TNL_F_KEY;
>>     tunKey->tos = ipHdr->tos;
>>     tunKey->ttl = ipHdr->ttl;
>>     tunKey->pad = 0;
>>
>>-    /* Skip stt header, DataOffset points to inner pkt now. */
>>-    hdrLen = STT_HDR_LEN;
>>-    NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
>>-    advanceCnt += hdrLen;
>>+    BOOLEAN requiresLSO = sttHdr->mss != 0;
>>
>>     /* Verify checksum for inner packet if it's required */
>>     if (!(sttHdr->flags & STT_CSUM_VERIFIED)) {
>>         BOOLEAN innerChecksumPartial = sttHdr->flags & STT_CSUM_PARTIAL;
>>-        EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr),
>>+        EthHdr *eth = (EthHdr *)NdisGetDataBuffer(newNb, sizeof(EthHdr),
>>                                                   NULL, 1, 0);
>>
>>         /* XXX Figure out a way to offload checksum receives */
>>@@ -597,14 +803,16 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
>>                                                   IPPROTO_TCP,
>>                                                   (UINT16)l4Payload);
>>                 }
>>-                tcp->check = CalculateChecksumNB(curNb, l4Payload,
>>offset);
>>+                if (!requiresLSO) {
>>+                    tcp->check = CalculateChecksumNB(newNb, l4Payload,
>>offset);
>>+                }
>>             } else if (ip->protocol == IPPROTO_UDP) {
>>                 UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
>>                 if (!innerChecksumPartial){
>>                     udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
>>                                                   IPPROTO_UDP,
>>l4Payload);
>>                 }
>>-                udp->check = CalculateChecksumNB(curNb, l4Payload,
>>offset);
>>+                udp->check = CalculateChecksumNB(newNb, l4Payload,
>>offset);
>>             }
>>         } else if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV6)) {
>>             IPv6Hdr *ip = (IPv6Hdr *)((PCHAR)eth + sizeof *eth);
>>@@ -617,7 +825,9 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
>>                                                     (UINT32 *)&ip->daddr,
>>                                                     IPPROTO_TCP,
>>totalLength);
>>                 }
>>-                tcp->check = CalculateChecksumNB(curNb, totalLength,
>>offset);
>>+                if (!requiresLSO) {
>>+                    tcp->check = CalculateChecksumNB(newNb, totalLength,
>>offset);
>>+                }
>>             }
>>             else if (ip->nexthdr == IPPROTO_UDP) {
>>                 UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
>>@@ -626,23 +836,27 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
>>                                                     (UINT32 *)&ip->daddr,
>>                                                     IPPROTO_UDP,
>>totalLength);
>>                 }
>>-                udp->check = CalculateChecksumNB(curNb, totalLength,
>>offset);
>>+                udp->check = CalculateChecksumNB(newNb, totalLength,
>>offset);
>>             }
>>         }
>>
>>-        NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
>>+        NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) =
>>0;
>>     }
>>
>>-    *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
>>OVS_DEFAULT_COPY_SIZE,
>>-                                0, FALSE /*copy NBL info*/);
>>-
>>-    ASSERT(advanceCnt == OvsGetSttTunHdrSize());
>>-    status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
>>-
>>-    if (*newNbl == NULL) {
>>-        OVS_LOG_ERROR("OvsDecapStt: Unable to allocate a new cloned
>>NBL");
>>-        status = NDIS_STATUS_RESOURCES;
>>+    if (requiresLSO) {
>>+        NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
>>+        lsoInfo.Value = 0;
>>+        lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset;
>>+        lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU - sizeof(IPHdr) -
>>sizeof(TCPHdr);
>>+        lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
>>+        if (sttHdr->flags & STT_PROTO_IPV4) {
>>+            lsoInfo.LsoV2Transmit.IPVersion =
>>NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
>>+        } else {
>>+            lsoInfo.LsoV2Transmit.IPVersion =
>>NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6;
>>+        }
>>+        NET_BUFFER_LIST_INFO(*newNbl,
>>+                                TcpLargeSendNetBufferListInfo) =
>>lsoInfo.Value;
>>     }
>>
>>-    return status;
>>+    return NDIS_STATUS_SUCCESS;
>> }
>>--
>>1.9.5.msysgit.0
>>
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev



More information about the dev mailing list