[ovs-dev] [PATCH 3/3] datapath-windows: STT - Enable support for TCP Segmentation offloads

Sairam Venugopal vsairam at vmware.com
Sat Oct 24 20:40:22 UTC 2015


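Add TCP segmentation offload support to the STT encap and decap functions.
Decap now reassembles the inner packet from STT fragments, and both paths
set the large-send offload information on the NBL so that segmentation is
offloaded to NDIS.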

Signed-off-by: Sairam Venugopal <vsairam at vmware.com>
---
 datapath-windows/ovsext/Actions.c |  40 ++--
 datapath-windows/ovsext/Stt.c     | 407 ++++++++++++++++++++++++++++++--------
 2 files changed, 346 insertions(+), 101 deletions(-)

diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
index b4644a7..ce592b3 100644
--- a/datapath-windows/ovsext/Actions.c
+++ b/datapath-windows/ovsext/Actions.c
@@ -594,7 +594,7 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
         InitializeListHead(&missedPackets);
         status = OvsCreateAndAddPackets(NULL, 0, OVS_PACKET_CMD_MISS, vport,
                           &key,ovsFwdCtx->curNbl,
-                          ovsFwdCtx->tunnelRxNic != NULL, &ovsFwdCtx->layers,
+                          FALSE, &ovsFwdCtx->layers,
                           ovsFwdCtx->switchContext, &missedPackets, &num);
         if (num) {
             OvsQueuePackets(&missedPackets, num);
@@ -709,6 +709,7 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
     PNET_BUFFER_LIST newNbl = NULL;
     POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
+    PCWSTR dropReason = L"OVS-dropped due to new decap packet";
 
     if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
             != NDIS_STATUS_SUCCESS) {
@@ -730,6 +731,10 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
     case OVS_VPORT_TYPE_STT:
         status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
                              &ovsFwdCtx->tunKey, &newNbl);
+        if (status == NDIS_STATUS_SUCCESS && newNbl == NULL) {
+            /* This was an STT-LSO Fragment */
+            dropReason = L"OVS-STT segment is cached";
+        }
         break;
     default:
         OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
@@ -747,25 +752,26 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
      * tunnelRxNic and other fields will be cleared, re-init the context
      * before usage.
       */
-    OvsCompleteNBLForwardingCtx(ovsFwdCtx,
-                                L"OVS-dropped due to new decap packet");
+    OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
 
-    /* Decapsulated packet is in a new NBL */
-    ovsFwdCtx->tunnelRxNic = tunnelRxVport;
-    OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
-                         newNbl, tunnelRxVport->portNo, 0,
-                         NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
-                         ovsFwdCtx->completionList,
-                         &ovsFwdCtx->layers, FALSE);
+    if (newNbl) {
+        /* Decapsulated packet is in a new NBL */
+        ovsFwdCtx->tunnelRxNic = tunnelRxVport;
+        OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
+                             newNbl, tunnelRxVport->portNo, 0,
+                             NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
+                             ovsFwdCtx->completionList,
+                             &ovsFwdCtx->layers, FALSE);
 
-    /*
-     * Set the NBL's SourcePortId and SourceNicIndex to default values to
-     * keep NDIS happy when we forward the packet.
-     */
-    ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
-    ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
+        /*
+         * Set the NBL's SourcePortId and SourceNicIndex to default values to
+         * keep NDIS happy when we forward the packet.
+         */
+        ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
+        ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
 
-    status = OvsDoFlowLookupOutput(ovsFwdCtx);
+        status = OvsDoFlowLookupOutput(ovsFwdCtx);
+    }
     ASSERT(ovsFwdCtx->curNbl == NULL);
     OvsClearTunRxCtx(ovsFwdCtx);
 
diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c
index b78ef95..3dfb443 100644
--- a/datapath-windows/ovsext/Stt.c
+++ b/datapath-windows/ovsext/Stt.c
@@ -34,6 +34,7 @@
 #endif
 #define OVS_DBG_MOD OVS_DBG_STT
 #include "Debug.h"
+#include "Jhash.h"
 
 KSTART_ROUTINE OvsSttDefragCleaner;
 static PLIST_ENTRY OvsSttPktFragHash;
@@ -152,8 +153,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
     UINT32 headRoom = OvsGetSttTunHdrSize();
     UINT32 tcpChksumLen;
     PUINT8 bufferStart;
-
-    UNREFERENCED_PARAMETER(layers);
+    ULONG mss = 0;
+    NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
 
     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
 
@@ -162,14 +163,20 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
     BOOLEAN innerPartialChecksum = FALSE;
 
     if (layers->isTcp) {
-        NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
-
         lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
                 TcpLargeSendNetBufferListInfo);
-        if (lsoInfo.LsoV1Transmit.MSS) {
-            /* XXX We don't handle LSO yet */
-            OVS_LOG_ERROR("LSO on STT is not supported");
-            return NDIS_STATUS_FAILURE;
+
+        switch (lsoInfo.Transmit.Type) {
+            case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
+                mss = lsoInfo.LsoV1Transmit.MSS;
+                break;
+            case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
+                mss = lsoInfo.LsoV2Transmit.MSS;
+                break;
+            default:
+                OVS_LOG_ERROR("Unknown LSO transmit type:%d",
+                              lsoInfo.Transmit.Type);
+                return NDIS_STATUS_FAILURE;
         }
     }
 
@@ -186,21 +193,43 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
         return NDIS_STATUS_FAILURE;
     }
 
-    curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
+    curNbl = *newNbl;
+    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
     curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+    /* NB Chain should be split before */
+    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
+    innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
+
+    if (!mss && (innerFrameLen > OvsGetExternalMtu(switchContext) - headRoom)) {
+        OVS_LOG_ERROR("Packet too large (size %d, mtu %d). Can't encapsulate",
+                innerFrameLen, OvsGetExternalMtu(switchContext));
+        status = NDIS_STATUS_FAILURE;
+        goto ret_error;
+    }
+
     bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
                                                        LowPagePriority);
     bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
 
-    if (layers->isIPv4 && csumInfo.Transmit.IpHeaderChecksum) {
+    if (layers->isIPv4) {
         IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
-        ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
+        if (!ip->tot_len) {
+            ip->tot_len = htons(innerFrameLen - sizeof(EthHdr));
+        }
+        if (!ip->check) {
+            ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
+        }
     }
+
     if (layers->isTcp) {
-        if(!csumInfo.Transmit.TcpChecksum) {
-            innerChecksumVerified = TRUE;
-        } else {
+        if (mss) {
             innerPartialChecksum = TRUE;
+        } else {
+            if (!csumInfo.Transmit.TcpChecksum) {
+                innerChecksumVerified = TRUE;
+            } else {
+                innerPartialChecksum = TRUE;
+            }
         }
     } else if (layers->isUdp) {
         if(!csumInfo.Transmit.UdpChecksum) {
@@ -210,24 +239,6 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
         }
     }
 
-    curNbl = *newNbl;
-    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
-    /* NB Chain should be split before */
-    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
-
-    innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
-    /*
-     * External port can't be removed as we hold the dispatch lock
-     * We also check if the external port was removed beforecalling
-     * port encapsulation functions
-     */
-    if (innerFrameLen > OvsGetExternalMtu(switchContext) - headRoom) {
-        OVS_LOG_ERROR("Packet too large (size %d, mtu %d). Can't encapsulate",
-                innerFrameLen, OvsGetExternalMtu(switchContext));
-        status = NDIS_STATUS_FAILURE;
-        goto ret_error;
-    }
-
     status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
     if (status != NDIS_STATUS_SUCCESS) {
         ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)");
@@ -301,33 +312,52 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
                                           IPPROTO_TCP, (uint16) tcpChksumLen);
     sttHdr->version = 0;
 
-    /* XXX need to peek into the inner packet, hard code for now */
-    sttHdr->flags = STT_PROTO_IPV4;
-    if (innerChecksumVerified) {
-        sttHdr->flags |= STT_CSUM_VERIFIED;
-    } else if (innerPartialChecksum) {
+    /* Set STT Header */
+    sttHdr->flags = 0;
+    if (innerPartialChecksum) {
         sttHdr->flags |= STT_CSUM_PARTIAL;
+        if (layers->isIPv4) {
+            sttHdr->flags |= STT_PROTO_IPV4;
+        }
+        if (layers->isTcp) {
+            sttHdr->flags |= STT_PROTO_TCP;
+        }
+        sttHdr->l4Offset = (UINT8) layers->l4Offset;
+        sttHdr->mss = (UINT16) htons(mss);
+    } else if (innerChecksumVerified) {
+        sttHdr->flags = STT_CSUM_VERIFIED;
+        sttHdr->l4Offset = 0;
+        sttHdr->mss = 0;
     }
-    sttHdr->l4Offset = 0;
 
     sttHdr->reserved = 0;
-    /* XXX Used for large TCP packets.Not sure how it is used, clarify */
-    sttHdr->mss = 0;
     sttHdr->vlanTCI = 0;
     sttHdr->key = tunKey->tunnelId;
     /* Zero out stt padding */
     *(uint16 *)(sttHdr + 1) = 0;
 
     /* Offload IP and TCP checksum */
+    ULONG tcpHeaderOffset = sizeof *outerEthHdr +
+                        outerIpHdr->ihl * 4;
     csumInfo.Value = 0;
     csumInfo.Transmit.IpHeaderChecksum = 1;
     csumInfo.Transmit.TcpChecksum = 1;
     csumInfo.Transmit.IsIPv4 = 1;
-    csumInfo.Transmit.TcpHeaderOffset = sizeof *outerEthHdr +
-                                        outerIpHdr->ihl * 4;
+    csumInfo.Transmit.TcpHeaderOffset = tcpHeaderOffset;
     NET_BUFFER_LIST_INFO(curNbl,
                          TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
 
+    UINT32 encapMss = ETH_DEFAULT_MTU - sizeof(IPHdr) - sizeof(TCPHdr);
+    if ((STT_HDR_LEN + innerFrameLen) > encapMss) {
+        lsoInfo.Value = 0;
+        lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset;
+        lsoInfo.LsoV2Transmit.MSS = encapMss;
+        lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
+        lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
+        NET_BUFFER_LIST_INFO(curNbl,
+                             TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
+    }
+
     return STATUS_SUCCESS;
 
 ret_error:
@@ -338,16 +368,22 @@ ret_error:
 
 /*
  *----------------------------------------------------------------------------
- * OvsCalculateTCPChecksum
- *     Calculate TCP checksum
+ * OvsValidateTCPChecksum
+ *     Validate TCP checksum
  *----------------------------------------------------------------------------
  */
 static __inline NDIS_STATUS
-OvsCalculateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
+OvsValidateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
 {
     NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
     csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
                                           TcpIpChecksumNetBufferListInfo);
+
+    /* Check if NIC has indicated TCP checksum failure */
+    if (csumInfo.Receive.TcpChecksumFailed) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+
     UINT16 checkSum;
 
     /* Check if TCP Checksum has been calculated by NIC */
@@ -500,6 +536,164 @@ OvsSttDefragCleaner(PVOID data)
     PsTerminateSystemThread(STATUS_SUCCESS);
 }
 
+/*
+ * Build the fragment lookup key for an STT segment: the outer IP source
+ * and destination addresses are copied as-is and the TCP acknowledgement
+ * number is converted to host byte order.
+ */
+static OVS_STT_PKT_KEY
+OvsGeneratePacketKey(IPHdr *ipHdr, TCPHdr *tcpHdr)
+{
+    OVS_STT_PKT_KEY pktKey;
+
+    pktKey.ackSeq = ntohl(tcpHdr->ack_seq);
+    pktKey.dAddr = ipHdr->daddr;
+    pktKey.sAddr = ipHdr->saddr;
+
+    return pktKey;
+}
+
+/*
+ * Hash a fragment key: its ackSeq, dAddr and sAddr fields (in that order)
+ * are packed into an array of UINT32 words and passed to OvsJhashWords
+ * with OVS_HASH_BASIS as the basis.
+ */
+UINT32
+OvsSttGetPktHash(OVS_STT_PKT_KEY *pktKey)
+{
+    UINT32 words[3];
+
+    words[2] = pktKey->sAddr;
+    words[1] = pktKey->dAddr;
+    words[0] = pktKey->ackSeq;
+
+    return OvsJhashWords(words, 3, OVS_HASH_BASIS);
+}
+
+/*
+ * Search the OvsSttPktFragHash bucket selected by 'hash' for an entry whose
+ * key equals 'pktKey' (ackSeq, dAddr and sAddr are compared individually).
+ *
+ * Returns the matching OVS_STT_PKT_ENTRY, or NULL if the bucket holds none.
+ */
+VOID *
+OvsLookupPktFrag(OVS_STT_PKT_KEY *pktKey, UINT32 hash)
+{
+    PLIST_ENTRY bucket = &OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK];
+    PLIST_ENTRY cur;
+
+    LIST_FORALL(bucket, cur) {
+        POVS_STT_PKT_ENTRY candidate =
+            CONTAINING_RECORD(cur, OVS_STT_PKT_ENTRY, link);
+
+        if (candidate->ovsPktKey.sAddr != pktKey->sAddr ||
+            candidate->ovsPktKey.dAddr != pktKey->dAddr ||
+            candidate->ovsPktKey.ackSeq != pktKey->ackSeq) {
+            continue;
+        }
+        return candidate;
+    }
+
+    return NULL;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsSttReassemble --
+ *     Reassemble an LSO packet from multiple STT-Fragments.
+ *
+ *     The outer TCP sequence number carries the total STT frame length
+ *     (seq >> STT_SEQ_LEN_SHIFT) and this fragment's offset within that
+ *     frame (STT_SEGMENT_OFF). Fragments are keyed by the outer IP addresses
+ *     and the TCP ack number, and are accumulated in OvsSttPktFragHash under
+ *     OvsSttSpinLock.
+ *
+ *     'curNbl' must have its data start positioned at the TCP payload: for
+ *     the first fragment (offset 0) the STT header is read from there. The
+ *     data start is temporarily advanced past the STT header for the first
+ *     fragment and restored before returning.
+ *
+ *     Returns a new NBL built from the reassembled buffer, with the saved
+ *     STT header copied into 'newSttHdr', when this fragment completes the
+ *     packet. Returns NULL when the fragment was only cached, or when it was
+ *     dropped because it is malformed or resources could not be obtained.
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
+                 PNET_BUFFER_LIST curNbl,
+                 IPHdr *ipHdr,
+                 TCPHdr *tcp,
+                 SttHdr *newSttHdr)
+{
+    UINT32 seq = ntohl(tcp->seq);
+    UINT32 totalLen = seq >> STT_SEQ_LEN_SHIFT;
+    UINT32 segOffset = STT_SEGMENT_OFF(seq);
+    UINT32 innerPacketLen;
+    UINT32 offset;
+    UINT32 fragmentLength;
+    OVS_STT_PKT_ENTRY *pktFragEntry;
+    PNET_BUFFER_LIST targetPNbl = NULL;
+    PNET_BUFFER sourceNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+    SttHdr stt;
+    SttHdr *sttHdr = NULL;
+
+    /*
+     * The lengths come straight from the wire. Reject values that would
+     * make the unsigned subtractions below wrap around.
+     */
+    if (totalLen <= STT_HDR_LEN ||
+        (segOffset != 0 && segOffset < STT_HDR_LEN)) {
+        OVS_LOG_ERROR("Invalid STT fragment: total length %u, offset %u",
+                      totalLen, segOffset);
+        return NULL;
+    }
+    innerPacketLen = totalLen - STT_HDR_LEN;
+    offset = segOffset == 0 ? 0 : segOffset - STT_HDR_LEN;
+
+    /*
+     * If this is the first fragment, copy the STT header. This touches only
+     * the caller's NBL, so it is done before taking the lock; the failure
+     * path therefore cannot return with the lock held.
+     */
+    if (segOffset == 0) {
+        sttHdr = NdisGetDataBuffer(sourceNb, sizeof(SttHdr), &stt, 1, 0);
+        if (sttHdr == NULL) {
+            OVS_LOG_ERROR("Unable to retrieve STT header");
+            return NULL;
+        }
+
+        /* Advance to inner packet */
+        NdisAdvanceNetBufferDataStart(sourceNb, STT_HDR_LEN, FALSE, NULL);
+    }
+
+    fragmentLength = NET_BUFFER_DATA_LENGTH(sourceNb);
+
+    /* The fragment must fit inside the advertised inner packet. */
+    if (offset > innerPacketLen ||
+        fragmentLength > innerPacketLen - offset) {
+        OVS_LOG_ERROR("STT fragment exceeds packet: offset %u, length %u, "
+                      "packet length %u", offset, fragmentLength,
+                      innerPacketLen);
+        goto restore;
+    }
+
+    /* XXX optimize this lock */
+    NdisAcquireSpinLock(&OvsSttSpinLock);
+
+    /* Lookup fragment */
+    OVS_STT_PKT_KEY pktKey = OvsGeneratePacketKey(ipHdr, tcp);
+    UINT32 hash = OvsSttGetPktHash(&pktKey);
+    pktFragEntry = OvsLookupPktFrag(&pktKey, hash);
+
+    if (pktFragEntry == NULL) {
+        /* Create a new Packet Entry */
+        POVS_STT_PKT_ENTRY entry;
+        entry = OvsAllocateMemoryWithTag(sizeof(OVS_STT_PKT_ENTRY),
+                                         OVS_STT_POOL_TAG);
+        if (entry == NULL) {
+            OVS_LOG_ERROR("Unable to allocate STT fragment entry");
+            goto unlock;
+        }
+        RtlZeroMemory(entry, sizeof (OVS_STT_PKT_ENTRY));
+
+        /* Copy the data from Source to new buffer */
+        entry->packetBuf = OvsAllocateMemoryWithTag(innerPacketLen,
+                                                    OVS_STT_POOL_TAG);
+        if (entry->packetBuf == NULL) {
+            OVS_LOG_ERROR("Unable to allocate STT reassembly buffer");
+            OvsFreeMemoryWithTag(entry, OVS_STT_POOL_TAG);
+            goto unlock;
+        }
+        if (OvsGetPacketBytes(curNbl, fragmentLength,
+                              0, entry->packetBuf + offset) == NULL) {
+            OVS_LOG_ERROR("Error when obtaining bytes from Packet");
+            /* The entry was never linked; release it instead of leaking. */
+            OvsFreeMemoryWithTag(entry->packetBuf, OVS_STT_POOL_TAG);
+            OvsFreeMemoryWithTag(entry, OVS_STT_POOL_TAG);
+            goto unlock;
+        }
+
+        /* Update Key, timestamp and recvdLen */
+        NdisMoveMemory(&entry->ovsPktKey, &pktKey, sizeof (OVS_STT_PKT_KEY));
+        entry->recvdLen = fragmentLength;
+
+        UINT64 currentTime;
+        NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
+        entry->timeout = currentTime + STT_ENTRY_TIMEOUT;
+
+        if (segOffset == 0) {
+            entry->sttHdr = *sttHdr;
+        }
+
+        /* Insert the entry in the Static Buffer */
+        InsertHeadList(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK],
+                       &entry->link);
+    } else {
+        /*
+         * NOTE(review): packetBuf was sized from the total length carried by
+         * the fragment that created this entry, and that size is not
+         * recorded in the entry as used here. A later fragment advertising a
+         * larger total length therefore cannot be detected in this function;
+         * confirm whether OVS_STT_PKT_ENTRY should store the buffer length.
+         */
+
+        /*
+         * Copy the fragment data from Source to existing buffer. Do this
+         * before touching the entry so a failed copy neither counts towards
+         * the received length nor completes the packet with a hole in it.
+         */
+        if (OvsGetPacketBytes(curNbl, fragmentLength, 0,
+                              pktFragEntry->packetBuf + offset) == NULL) {
+            OVS_LOG_ERROR("Error when obtaining bytes from Packet");
+            goto unlock;
+        }
+
+        if (segOffset == 0) {
+            pktFragEntry->sttHdr = *sttHdr;
+        }
+
+        /* Add to received length to identify if this is the last fragment */
+        pktFragEntry->recvdLen += fragmentLength;
+
+        if (pktFragEntry->recvdLen == innerPacketLen) {
+            /* Retrieve the original STT header */
+            NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr,
+                           sizeof (SttHdr));
+            targetPNbl = OvsAllocateNBLFromBuffer(switchContext,
+                                                  pktFragEntry->packetBuf,
+                                                  innerPacketLen);
+
+            /* Delete this entry and free up the memory */
+            RemoveEntryList(&pktFragEntry->link);
+            OvsFreeMemoryWithTag(pktFragEntry->packetBuf, OVS_STT_POOL_TAG);
+            OvsFreeMemoryWithTag(pktFragEntry, OVS_STT_POOL_TAG);
+        }
+    }
+
+unlock:
+    NdisReleaseSpinLock(&OvsSttSpinLock);
+
+restore:
+    if (segOffset == 0) {
+        /* Retreat the original NBL if it was advanced */
+        NdisRetreatNetBufferDataStart(sourceNb, STT_HDR_LEN, 0, NULL);
+    }
+
+    return targetPNbl;
+}
+
 /*
  * --------------------------------------------------------------------------
  * OvsDecapStt --
@@ -513,34 +707,21 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
             PNET_BUFFER_LIST *newNbl)
 {
     NDIS_STATUS status = NDIS_STATUS_FAILURE;
-    PNET_BUFFER curNb;
+    PNET_BUFFER curNb, newNb;
     IPHdr *ipHdr;
     char *ipBuf[sizeof(IPHdr)];
+    SttHdr stt;
     SttHdr *sttHdr;
     char *sttBuf[STT_HDR_LEN];
     UINT32 advanceCnt, hdrLen;
-    NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+    BOOLEAN isLsoPacket = FALSE;
+    PNET_BUFFER_LIST origNbl = curNbl;
 
     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
     ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
 
-    if (NET_BUFFER_DATA_LENGTH(curNb) < OvsGetSttTunHdrSize()) {
-        OVS_LOG_ERROR("Packet length received is less than the tunnel header:"
-            " %d<%d\n", NET_BUFFER_DATA_LENGTH(curNb), OvsGetSttTunHdrSize());
-        return NDIS_STATUS_INVALID_LENGTH;
-    }
-
-    /* Verify outer TCP Checksum */
-    csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
-                                          TcpIpChecksumNetBufferListInfo);
-
-    /* Check if NIC has indicated TCP checksum failure */
-    if (csumInfo.Receive.TcpChecksumFailed) {
-        return NDIS_STATUS_INVALID_PACKET;
-    }
-
-    /* Calculate the TCP Checksum */
-    status = OvsCalculateTCPChecksum(curNbl, curNb);
+    /* Validate the TCP Checksum */
+    status = OvsValidateTCPChecksum(curNbl, curNb);
     if (status != NDIS_STATUS_SUCCESS) {
         return status;
     }
@@ -554,34 +735,67 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
                                                     1 /*no align*/, 0);
     ASSERT(ipHdr);
 
+    TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
+
     /* Skip IP & TCP headers */
     hdrLen = sizeof(IPHdr) + sizeof(TCPHdr),
     NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
     advanceCnt += hdrLen;
 
-    /* STT Header */
-    sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, (PVOID) &sttBuf,
-                                                    1 /*no align*/, 0);
+    UINT32 seq = ntohl(tcp->seq);
+    UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT);
+    UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len)
+                        - (ipHdr->ihl * 4)
+                        - (sizeof * tcp);
+
+    /* Check if incoming packet requires reassembly */
+    if (totalLen != payloadLen) {
+        sttHdr = &stt;
+        PNET_BUFFER_LIST pNbl = OvsSttReassemble(switchContext, curNbl,
+                                                 ipHdr, tcp, sttHdr);
+        if (pNbl == NULL) {
+            /* Complete the current NBL; This is not the last packet*/
+            status = NDIS_STATUS_SUCCESS;
+            goto complete;
+        }
+
+        status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
+        curNbl = pNbl;
+        curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+        isLsoPacket = TRUE;
+    } else {
+        /* STT Header */
+        sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, (PVOID) &sttBuf,
+                                                        1 /*no align*/, 0);
+        /* Skip stt header, DataOffset points to inner pkt now. */
+        hdrLen = STT_HDR_LEN;
+        NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
+        advanceCnt += hdrLen;
+    }
     ASSERT(sttHdr);
 
     /* Initialize the tunnel key */
     tunKey->dst = ipHdr->daddr;
     tunKey->src = ipHdr->saddr;
     tunKey->tunnelId = sttHdr->key;
-    tunKey->flags = (OVS_TNL_F_CSUM | OVS_TNL_F_KEY);
+    tunKey->flags = OVS_TNL_F_KEY;
     tunKey->tos = ipHdr->tos;
     tunKey->ttl = ipHdr->ttl;
     tunKey->pad = 0;
 
-    /* Skip stt header, DataOffset points to inner pkt now. */
-    hdrLen = STT_HDR_LEN;
-    NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
-    advanceCnt += hdrLen;
+    *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0,
+                                0, FALSE /*copy NBL info*/);
+    if (*newNbl == NULL) {
+        OVS_LOG_ERROR("Unable to copy NBL");
+        return NDIS_STATUS_FAILURE;
+    }
+    newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
+    BOOLEAN requiresLSO = sttHdr->mss != 0;
 
     /* Verify checksum for inner packet if it's required */
     if (!(sttHdr->flags & STT_CSUM_VERIFIED)) {
         BOOLEAN innerChecksumPartial = sttHdr->flags & STT_CSUM_PARTIAL;
-        EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr),
+        EthHdr *eth = (EthHdr *)NdisGetDataBuffer(newNb, sizeof(EthHdr),
                                                   NULL, 1, 0);
 
         /* XXX Figure out a way to offload checksum receives */
@@ -597,14 +811,16 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
                                                   IPPROTO_TCP,
                                                   (UINT16)l4Payload);
                 }
-                tcp->check = CalculateChecksumNB(curNb, l4Payload, offset);
+                if (!requiresLSO) {
+                    tcp->check = CalculateChecksumNB(newNb, l4Payload, offset);
+                }
             } else if (ip->protocol == IPPROTO_UDP) {
                 UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
                 if (!innerChecksumPartial){
                     udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
                                                   IPPROTO_UDP, l4Payload);
                 }
-                udp->check = CalculateChecksumNB(curNb, l4Payload, offset);
+                udp->check = CalculateChecksumNB(newNb, l4Payload, offset);
             }
         } else if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV6)) {
             IPv6Hdr *ip = (IPv6Hdr *)((PCHAR)eth + sizeof *eth);
@@ -617,7 +833,9 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
                                                     (UINT32 *)&ip->daddr,
                                                     IPPROTO_TCP, totalLength);
                 }
-                tcp->check = CalculateChecksumNB(curNb, totalLength, offset);
+                if (!requiresLSO) {
+                    tcp->check = CalculateChecksumNB(newNb, totalLength, offset);
+                }
             }
             else if (ip->nexthdr == IPPROTO_UDP) {
                 UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
@@ -626,22 +844,43 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
                                                     (UINT32 *)&ip->daddr,
                                                     IPPROTO_UDP, totalLength);
                 }
-                udp->check = CalculateChecksumNB(curNb, totalLength, offset);
+                udp->check = CalculateChecksumNB(newNb, totalLength, offset);
             }
         }
 
-        NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
+        NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = 0;
     }
 
-    *newNbl = OvsPartialCopyNBL(switchContext, curNbl, OVS_DEFAULT_COPY_SIZE,
-                                0, FALSE /*copy NBL info*/);
+    if (requiresLSO) {
+        NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
+        lsoInfo.Value = 0;
+        lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset;
+        lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU - sizeof(IPHdr) - sizeof(TCPHdr);
+        lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
+        if (sttHdr->flags & STT_PROTO_IPV4) {
+            lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
+        } else {
+            lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6;
+        }
+        NET_BUFFER_LIST_INFO(*newNbl,
+                                TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
+    }
 
-    ASSERT(advanceCnt == OvsGetSttTunHdrSize());
-    status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
+complete:
+    if (!isLsoPacket) {
+        /* Retreat the NBL if it's not LSO */
+        status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
+    } else {
+        /* Complete the new NBL and reset the original NBL */
+        OvsCompleteNBL(switchContext, curNbl, TRUE);
+        curNbl = origNbl;
+    }
 
     if (*newNbl == NULL) {
-        OVS_LOG_ERROR("OvsDecapStt: Unable to allocate a new cloned NBL");
-        status = NDIS_STATUS_RESOURCES;
+        if (status != NDIS_STATUS_SUCCESS) {
+           OVS_LOG_ERROR("OvsDecapStt: Unable to allocate a new cloned NBL");
+            status = NDIS_STATUS_RESOURCES;
+        }
     }
 
     return status;
-- 
1.9.5.msysgit.0




More information about the dev mailing list