[ovs-dev] [PATCH v2 3/3] datapath-windows: STT - Enable support for TCP Segmentation offloads
Gurucharan Shetty
shettyg at nicira.com
Tue Oct 27 20:49:52 UTC 2015
> Acked-by: Nithin Raju <nithin at vmware.com>
Series applied.
>
> -----Original Message-----
> From: Sairam Venugopal <vsairam at vmware.com>
> Date: Tuesday, October 27, 2015 at 10:20 AM
> To: Nithin Raju <nithin at vmware.com>
> Subject: Fw: [PATCH v2 3/3] datapath-windows: STT - Enable support for TCP
> Segmentation offloads
>
>>
>>________________________________________
>>From: Sairam Venugopal <vsairam at vmware.com>
>>Sent: Monday, October 26, 2015 4:48 PM
>>To: dev at openvswitch.org
>>Cc: Sairam Venugopal
>>Subject: [PATCH v2 3/3] datapath-windows: STT - Enable support for TCP
>>Segmentation offloads
>>
>>Add support to STT - Encap and Decap functions to reassemble the packet
>>fragments. Also add support to offload the packet to NDIS.
>>
>>Signed-off-by: Sairam Venugopal <vsairam at vmware.com>
>>---
>> datapath-windows/ovsext/Actions.c | 40 ++--
>> datapath-windows/ovsext/Stt.c | 398
>>+++++++++++++++++++++++++++++---------
>> 2 files changed, 329 insertions(+), 109 deletions(-)
>>
>>diff --git a/datapath-windows/ovsext/Actions.c
>>b/datapath-windows/ovsext/Actions.c
>>index b4644a7..ce592b3 100644
>>--- a/datapath-windows/ovsext/Actions.c
>>+++ b/datapath-windows/ovsext/Actions.c
>>@@ -594,7 +594,7 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
>> InitializeListHead(&missedPackets);
>> status = OvsCreateAndAddPackets(NULL, 0, OVS_PACKET_CMD_MISS,
>>vport,
>> &key,ovsFwdCtx->curNbl,
>>- ovsFwdCtx->tunnelRxNic != NULL,
>>&ovsFwdCtx->layers,
>>+ FALSE, &ovsFwdCtx->layers,
>> ovsFwdCtx->switchContext, &missedPackets,
>>&num);
>> if (num) {
>> OvsQueuePackets(&missedPackets, num);
>>@@ -709,6 +709,7 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
>> NDIS_STATUS status = NDIS_STATUS_SUCCESS;
>> PNET_BUFFER_LIST newNbl = NULL;
>> POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
>>+ PCWSTR dropReason = L"OVS-dropped due to new decap packet";
>>
>> if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
>> != NDIS_STATUS_SUCCESS) {
>>@@ -730,6 +731,10 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
>> case OVS_VPORT_TYPE_STT:
>> status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
>> &ovsFwdCtx->tunKey, &newNbl);
>>+ if (status == NDIS_STATUS_SUCCESS && newNbl == NULL) {
>>+ /* This was an STT-LSO Fragment */
>>+ dropReason = L"OVS-STT segment is cached";
>>+ }
>> break;
>> default:
>> OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
>>@@ -747,25 +752,26 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
>> * tunnelRxNic and other fields will be cleared, re-init the context
>> * before usage.
>> */
>>- OvsCompleteNBLForwardingCtx(ovsFwdCtx,
>>- L"OVS-dropped due to new decap packet");
>>+ OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
>>
>>- /* Decapsulated packet is in a new NBL */
>>- ovsFwdCtx->tunnelRxNic = tunnelRxVport;
>>- OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
>>- newNbl, tunnelRxVport->portNo, 0,
>>-
>>NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
>>- ovsFwdCtx->completionList,
>>- &ovsFwdCtx->layers, FALSE);
>>+ if (newNbl) {
>>+ /* Decapsulated packet is in a new NBL */
>>+ ovsFwdCtx->tunnelRxNic = tunnelRxVport;
>>+ OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
>>+ newNbl, tunnelRxVport->portNo, 0,
>>+
>>NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
>>+ ovsFwdCtx->completionList,
>>+ &ovsFwdCtx->layers, FALSE);
>>
>>- /*
>>- * Set the NBL's SourcePortId and SourceNicIndex to default values to
>>- * keep NDIS happy when we forward the packet.
>>- */
>>- ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
>>- ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
>>+ /*
>>+ * Set the NBL's SourcePortId and SourceNicIndex to default
>>values to
>>+ * keep NDIS happy when we forward the packet.
>>+ */
>>+ ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
>>+ ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
>>
>>- status = OvsDoFlowLookupOutput(ovsFwdCtx);
>>+ status = OvsDoFlowLookupOutput(ovsFwdCtx);
>>+ }
>> ASSERT(ovsFwdCtx->curNbl == NULL);
>> OvsClearTunRxCtx(ovsFwdCtx);
>>
>>diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c
>>index b78ef95..ef44d23 100644
>>--- a/datapath-windows/ovsext/Stt.c
>>+++ b/datapath-windows/ovsext/Stt.c
>>@@ -34,6 +34,7 @@
>> #endif
>> #define OVS_DBG_MOD OVS_DBG_STT
>> #include "Debug.h"
>>+#include "Jhash.h"
>>
>> KSTART_ROUTINE OvsSttDefragCleaner;
>> static PLIST_ENTRY OvsSttPktFragHash;
>>@@ -152,8 +153,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
>> UINT32 headRoom = OvsGetSttTunHdrSize();
>> UINT32 tcpChksumLen;
>> PUINT8 bufferStart;
>>-
>>- UNREFERENCED_PARAMETER(layers);
>>+ ULONG mss = 0;
>>+ NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
>>
>> curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
>>
>>@@ -162,14 +163,20 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
>> BOOLEAN innerPartialChecksum = FALSE;
>>
>> if (layers->isTcp) {
>>- NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
>>-
>> lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
>> TcpLargeSendNetBufferListInfo);
>>- if (lsoInfo.LsoV1Transmit.MSS) {
>>- /* XXX We don't handle LSO yet */
>>- OVS_LOG_ERROR("LSO on STT is not supported");
>>- return NDIS_STATUS_FAILURE;
>>+
>>+ switch (lsoInfo.Transmit.Type) {
>>+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
>>+ mss = lsoInfo.LsoV1Transmit.MSS;
>>+ break;
>>+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
>>+ mss = lsoInfo.LsoV2Transmit.MSS;
>>+ break;
>>+ default:
>>+ OVS_LOG_ERROR("Unknown LSO transmit type:%d",
>>+ lsoInfo.Transmit.Type);
>>+ return NDIS_STATUS_FAILURE;
>> }
>> }
>>
>>@@ -186,21 +193,36 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
>> return NDIS_STATUS_FAILURE;
>> }
>>
>>- curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
>>+ curNbl = *newNbl;
>>+ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
>> curMdl = NET_BUFFER_CURRENT_MDL(curNb);
>>+ /* NB Chain should be split before */
>>+ ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
>>+ innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
>>+
>> bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
>> LowPagePriority);
>> bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
>>
>>- if (layers->isIPv4 && csumInfo.Transmit.IpHeaderChecksum) {
>>+ if (layers->isIPv4) {
>> IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
>>- ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
>>+ if (!ip->tot_len) {
>>+ ip->tot_len = htons(innerFrameLen - sizeof(EthHdr));
>>+ }
>>+ if (!ip->check) {
>>+ ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
>>+ }
>> }
>>+
>> if (layers->isTcp) {
>>- if(!csumInfo.Transmit.TcpChecksum) {
>>- innerChecksumVerified = TRUE;
>>- } else {
>>+ if (mss) {
>> innerPartialChecksum = TRUE;
>>+ } else {
>>+ if (!csumInfo.Transmit.TcpChecksum) {
>>+ innerChecksumVerified = TRUE;
>>+ } else {
>>+ innerPartialChecksum = TRUE;
>>+ }
>> }
>> } else if (layers->isUdp) {
>> if(!csumInfo.Transmit.UdpChecksum) {
>>@@ -210,24 +232,6 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
>> }
>> }
>>
>>- curNbl = *newNbl;
>>- curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
>>- /* NB Chain should be split before */
>>- ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
>>-
>>- innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
>>- /*
>>- * External port can't be removed as we hold the dispatch lock
>>- * We also check if the external port was removed beforecalling
>>- * port encapsulation functions
>>- */
>>- if (innerFrameLen > OvsGetExternalMtu(switchContext) - headRoom) {
>>- OVS_LOG_ERROR("Packet too large (size %d, mtu %d). Can't
>>encapsulate",
>>- innerFrameLen, OvsGetExternalMtu(switchContext));
>>- status = NDIS_STATUS_FAILURE;
>>- goto ret_error;
>>- }
>>-
>> status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
>> if (status != NDIS_STATUS_SUCCESS) {
>> ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)");
>>@@ -301,33 +305,52 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
>> IPPROTO_TCP, (uint16)
>>tcpChksumLen);
>> sttHdr->version = 0;
>>
>>- /* XXX need to peek into the inner packet, hard code for now */
>>- sttHdr->flags = STT_PROTO_IPV4;
>>- if (innerChecksumVerified) {
>>- sttHdr->flags |= STT_CSUM_VERIFIED;
>>- } else if (innerPartialChecksum) {
>>+ /* Set STT Header */
>>+ sttHdr->flags = 0;
>>+ if (innerPartialChecksum) {
>> sttHdr->flags |= STT_CSUM_PARTIAL;
>>+ if (layers->isIPv4) {
>>+ sttHdr->flags |= STT_PROTO_IPV4;
>>+ }
>>+ if (layers->isTcp) {
>>+ sttHdr->flags |= STT_PROTO_TCP;
>>+ }
>>+ sttHdr->l4Offset = (UINT8) layers->l4Offset;
>>+ sttHdr->mss = (UINT16) htons(mss);
>>+ } else if (innerChecksumVerified) {
>>+ sttHdr->flags = STT_CSUM_VERIFIED;
>>+ sttHdr->l4Offset = 0;
>>+ sttHdr->mss = 0;
>> }
>>- sttHdr->l4Offset = 0;
>>
>> sttHdr->reserved = 0;
>>- /* XXX Used for large TCP packets.Not sure how it is used, clarify */
>>- sttHdr->mss = 0;
>> sttHdr->vlanTCI = 0;
>> sttHdr->key = tunKey->tunnelId;
>> /* Zero out stt padding */
>> *(uint16 *)(sttHdr + 1) = 0;
>>
>> /* Offload IP and TCP checksum */
>>+ ULONG tcpHeaderOffset = sizeof *outerEthHdr +
>>+ outerIpHdr->ihl * 4;
>> csumInfo.Value = 0;
>> csumInfo.Transmit.IpHeaderChecksum = 1;
>> csumInfo.Transmit.TcpChecksum = 1;
>> csumInfo.Transmit.IsIPv4 = 1;
>>- csumInfo.Transmit.TcpHeaderOffset = sizeof *outerEthHdr +
>>- outerIpHdr->ihl * 4;
>>+ csumInfo.Transmit.TcpHeaderOffset = tcpHeaderOffset;
>> NET_BUFFER_LIST_INFO(curNbl,
>> TcpIpChecksumNetBufferListInfo) =
>>csumInfo.Value;
>>
>>+ UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) -
>>sizeof(TCPHdr);
>>+ if (ipTotalLen > encapMss) {
>>+ lsoInfo.Value = 0;
>>+ lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset;
>>+ lsoInfo.LsoV2Transmit.MSS = encapMss;
>>+ lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
>>+ lsoInfo.LsoV2Transmit.IPVersion =
>>NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
>>+ NET_BUFFER_LIST_INFO(curNbl,
>>+ TcpLargeSendNetBufferListInfo) =
>>lsoInfo.Value;
>>+ }
>>+
>> return STATUS_SUCCESS;
>>
>> ret_error:
>>@@ -338,16 +361,22 @@ ret_error:
>>
>> /*
>>
>>*-------------------------------------------------------------------------
>>---
>>- * OvsCalculateTCPChecksum
>>- * Calculate TCP checksum
>>+ * OvsValidateTCPChecksum
>>+ * Validate TCP checksum
>>
>>*-------------------------------------------------------------------------
>>---
>> */
>> static __inline NDIS_STATUS
>>-OvsCalculateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
>>+OvsValidateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
>> {
>> NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
>> csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
>>
>>TcpIpChecksumNetBufferListInfo);
>>+
>>+ /* Check if NIC has indicated TCP checksum failure */
>>+ if (csumInfo.Receive.TcpChecksumFailed) {
>>+ return NDIS_STATUS_INVALID_PACKET;
>>+ }
>>+
>> UINT16 checkSum;
>>
>> /* Check if TCP Checksum has been calculated by NIC */
>>@@ -399,10 +428,9 @@ OvsInitSttDefragmentation()
>> NdisAllocateSpinLock(&OvsSttSpinLock);
>>
>> /* Init the Hash Buffer */
>>- OvsSttPktFragHash = (PLIST_ENTRY) OvsAllocateMemoryWithTag(
>>- sizeof(LIST_ENTRY)
>>- * STT_HASH_TABLE_SIZE,
>>- OVS_STT_POOL_TAG);
>>+ OvsSttPktFragHash = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
>>+ * STT_HASH_TABLE_SIZE,
>>+ OVS_STT_POOL_TAG);
>> if (OvsSttPktFragHash == NULL) {
>> NdisFreeSpinLock(&OvsSttSpinLock);
>> return STATUS_INSUFFICIENT_RESOURCES;
>>@@ -487,6 +515,7 @@ OvsSttDefragCleaner(PVOID data)
>> entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link);
>> if (entry->timeout < currentTime) {
>> RemoveEntryList(&entry->link);
>>+ OvsFreeMemoryWithTag(entry->packetBuf,
>>OVS_STT_POOL_TAG);
>> OvsFreeMemoryWithTag(entry, OVS_STT_POOL_TAG);
>> }
>> }
>>@@ -500,6 +529,158 @@ OvsSttDefragCleaner(PVOID data)
>> PsTerminateSystemThread(STATUS_SUCCESS);
>> }
>>
>>+static OVS_STT_PKT_KEY
>>+OvsGeneratePacketKey(IPHdr *ipHdr, TCPHdr *tcpHdr)
>>+{
>>+ OVS_STT_PKT_KEY key;
>>+ key.sAddr = ipHdr->saddr;
>>+ key.dAddr = ipHdr->daddr;
>>+ key.ackSeq = ntohl(tcpHdr->ack_seq);
>>+ return key;
>>+}
>>+
>>+static UINT32
>>+OvsSttGetPktHash(OVS_STT_PKT_KEY *pktKey)
>>+{
>>+ UINT32 arr[3];
>>+ arr[0] = pktKey->ackSeq;
>>+ arr[1] = pktKey->dAddr;
>>+ arr[2] = pktKey->sAddr;
>>+ return OvsJhashWords(arr, 3, OVS_HASH_BASIS);
>>+}
>>+
>>+static VOID *
>>+OvsLookupPktFrag(OVS_STT_PKT_KEY *pktKey, UINT32 hash)
>>+{
>>+ PLIST_ENTRY link;
>>+ POVS_STT_PKT_ENTRY entry;
>>+
>>+ LIST_FORALL(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK], link) {
>>+ entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link);
>>+ if (entry->ovsPktKey.ackSeq == pktKey->ackSeq &&
>>+ entry->ovsPktKey.dAddr == pktKey->dAddr &&
>>+ entry->ovsPktKey.sAddr == pktKey->sAddr) {
>>+ return entry;
>>+ }
>>+ }
>>+ return NULL;
>>+}
>>+
>>+/*
>>+*
>>+-------------------------------------------------------------------------
>>-
>>+* OvsSttReassemble --
>>+* Reassemble an LSO packet from multiple STT-Fragments.
>>+*
>>+-------------------------------------------------------------------------
>>-
>>+*/
>>+PNET_BUFFER_LIST
>>+OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
>>+ PNET_BUFFER_LIST curNbl,
>>+ IPHdr *ipHdr,
>>+ TCPHdr *tcp,
>>+ SttHdr *newSttHdr,
>>+ UINT16 payloadLen)
>>+{
>>+ UINT32 seq = ntohl(tcp->seq);
>>+ UINT32 innerPacketLen = (seq >> STT_SEQ_LEN_SHIFT) - STT_HDR_LEN;
>>+ UINT32 segOffset = STT_SEGMENT_OFF(seq);
>>+ UINT32 offset = segOffset == 0 ? 0 : segOffset - STT_HDR_LEN;
>>+ UINT32 startOffset = 0;
>>+ OVS_STT_PKT_ENTRY *pktFragEntry;
>>+ PNET_BUFFER_LIST targetPNbl = NULL;
>>+ BOOLEAN lastPacket = FALSE;
>>+ PNET_BUFFER sourceNb;
>>+ UINT32 fragmentLength = payloadLen;
>>+ SttHdr stt;
>>+ SttHdr *sttHdr = NULL;
>>+ sourceNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
>>+
>>+ /* XXX optimize this lock */
>>+ NdisAcquireSpinLock(&OvsSttSpinLock);
>>+
>>+ /* If this is the first fragment, copy the STT header */
>>+ if (segOffset == 0) {
>>+ sttHdr = NdisGetDataBuffer(sourceNb, sizeof(SttHdr), &stt, 1, 0);
>>+ if (sttHdr == NULL) {
>>+ OVS_LOG_ERROR("Unable to retrieve STT header");
>>+ return NULL;
>>+ }
>>+ fragmentLength = fragmentLength - STT_HDR_LEN;
>>+ startOffset = startOffset + STT_HDR_LEN;
>>+ }
>>+
>>+ /* Lookup fragment */
>>+ OVS_STT_PKT_KEY pktKey = OvsGeneratePacketKey(ipHdr, tcp);
>>+ UINT32 hash = OvsSttGetPktHash(&pktKey);
>>+ pktFragEntry = OvsLookupPktFrag(&pktKey, hash);
>>+
>>+ if (pktFragEntry == NULL) {
>>+ /* Create a new Packet Entry */
>>+ POVS_STT_PKT_ENTRY entry;
>>+ entry = OvsAllocateMemoryWithTag(sizeof(OVS_STT_PKT_ENTRY),
>>+ OVS_STT_POOL_TAG);
>>+ RtlZeroMemory(entry, sizeof (OVS_STT_PKT_ENTRY));
>>+
>>+ /* Update Key, timestamp and recvdLen */
>>+ NdisMoveMemory(&entry->ovsPktKey, &pktKey, sizeof
>>(OVS_STT_PKT_KEY));
>>+
>>+ entry->recvdLen = fragmentLength;
>>+
>>+ UINT64 currentTime;
>>+ NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
>>+ entry->timeout = currentTime + STT_ENTRY_TIMEOUT;
>>+
>>+ if (segOffset == 0) {
>>+ entry->sttHdr = *sttHdr;
>>+ }
>>+
>>+ /* Copy the data from Source to new buffer */
>>+ entry->packetBuf = OvsAllocateMemoryWithTag(innerPacketLen,
>>+ OVS_STT_POOL_TAG);
>>+ if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
>>+ entry->packetBuf + offset) == NULL) {
>>+ OVS_LOG_ERROR("Error when obtaining bytes from Packet");
>>+ goto handle_error;
>>+ }
>>+
>>+ /* Insert the entry in the Static Buffer */
>>+ InsertHeadList(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK],
>>+ &entry->link);
>>+ } else {
>>+ /* Add to recieved length to identify if this is the last
>>fragment */
>>+ pktFragEntry->recvdLen += fragmentLength;
>>+ lastPacket = (pktFragEntry->recvdLen == innerPacketLen);
>>+
>>+ if (segOffset == 0) {
>>+ pktFragEntry->sttHdr = *sttHdr;
>>+ }
>>+
>>+ /* Copy the fragment data from Source to existing buffer */
>>+ if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
>>+ pktFragEntry->packetBuf + offset) == NULL)
>>{
>>+ OVS_LOG_ERROR("Error when obtaining bytes from Packet");
>>+ goto handle_error;
>>+ }
>>+ }
>>+
>>+handle_error:
>>+ if (lastPacket) {
>>+ /* Retrieve the original STT header */
>>+ NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof
>>(SttHdr));
>>+ targetPNbl = OvsAllocateNBLFromBuffer(switchContext,
>>pktFragEntry->packetBuf,
>>+ innerPacketLen);
>>+
>>+ /* Delete this entry and free up the memory/ */
>>+ RemoveEntryList(&pktFragEntry->link);
>>+ OvsFreeMemoryWithTag(pktFragEntry->packetBuf, OVS_STT_POOL_TAG);
>>+ OvsFreeMemoryWithTag(pktFragEntry, OVS_STT_POOL_TAG);
>>+ }
>>+
>>+ NdisReleaseSpinLock(&OvsSttSpinLock);
>>+ return lastPacket ? targetPNbl : NULL;
>>+}
>>+
>> /*
>> *
>>--------------------------------------------------------------------------
>> * OvsDecapStt --
>>@@ -513,34 +694,20 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
>> PNET_BUFFER_LIST *newNbl)
>> {
>> NDIS_STATUS status = NDIS_STATUS_FAILURE;
>>- PNET_BUFFER curNb;
>>+ PNET_BUFFER curNb, newNb;
>> IPHdr *ipHdr;
>> char *ipBuf[sizeof(IPHdr)];
>>+ SttHdr stt;
>> SttHdr *sttHdr;
>> char *sttBuf[STT_HDR_LEN];
>> UINT32 advanceCnt, hdrLen;
>>- NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
>>+ BOOLEAN isLsoPacket = FALSE;
>>
>> curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
>> ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
>>
>>- if (NET_BUFFER_DATA_LENGTH(curNb) < OvsGetSttTunHdrSize()) {
>>- OVS_LOG_ERROR("Packet length received is less than the tunnel
>>header:"
>>- " %d<%d\n", NET_BUFFER_DATA_LENGTH(curNb),
>>OvsGetSttTunHdrSize());
>>- return NDIS_STATUS_INVALID_LENGTH;
>>- }
>>-
>>- /* Verify outer TCP Checksum */
>>- csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
>>-
>>TcpIpChecksumNetBufferListInfo);
>>-
>>- /* Check if NIC has indicated TCP checksum failure */
>>- if (csumInfo.Receive.TcpChecksumFailed) {
>>- return NDIS_STATUS_INVALID_PACKET;
>>- }
>>-
>>- /* Calculate the TCP Checksum */
>>- status = OvsCalculateTCPChecksum(curNbl, curNb);
>>+ /* Validate the TCP Checksum */
>>+ status = OvsValidateTCPChecksum(curNbl, curNb);
>> if (status != NDIS_STATUS_SUCCESS) {
>> return status;
>> }
>>@@ -554,34 +721,73 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
>> 1 /*no align*/, 0);
>> ASSERT(ipHdr);
>>
>>+ TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
>>+
>> /* Skip IP & TCP headers */
>> hdrLen = sizeof(IPHdr) + sizeof(TCPHdr),
>> NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
>> advanceCnt += hdrLen;
>>
>>- /* STT Header */
>>- sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, (PVOID) &sttBuf,
>>- 1 /*no align*/, 0);
>>+ UINT32 seq = ntohl(tcp->seq);
>>+ UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT);
>>+ UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len)
>>+ - (ipHdr->ihl * 4)
>>+ - (sizeof * tcp);
>>+
>>+ /* Check if incoming packet requires reassembly */
>>+ if (totalLen != payloadLen) {
>>+ sttHdr = &stt;
>>+ PNET_BUFFER_LIST pNbl = OvsSttReassemble(switchContext, curNbl,
>>+ ipHdr, tcp, sttHdr,
>>+ payloadLen);
>>+ if (pNbl == NULL) {
>>+ return NDIS_STATUS_SUCCESS;
>>+ }
>>+
>>+ *newNbl = pNbl;
>>+ isLsoPacket = TRUE;
>>+ } else {
>>+ /* STT Header */
>>+ sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr,
>>+ (PVOID) &sttBuf, 1 /*no align*/, 0);
>>+ /* Skip stt header, DataOffset points to inner pkt now. */
>>+ hdrLen = STT_HDR_LEN;
>>+ NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
>>+ advanceCnt += hdrLen;
>>+
>>+ *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0,
>>+ 0, FALSE /*copy NBL info*/);
>>+ }
>>+
>>+ if (*newNbl == NULL) {
>>+ OVS_LOG_ERROR("Unable to allocate a new cloned NBL");
>>+ return NDIS_STATUS_RESOURCES;
>>+ }
>>+
>>+ status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
>>+ if (status != NDIS_STATUS_SUCCESS) {
>>+ OvsCompleteNBL(switchContext, *newNbl, TRUE);
>>+ return NDIS_STATUS_FAILURE;
>>+ }
>>+ newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
>>+
>> ASSERT(sttHdr);
>>
>> /* Initialize the tunnel key */
>> tunKey->dst = ipHdr->daddr;
>> tunKey->src = ipHdr->saddr;
>> tunKey->tunnelId = sttHdr->key;
>>- tunKey->flags = (OVS_TNL_F_CSUM | OVS_TNL_F_KEY);
>>+ tunKey->flags = OVS_TNL_F_KEY;
>> tunKey->tos = ipHdr->tos;
>> tunKey->ttl = ipHdr->ttl;
>> tunKey->pad = 0;
>>
>>- /* Skip stt header, DataOffset points to inner pkt now. */
>>- hdrLen = STT_HDR_LEN;
>>- NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
>>- advanceCnt += hdrLen;
>>+ BOOLEAN requiresLSO = sttHdr->mss != 0;
>>
>> /* Verify checksum for inner packet if it's required */
>> if (!(sttHdr->flags & STT_CSUM_VERIFIED)) {
>> BOOLEAN innerChecksumPartial = sttHdr->flags & STT_CSUM_PARTIAL;
>>- EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr),
>>+ EthHdr *eth = (EthHdr *)NdisGetDataBuffer(newNb, sizeof(EthHdr),
>> NULL, 1, 0);
>>
>> /* XXX Figure out a way to offload checksum receives */
>>@@ -597,14 +803,16 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
>> IPPROTO_TCP,
>> (UINT16)l4Payload);
>> }
>>- tcp->check = CalculateChecksumNB(curNb, l4Payload,
>>offset);
>>+ if (!requiresLSO) {
>>+ tcp->check = CalculateChecksumNB(newNb, l4Payload,
>>offset);
>>+ }
>> } else if (ip->protocol == IPPROTO_UDP) {
>> UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
>> if (!innerChecksumPartial){
>> udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
>> IPPROTO_UDP,
>>l4Payload);
>> }
>>- udp->check = CalculateChecksumNB(curNb, l4Payload,
>>offset);
>>+ udp->check = CalculateChecksumNB(newNb, l4Payload,
>>offset);
>> }
>> } else if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV6)) {
>> IPv6Hdr *ip = (IPv6Hdr *)((PCHAR)eth + sizeof *eth);
>>@@ -617,7 +825,9 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
>> (UINT32 *)&ip->daddr,
>> IPPROTO_TCP,
>>totalLength);
>> }
>>- tcp->check = CalculateChecksumNB(curNb, totalLength,
>>offset);
>>+ if (!requiresLSO) {
>>+ tcp->check = CalculateChecksumNB(newNb, totalLength,
>>offset);
>>+ }
>> }
>> else if (ip->nexthdr == IPPROTO_UDP) {
>> UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
>>@@ -626,23 +836,27 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
>> (UINT32 *)&ip->daddr,
>> IPPROTO_UDP,
>>totalLength);
>> }
>>- udp->check = CalculateChecksumNB(curNb, totalLength,
>>offset);
>>+ udp->check = CalculateChecksumNB(newNb, totalLength,
>>offset);
>> }
>> }
>>
>>- NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
>>+ NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) =
>>0;
>> }
>>
>>- *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
>>OVS_DEFAULT_COPY_SIZE,
>>- 0, FALSE /*copy NBL info*/);
>>-
>>- ASSERT(advanceCnt == OvsGetSttTunHdrSize());
>>- status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
>>-
>>- if (*newNbl == NULL) {
>>- OVS_LOG_ERROR("OvsDecapStt: Unable to allocate a new cloned
>>NBL");
>>- status = NDIS_STATUS_RESOURCES;
>>+ if (requiresLSO) {
>>+ NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
>>+ lsoInfo.Value = 0;
>>+ lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset;
>>+ lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU - sizeof(IPHdr) -
>>sizeof(TCPHdr);
>>+ lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
>>+ if (sttHdr->flags & STT_PROTO_IPV4) {
>>+ lsoInfo.LsoV2Transmit.IPVersion =
>>NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
>>+ } else {
>>+ lsoInfo.LsoV2Transmit.IPVersion =
>>NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6;
>>+ }
>>+ NET_BUFFER_LIST_INFO(*newNbl,
>>+ TcpLargeSendNetBufferListInfo) =
>>lsoInfo.Value;
>> }
>>
>>- return status;
>>+ return NDIS_STATUS_SUCCESS;
>> }
>>--
>>1.9.5.msysgit.0
>>
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
More information about the dev
mailing list