[ovs-dev] [PATCH V2 4/4] datapath-windows: Add ECN support on STT decapsulation
Sairam Venugopal
vsairam at vmware.com
Mon Jun 6 17:27:08 UTC 2016
Thanks for incorporating the changes.
Acked-by: Sairam Venugopal <vsairam at vmware.com>
On 6/6/16, 9:45 AM, "Paul Boca" <pboca at cloudbasesolutions.com> wrote:
>Signed-off-by: Paul-Daniel Boca <pboca at cloudbasesolutions.com>
>---
>V2: Removed previously added OvsGetEthHdr and use OvsExtractLayers instead
> on STT decapsulation. Extracted layers will be used in SetOffloads if
>needed.
>---
> datapath-windows/ovsext/NetProto.h | 10 ++++-
> datapath-windows/ovsext/Stt.c | 85
>+++++++++++++++++++++++++++++++++-----
> datapath-windows/ovsext/Stt.h | 1 +
> 3 files changed, 85 insertions(+), 11 deletions(-)
>
>diff --git a/datapath-windows/ovsext/NetProto.h
>b/datapath-windows/ovsext/NetProto.h
>index f7527f8..6cf6d8e 100644
>--- a/datapath-windows/ovsext/NetProto.h
>+++ b/datapath-windows/ovsext/NetProto.h
>@@ -45,6 +45,8 @@ typedef struct EthHdr {
> #define ICMP_CSUM_OFFSET 2
> #define INET_CSUM_LENGTH (sizeof(UINT16))
>
>+#define PACKET_MAX_LENGTH 64*1024 // 64K
>+
> #define IP4_UNITS_TO_BYTES(x) ((x) << 2)
> #define IP4_BYTES_TO_UNITS(x) ((x) >> 2)
>
>@@ -245,7 +247,13 @@ typedef union _OVS_PACKET_HDR_INFO {
> typedef struct IPHdr {
> UINT8 ihl:4,
> version:4;
>- UINT8 tos;
>+ union {
>+ struct {
>+ UINT8 ecn:2,
>+ dscp:6;
>+ };
>+ UINT8 tos;
>+ };
> UINT16 tot_len;
> UINT16 id;
> UINT16 frag_off;
>diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c
>index fd9b32b..0bac5f2 100644
>--- a/datapath-windows/ovsext/Stt.c
>+++ b/datapath-windows/ovsext/Stt.c
>@@ -647,6 +647,9 @@ OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
> NdisMoveMemory(&entry->ovsPktKey, &pktKey, sizeof
>(OVS_STT_PKT_KEY));
>
> entry->recvdLen = fragmentLength;
>+ if (ipHdr->ecn == IP_ECN_CE) {
>+ entry->ecn = IP_ECN_CE;
>+ }
>
> UINT64 currentTime;
> NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
>@@ -681,6 +684,9 @@ OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
> if (segOffset == 0) {
> pktFragEntry->sttHdr = *sttHdr;
> }
>+ if (ipHdr->ecn == IP_ECN_CE) {
>+ pktFragEntry->ecn = IP_ECN_CE;
>+ }
>
> /* Copy the fragment data from Source to existing buffer */
> if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
>@@ -692,6 +698,14 @@ OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
>
> handle_error:
> if (lastPacket) {
>+ /* It is RECOMMENDED that if any segment of the received STT
>+ * frame has the CE (congestion experienced) bit set
>+ * in its IP header, then the CE bit SHOULD be set in the IP
>+ * header of the decapsulated STT frame.*/
>+ if (pktFragEntry->ecn == IP_ECN_CE) {
>+ ipHdr->ecn = IP_ECN_CE;
>+ }
>+
> /* Retrieve the original STT header */
> NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof
>(SttHdr));
> targetPNbl = OvsAllocateNBLFromBuffer(switchContext,
>@@ -723,7 +737,9 @@ handle_error:
>
>*-------------------------------------------------------------------------
>---
> */
> NDIS_STATUS
>-OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr *sttHdr)
>+OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl,
>+ SttHdr *sttHdr,
>+ OVS_PACKET_HDR_INFO *layers)
> {
> if ((sttHdr->flags & STT_CSUM_VERIFIED)
> || !(sttHdr->flags & STT_CSUM_PARTIAL)) {
>@@ -767,11 +783,13 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl,
>SttHdr *sttHdr)
> PMDL curMdl = NULL;
> PNET_BUFFER curNb;
> PUINT8 buf = NULL;
>- OVS_PACKET_HDR_INFO layers;
>
>- status = OvsExtractLayers(*curNbl, &layers);
>- if (status != NDIS_STATUS_SUCCESS) {
>- return status;
>+ // if layers not initialized by the caller we extract layers here
>+ if (layers->value == 0) {
>+ status = OvsExtractLayers(*curNbl, layers);
>+ if (status != NDIS_STATUS_SUCCESS) {
>+ return status;
>+ }
> }
>
> curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl);
>@@ -786,8 +804,8 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr
>*sttHdr)
> IPHdr *ipHdr;
> TCPHdr *tcpHdr;
>
>- ipHdr = (IPHdr *)(buf + layers.l3Offset);
>- tcpHdr = (TCPHdr *)(buf + layers.l4Offset);
>+ ipHdr = (IPHdr *)(buf + layers->l3Offset);
>+ tcpHdr = (TCPHdr *)(buf + layers->l4Offset);
>
> tcpHdr->check = IPPseudoChecksum(&ipHdr->saddr,
> (uint32 *)&ipHdr->daddr,
>@@ -796,8 +814,8 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr
>*sttHdr)
> IPv6Hdr *ipHdr;
> TCPHdr *tcpHdr;
>
>- ipHdr = (IPv6Hdr *)(buf + layers.l3Offset);
>- tcpHdr = (TCPHdr *)(buf + layers.l4Offset);
>+ ipHdr = (IPv6Hdr *)(buf + layers->l3Offset);
>+ tcpHdr = (TCPHdr *)(buf + layers->l4Offset);
>
> tcpHdr->check = IPv6PseudoChecksum((UINT32*)&ipHdr->saddr,
> (UINT32*)&ipHdr->daddr,
>@@ -919,6 +937,53 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
> tunKey->ttl = ipHdr->ttl;
> tunKey->pad = 0;
>
>+ /* Handle ECN */
>+ OVS_PACKET_HDR_INFO layers = {0};
>+ if (0 != ipHdr->tos) {
>+ status = OvsExtractLayers(*newNbl, &layers);
>+ if (status != NDIS_STATUS_SUCCESS) {
>+ OvsCompleteNBL(switchContext, *newNbl, TRUE);
>+ return NDIS_STATUS_FAILURE;
>+ }
>+
>+ if (layers.isIPv4) {
>+ IPHdr ip_storage;
>+ IPHdr *innerIpHdr;
>+
>+ /*
>+ * If CE is set for outer IP header, reset ECN of inner IP
>+ * header to CE, all other values are kept the same
>+ */
>+ innerIpHdr = (IPHdr*)OvsGetIp(*newNbl,
>+ layers.l3Offset,
>+ &ip_storage);
>+ if (innerIpHdr) {
>+ if (ipHdr->ecn == IP_ECN_CE) {
>+ innerIpHdr->ecn |= IP_ECN_CE;
>+ }
>+ /* copy DSCP from outer header to inner header */
>+ innerIpHdr->dscp = ipHdr->dscp;
>+ /* fix IP checksum */
>+ innerIpHdr->check = IPChecksum((UINT8 *)innerIpHdr,
>+ innerIpHdr->ihl * 4, 0);
>+ }
>+ } else if (layers.isIPv6) {
>+ IPv6Hdr ipv6_storage;
>+ IPv6Hdr *innerIpv6Hdr = (IPv6Hdr*)OvsGetPacketBytes(
>+ *newNbl,
>+ sizeof
>*innerIpv6Hdr,
>+ layers.l3Offset,
>+ &ipv6_storage);
>+ if (innerIpv6Hdr) {
>+ /* copy ECN and DSCP to inner header */
>+ innerIpv6Hdr->priority = ipHdr->ecn
>+ | ((innerIpv6Hdr->flow_lbl[0] & 0x3)
><< 2);
>+ innerIpv6Hdr->flow_lbl[0] = (innerIpv6Hdr->flow_lbl[0] &
>0xF)
>+ | ((ipHdr->tos & 0xF) << 4);
>+ }
>+ }
>+ }
>+
> /* Apply VLAN tag if present */
> if (ntohs(sttHdr->vlanTCI) & OVSWIN_VLAN_CFI) {
> NDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag;
>@@ -930,7 +995,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
> }
>
> /* Set Checksum and LSO offload flags */
>- OvsDecapSetOffloads(newNbl, sttHdr);
>+ OvsDecapSetOffloads(newNbl, sttHdr, &layers);
>
> return NDIS_STATUS_SUCCESS;
> }
>diff --git a/datapath-windows/ovsext/Stt.h b/datapath-windows/ovsext/Stt.h
>index faa00d7..1b7e797 100644
>--- a/datapath-windows/ovsext/Stt.h
>+++ b/datapath-windows/ovsext/Stt.h
>@@ -69,6 +69,7 @@ typedef struct _OVS_STT_PKT_ENTRY {
> UINT64 timeout;
> UINT32 recvdLen;
> UINT32 allocatedLen;
>+ UINT8 ecn;
> SttHdr sttHdr;
> PCHAR packetBuf;
> LIST_ENTRY link;
>--
>2.7.2.windows.1
>_______________________________________________
>dev mailing list
>dev at openvswitch.org
>https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma
>n_listinfo_dev&d=CwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Dc
>ruz40PROJ40ROzSpxyQSLw6fcrOWpJgEcEmNR3JEQ&m=5aipE_WvdmGVbK_DaS1yi5E0knrbZ3
>cAPaJXUO-mYIU&s=RnhWpIhKQ2RCAVJph-zYBlpNww2bXUQ_pZRTk4qGFo4&e=
More information about the dev
mailing list