[ovs-dev] [PATCH V2 4/4] datapath-windows: Add ECN support on STT decapsulation

Paul Boca pboca at cloudbasesolutions.com
Mon Jun 6 16:45:06 UTC 2016


Signed-off-by: Paul-Daniel Boca <pboca at cloudbasesolutions.com>
---
V2: Removed the previously added OvsGetEthHdr and use OvsExtractLayers instead
    on STT decapsulation. The extracted layers are reused by
    OvsDecapSetOffloads if needed.
---
 datapath-windows/ovsext/NetProto.h | 10 ++++-
 datapath-windows/ovsext/Stt.c      | 85 +++++++++++++++++++++++++++++++++-----
 datapath-windows/ovsext/Stt.h      |  1 +
 3 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/datapath-windows/ovsext/NetProto.h b/datapath-windows/ovsext/NetProto.h
index f7527f8..6cf6d8e 100644
--- a/datapath-windows/ovsext/NetProto.h
+++ b/datapath-windows/ovsext/NetProto.h
@@ -45,6 +45,8 @@ typedef struct EthHdr {
 #define ICMP_CSUM_OFFSET       2
 #define INET_CSUM_LENGTH       (sizeof(UINT16))
 
+#define PACKET_MAX_LENGTH      (64 * 1024) // 64K; parenthesized so it is safe inside larger expressions
+
 #define IP4_UNITS_TO_BYTES(x) ((x) << 2)
 #define IP4_BYTES_TO_UNITS(x) ((x) >> 2)
 
@@ -245,7 +247,13 @@ typedef union _OVS_PACKET_HDR_INFO {
 typedef struct IPHdr {
    UINT8    ihl:4,
             version:4;
-   UINT8    tos;
+   union {
+       struct {
+           UINT8 ecn:2,
+                 dscp:6;
+       };
+       UINT8    tos;
+   };
    UINT16   tot_len;
    UINT16   id;
    UINT16   frag_off;
diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c
index fd9b32b..0bac5f2 100644
--- a/datapath-windows/ovsext/Stt.c
+++ b/datapath-windows/ovsext/Stt.c
@@ -647,6 +647,9 @@ OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
         NdisMoveMemory(&entry->ovsPktKey, &pktKey, sizeof (OVS_STT_PKT_KEY));
 
         entry->recvdLen = fragmentLength;
+        if (ipHdr->ecn == IP_ECN_CE) {
+            entry->ecn = IP_ECN_CE;
+        }
 
         UINT64 currentTime;
         NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
@@ -681,6 +684,9 @@ OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
         if (segOffset == 0) {
             pktFragEntry->sttHdr = *sttHdr;
         }
+        if (ipHdr->ecn == IP_ECN_CE) {
+            pktFragEntry->ecn = IP_ECN_CE;
+        }
 
         /* Copy the fragment data from Source to existing buffer */
         if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
@@ -692,6 +698,14 @@ OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
 
 handle_error:
     if (lastPacket) {
+        /* It is RECOMMENDED that if any segment of the received STT
+        *  frame has the CE (congestion experienced) bit set
+        *  in its IP header, then the CE bit SHOULD be set in the IP
+        *  header of the decapsulated STT frame.*/
+        if (pktFragEntry->ecn == IP_ECN_CE) {
+            ipHdr->ecn = IP_ECN_CE;
+        }
+
         /* Retrieve the original STT header */
         NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof (SttHdr));
         targetPNbl = OvsAllocateNBLFromBuffer(switchContext,
@@ -723,7 +737,9 @@ handle_error:
 *----------------------------------------------------------------------------
 */
 NDIS_STATUS
-OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr *sttHdr)
+OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, 
+                    SttHdr *sttHdr,
+                    OVS_PACKET_HDR_INFO *layers)
 {
     if ((sttHdr->flags & STT_CSUM_VERIFIED)
         || !(sttHdr->flags & STT_CSUM_PARTIAL)) {
@@ -767,11 +783,13 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr *sttHdr)
         PMDL curMdl = NULL;
         PNET_BUFFER curNb;
         PUINT8 buf = NULL;
-        OVS_PACKET_HDR_INFO layers;
 
-        status = OvsExtractLayers(*curNbl, &layers);
-        if (status != NDIS_STATUS_SUCCESS) {
-            return status;
+        // if layers not initialized by the caller we extract layers here
+        if (layers->value == 0) {
+            status = OvsExtractLayers(*curNbl, layers);
+            if (status != NDIS_STATUS_SUCCESS) {
+                return status;
+            }
         }
 
         curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl);
@@ -786,8 +804,8 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr *sttHdr)
             IPHdr *ipHdr;
             TCPHdr *tcpHdr;
 
-            ipHdr = (IPHdr *)(buf + layers.l3Offset);
-            tcpHdr = (TCPHdr *)(buf + layers.l4Offset);
+            ipHdr = (IPHdr *)(buf + layers->l3Offset);
+            tcpHdr = (TCPHdr *)(buf + layers->l4Offset);
 
             tcpHdr->check = IPPseudoChecksum(&ipHdr->saddr,
                                                 (uint32 *)&ipHdr->daddr,
@@ -796,8 +814,8 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr *sttHdr)
             IPv6Hdr *ipHdr;
             TCPHdr *tcpHdr;
 
-            ipHdr = (IPv6Hdr *)(buf + layers.l3Offset);
-            tcpHdr = (TCPHdr *)(buf + layers.l4Offset);
+            ipHdr = (IPv6Hdr *)(buf + layers->l3Offset);
+            tcpHdr = (TCPHdr *)(buf + layers->l4Offset);
 
             tcpHdr->check = IPv6PseudoChecksum((UINT32*)&ipHdr->saddr,
                                         (UINT32*)&ipHdr->daddr,
@@ -919,6 +937,53 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
     tunKey->ttl = ipHdr->ttl;
     tunKey->pad = 0;
 
+    /* Handle ECN */
+    OVS_PACKET_HDR_INFO layers = {0};
+    if (0 != ipHdr->tos) {
+        status = OvsExtractLayers(*newNbl, &layers);
+        if (status != NDIS_STATUS_SUCCESS) {
+            OvsCompleteNBL(switchContext, *newNbl, TRUE);
+            return NDIS_STATUS_FAILURE;
+        }
+
+        if (layers.isIPv4) {
+            IPHdr ip_storage;
+            IPHdr *innerIpHdr;
+
+            /*
+            *  If CE is set for outer IP header, reset ECN of inner IP
+            *  header to CE, all other values are kept the same
+            */
+            innerIpHdr = (IPHdr*)OvsGetIp(*newNbl,
+                                          layers.l3Offset,
+                                          &ip_storage);
+            if (innerIpHdr) {
+                if (ipHdr->ecn == IP_ECN_CE) {
+                        innerIpHdr->ecn |= IP_ECN_CE;
+                }
+                /* copy DSCP from outer header to inner header */
+                innerIpHdr->dscp = ipHdr->dscp;
+                /* fix IP checksum */
+                innerIpHdr->check = IPChecksum((UINT8 *)innerIpHdr,
+                                                innerIpHdr->ihl * 4, 0);
+            }
+        } else if (layers.isIPv6) {
+            IPv6Hdr ipv6_storage;
+            IPv6Hdr *innerIpv6Hdr = (IPv6Hdr*)OvsGetPacketBytes(
+                                                      *newNbl,
+                                                      sizeof *innerIpv6Hdr,
+                                                      layers.l3Offset,
+                                                      &ipv6_storage);
+            if (innerIpv6Hdr) {
+                /* copy ECN and DSCP to inner header */
+                innerIpv6Hdr->priority = ipHdr->ecn
+                                    | ((innerIpv6Hdr->flow_lbl[0] & 0x3) << 2);
+                innerIpv6Hdr->flow_lbl[0] = (innerIpv6Hdr->flow_lbl[0] & 0xF)
+                                             | ((ipHdr->tos & 0xF) << 4);
+            }
+        }
+    }
+
     /* Apply VLAN tag if present */
     if (ntohs(sttHdr->vlanTCI) & OVSWIN_VLAN_CFI) {
         NDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag;
@@ -930,7 +995,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
     }
 
     /* Set Checksum and LSO offload flags */
-    OvsDecapSetOffloads(newNbl, sttHdr);
+    OvsDecapSetOffloads(newNbl, sttHdr, &layers);
 
     return NDIS_STATUS_SUCCESS;
 }
diff --git a/datapath-windows/ovsext/Stt.h b/datapath-windows/ovsext/Stt.h
index faa00d7..1b7e797 100644
--- a/datapath-windows/ovsext/Stt.h
+++ b/datapath-windows/ovsext/Stt.h
@@ -69,6 +69,7 @@ typedef struct _OVS_STT_PKT_ENTRY {
     UINT64              timeout;
     UINT32              recvdLen;
     UINT32              allocatedLen;
+    UINT8               ecn;
     SttHdr              sttHdr;
     PCHAR               packetBuf;
     LIST_ENTRY          link;
-- 
2.7.2.windows.1



More information about the dev mailing list