[ovs-dev] [PATCH] datapath-windows: Create a NBL for each NB when required

Samuel Ghinet sghinet at cloudbasesolutions.com
Wed Aug 13 14:25:41 UTC 2014


Create a NBL for each NB when required

ovs/ovs-issues#15

Every NET_BUFFER of a NET_BUFFER_LIST must go through the
pipeline: extract the flow key, look up the flow, execute the
actions. Previously, only the first NET_BUFFER of a NET_BUFFER_LIST
went through this pipeline, which was incorrect.

OvsPartialCopyToMultipleNBLs is used to make each NET_BUFFER
have its own NET_BUFFER_LIST.

Some functions that used to take a NET_BUFFER_LIST as an argument
now take a NET_BUFFER instead. I have also added a few ASSERTs
where the NET_BUFFER_LIST is expected to contain only one
NET_BUFFER.

Signed-off-by: Samuel Ghinet <sghinet at cloudbasesolutions.com>
---
 datapath-windows/ovsext/OvsActions.c      |  21 ++++--
 datapath-windows/ovsext/OvsBufferMgmt.c   |   6 +-
 datapath-windows/ovsext/OvsBufferMgmt.h   |   5 ++
 datapath-windows/ovsext/OvsChecksum.c     |   7 +-
 datapath-windows/ovsext/OvsFlow.c         |  44 ++++++------
 datapath-windows/ovsext/OvsFlow.h         |   6 +-
 datapath-windows/ovsext/OvsPacketIO.c     | 107 ++++++++++++++++++------------
 datapath-windows/ovsext/OvsPacketParser.c |  20 +++---
 datapath-windows/ovsext/OvsPacketParser.h |  30 ++++-----
 datapath-windows/ovsext/OvsTunnel.c       |  16 +++--
 datapath-windows/ovsext/OvsUser.c         |  22 ++++--
 datapath-windows/ovsext/OvsVxlan.c        |  37 ++++++++---
 12 files changed, 199 insertions(+), 122 deletions(-)

diff --git a/datapath-windows/ovsext/OvsActions.c b/datapath-windows/ovsext/OvsActions.c
index 4a2c117..e342d75 100644
--- a/datapath-windows/ovsext/OvsActions.c
+++ b/datapath-windows/ovsext/OvsActions.c
@@ -495,6 +495,9 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
     OvsFlow *flow;
     UINT64 hash;
     NDIS_STATUS status;
+    VOID* vlanTagValue;
+    NET_BUFFER* pNb;
+
     POVS_VPORT_ENTRY vport =
         OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
     if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
@@ -505,12 +508,18 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
         return NDIS_STATUS_SUCCESS;
     }
     ASSERT(vport->nicState == NdisSwitchNicStateConnected);
+    ASSERT(ovsFwdCtx->curNbl->FirstNetBuffer->Next == NULL);

     /* Assert that in the Rx direction, key is always setup. */
     ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
-    status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
-                          &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ?
-                                         &ovsFwdCtx->tunKey : NULL);
+    vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl,
+        Ieee8021QNetBufferListInfo);
+
+    pNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
+
+    status = OvsExtractFlow(pNb, ovsFwdCtx->srcVportNo,
+        &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ?
+        &ovsFwdCtx->tunKey : NULL, vlanTagValue);
     if (status != NDIS_STATUS_SUCCESS) {
         OvsCompleteNBLForwardingCtx(ovsFwdCtx,
                                     L"OVS-Flow extract failed");
@@ -520,7 +529,7 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)

     flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
     if (flow) {
-        OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
+        OvsFlowUsed(flow, pNb, &ovsFwdCtx->layers);
         ovsFwdCtx->switchContext->datapath.hits++;
         status = OvsActionsExecute(ovsFwdCtx->switchContext,
                                  ovsFwdCtx->completionList, ovsFwdCtx->curNbl,
@@ -828,6 +837,8 @@ dropit:
  *
  * Side effects:
  *     This function consumes the NBL.
+ *
+ * XXX: This function is called by OvsIpHlprCbVxlan, which is not used!
  * --------------------------------------------------------------------------
  */
 VOID
@@ -1348,6 +1359,8 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail =
         NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);

+    ASSERT(curNbl->FirstNetBuffer->Next == NULL);
+
     /* XXX: ASSERT that the flow table lock is held. */
     status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo,
                                   sendFlags, fwdDetail, completionList,
diff --git a/datapath-windows/ovsext/OvsBufferMgmt.c b/datapath-windows/ovsext/OvsBufferMgmt.c
index 8aa8060..18eff07 100644
--- a/datapath-windows/ovsext/OvsBufferMgmt.c
+++ b/datapath-windows/ovsext/OvsBufferMgmt.c
@@ -1085,7 +1085,7 @@ nblcopy_error:
  * --------------------------------------------------------------------------
  */
 static NDIS_STATUS
-GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
+GetSegmentHeaderInfo(PNET_BUFFER nb,
                      const POVS_PACKET_HDR_INFO hdrInfo,
                      UINT32 *hdrSize, UINT32 *seqNumber)
 {
@@ -1093,7 +1093,7 @@ GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
     const TCPHdr *tcp;

     /* Parse the orginal Eth/IP/TCP header */
-    tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage);
+    tcp = OvsGetPacketBytes(nb, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage);
     if (tcp == NULL) {
         return NDIS_STATUS_FAILURE;
     }
@@ -1199,7 +1199,7 @@ OvsTcpSegmentNBL(PVOID ovsContext,
     ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL);

     /* Figure out the segment header size */
-    status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber);
+    status = GetSegmentHeaderInfo(nb, hdrInfo, &hdrSize, &seqNumber);
     if (status != NDIS_STATUS_SUCCESS) {
         OVS_LOG_INFO("Cannot parse NBL header");
         return NULL;
diff --git a/datapath-windows/ovsext/OvsBufferMgmt.h b/datapath-windows/ovsext/OvsBufferMgmt.h
index 9c00b1b..a0c2c77 100644
--- a/datapath-windows/ovsext/OvsBufferMgmt.h
+++ b/datapath-windows/ovsext/OvsBufferMgmt.h
@@ -81,6 +81,11 @@ typedef struct _OVS_NBL_POOL {
 #endif
 } OVS_NBL_POOL, *POVS_NBL_POOL;

+#define OVS_NBL_FOR_EACH(pCurNbl, nblList)  \
+for (NET_BUFFER_LIST* pCurNbl = nblList;    \
+    pCurNbl != NULL;   \
+    pCurNbl = NET_BUFFER_LIST_NEXT_NBL(pCurNbl))
+

 NDIS_STATUS OvsInitBufferPool(PVOID context);
 VOID OvsCleanupBufferPool(PVOID context);
diff --git a/datapath-windows/ovsext/OvsChecksum.c b/datapath-windows/ovsext/OvsChecksum.c
index e192373..5b9cbfa 100644
--- a/datapath-windows/ovsext/OvsChecksum.c
+++ b/datapath-windows/ovsext/OvsChecksum.c
@@ -535,8 +535,13 @@ OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl,
     }

     /* Next, check if the NIC did not validate the RX checksum. */
+    /*
+     * XXX: Nothing here confirms that this NBL is on the Rx path; that
+     * can only be established by checking the caller.
+     */
     if (!csumInfo.Receive.IpChecksumSucceeded) {
-        ipHdr = OvsGetIp(curNbl, hdrInfo->l3Offset, &ip_storage);
+        ipHdr = OvsGetIp(NET_BUFFER_LIST_FIRST_NB(curNbl),
+            hdrInfo->l3Offset, &ip_storage);
         if (ipHdr) {
             ip_storage = *ipHdr;
             hdrChecksum = ipHdr->check;
diff --git a/datapath-windows/ovsext/OvsFlow.c b/datapath-windows/ovsext/OvsFlow.c
index daa64e0..60b01e2 100644
--- a/datapath-windows/ovsext/OvsFlow.c
+++ b/datapath-windows/ovsext/OvsFlow.c
@@ -104,7 +104,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath,

 /*
  *----------------------------------------------------------------------------
- *  GetStartAddrNBL --
+ *  GetStartAddrNB --
  *    Get the virtual address of the frame.
  *
  *  Results:
@@ -112,7 +112,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath,
  *----------------------------------------------------------------------------
  */
 static __inline VOID *
-GetStartAddrNBL(const NET_BUFFER_LIST *_pNB)
+GetStartAddrNB(const NET_BUFFER *_pNB)
 {
     PMDL curMdl;
     PUINT8 curBuffer;
@@ -121,21 +121,21 @@ GetStartAddrNBL(const NET_BUFFER_LIST *_pNB)
     ASSERT(_pNB);

     // Ethernet Header is a guaranteed safe access.
-    curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl;
+    curMdl = (_pNB)->CurrentMdl;
     curBuffer =  MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
     if (!curBuffer) {
         return NULL;
     }

     curHeader = (PEthHdr)
-    (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset);
+        (curBuffer + (_pNB)->CurrentMdlOffset);

     return (VOID *) curHeader;
 }

 VOID
 OvsFlowUsed(OvsFlow *flow,
-            const NET_BUFFER_LIST *packet,
+            const NET_BUFFER *packet,
             const POVS_PACKET_HDR_INFO layers)
 {
     LARGE_INTEGER tickCount;
@@ -144,7 +144,7 @@ OvsFlowUsed(OvsFlow *flow,
     flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick;
     flow->used += ovsUserTimestampDelta;
     flow->packetCount++;
-    flow->byteCount += OvsPacketLenNBL(packet);
+    flow->byteCount += NET_BUFFER_DATA_LENGTH(packet);
     flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers);
 }

@@ -191,15 +191,14 @@ DeleteAllFlows(OVS_DATAPATH *datapath)
  *----------------------------------------------------------------------------
  */
 NDIS_STATUS
-OvsExtractFlow(const NET_BUFFER_LIST *packet,
+OvsExtractFlow(const NET_BUFFER* pNb,
                UINT32 inPort,
                OvsFlowKey *flow,
                POVS_PACKET_HDR_INFO layers,
-               OvsIPv4TunnelKey *tunKey)
+               OvsIPv4TunnelKey *tunKey, VOID* vlanTagValue)
 {
     struct Eth_Header *eth;
     UINT8 offset = 0;
-    PVOID vlanTagValue;

     layers->value = 0;

@@ -213,21 +212,20 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
     }

     flow->l2.inPort = inPort;
-
-    if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) {
+
+    if (NET_BUFFER_DATA_LENGTH(pNb) < ETH_HEADER_LEN_DIX) {
         flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset;
         return NDIS_STATUS_SUCCESS;
     }

     /* Link layer. */
-    eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet);
+    eth = (Eth_Header *)GetStartAddrNB(pNb);
     memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH);
     memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH);

     /*
      * vlan_tci.
      */
-    vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo);
     if (vlanTagValue) {
         PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag =
             (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
@@ -262,7 +260,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
     if (ETH_TYPENOT8023(eth->dix.typeNBO)) {
         flow->l2.dlType = eth->dix.typeNBO;
         layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
-    } else if (OvsPacketLenNBL(packet)  >= ETH_HEADER_LEN_802_3 &&
+    } else if (NET_BUFFER_DATA_LENGTH(pNb)  >= ETH_HEADER_LEN_802_3 &&
               eth->e802_3.llc.dsap == 0xaa &&
               eth->e802_3.llc.ssap == 0xaa &&
               eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME &&
@@ -285,7 +283,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,

         flow->l2.keyLen += OVS_IP_KEY_SIZE;
         layers->isIPv4 = 1;
-        nh = OvsGetIp(packet, layers->l3Offset, &ip_storage);
+        nh = OvsGetIp(pNb, layers->l3Offset, &ip_storage);
         if (nh) {
             layers->l4Offset = layers->l3Offset + nh->ihl * 4;

@@ -309,14 +307,14 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,

             if (!(nh->frag_off & htons(IP_OFFSET))) {
                 if (ipKey->nwProto == SOCKET_IPPROTO_TCP) {
-                    OvsParseTcp(packet, &ipKey->l4, layers);
+                    OvsParseTcp(pNb, &ipKey->l4, layers);
                 } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) {
-                    OvsParseUdp(packet, &ipKey->l4, layers);
+                    OvsParseUdp(pNb, &ipKey->l4, layers);
                 } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) {
                     ICMPHdr icmpStorage;
                     const ICMPHdr *icmp;

-                    icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage);
+                    icmp = OvsGetIcmp(pNb, layers->l4Offset, &icmpStorage);
                     if (icmp) {
                         ipKey->l4.tpSrc = htons(icmp->type);
                         ipKey->l4.tpDst = htons(icmp->code);
@@ -331,7 +329,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
     } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) {
         NDIS_STATUS status;
         flow->l2.keyLen += OVS_IPV6_KEY_SIZE;
-        status = OvsParseIPv6(packet, flow, layers);
+        status = OvsParseIPv6(pNb, flow, layers);
         if (status != NDIS_STATUS_SUCCESS) {
             memset(&flow->ipv6Key, 0, sizeof (Ipv6Key));
             return status;
@@ -342,11 +340,11 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
         flow->ipv6Key.pad = 0;

         if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) {
-            OvsParseTcp(packet, &(flow->ipv6Key.l4), layers);
+            OvsParseTcp(pNb, &(flow->ipv6Key.l4), layers);
         } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) {
-            OvsParseUdp(packet, &(flow->ipv6Key.l4), layers);
+            OvsParseUdp(pNb, &(flow->ipv6Key.l4), layers);
         } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) {
-            OvsParseIcmpV6(packet, flow, layers);
+            OvsParseIcmpV6(pNb, flow, layers);
             flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE);
         }
     } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) {
@@ -357,7 +355,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
         ((UINT64 *)arpKey)[1] = 0;
         ((UINT64 *)arpKey)[2] = 0;
         flow->l2.keyLen += OVS_ARP_KEY_SIZE;
-        arp = OvsGetArp(packet, layers->l3Offset, &arpStorage);
+        arp = OvsGetArp(pNb, layers->l3Offset, &arpStorage);
         if (arp && arp->ea_hdr.ar_hrd == htons(1) &&
             arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) &&
             arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH &&
diff --git a/datapath-windows/ovsext/OvsFlow.h b/datapath-windows/ovsext/OvsFlow.h
index 93368b3..aeec803 100644
--- a/datapath-windows/ovsext/OvsFlow.h
+++ b/datapath-windows/ovsext/OvsFlow.h
@@ -50,13 +50,13 @@ NDIS_STATUS OvsDeleteFlowTable(OVS_DATAPATH *datapath);
 NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath,
                                  POVS_SWITCH_CONTEXT switchContext);

-NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort,
+NDIS_STATUS OvsExtractFlow(const NET_BUFFER *pNb, UINT32 inPort,
                            OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers,
-                           OvsIPv4TunnelKey *tunKey);
+                           OvsIPv4TunnelKey *tunKey, VOID* vlanTagValue);
 OvsFlow *OvsLookupFlow(OVS_DATAPATH *datapath, const OvsFlowKey *key,
                        UINT64 *hash, BOOLEAN hashValid);
 UINT64 OvsHashFlow(const OvsFlowKey *key);
-VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER_LIST *pkt,
+VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER *pkt,
                  const POVS_PACKET_HDR_INFO layers);

 NTSTATUS OvsDumpFlowIoctl(PVOID inputBuffer, UINT32 inputLength,
diff --git a/datapath-windows/ovsext/OvsPacketIO.c b/datapath-windows/ovsext/OvsPacketIO.c
index 39e5703..7896e7b 100644
--- a/datapath-windows/ovsext/OvsPacketIO.c
+++ b/datapath-windows/ovsext/OvsPacketIO.c
@@ -211,6 +211,7 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext,
         OvsFlowKey key;
         UINT64 hash;
         PNET_BUFFER curNb;
+        VOID* vlanTagValue;

         nextNbl = curNbl->Next;
         curNbl->Next = NULL;
@@ -223,6 +224,7 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext,
         } else {
             POVS_BUFFER_CONTEXT ctx;
             OvsFlow *flow;
+            NET_BUFFER_LIST* pNewNbl;

             fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
             sourcePort = fwdDetail->SourcePortId;
@@ -261,53 +263,76 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext,

             vport->stats.rxPackets++;
             vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb);
+            vlanTagValue = NET_BUFFER_LIST_INFO(curNbl,
+                Ieee8021QNetBufferListInfo);

-            status = OvsExtractFlow(curNbl, vport->portNo, &key, &layers, NULL);
-            if (status != NDIS_STATUS_SUCCESS) {
-                RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed");
-                goto dropit;
-            }
+            pNewNbl = OvsPartialCopyToMultipleNBLs(switchContext, curNbl,
+                0, 0, TRUE);
+            if (!pNewNbl) {
+                RtlInitUnicodeString(&filterReason,
+                    L"Cannot allocate new NBL: partial copy NB to "
+                    L"multiple NBLs.");
+
+                OvsStartNBLIngressError(switchContext, curNbl,
+                    sendCompleteFlags, &filterReason,
+                    NDIS_STATUS_RESOURCES);

-            ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
-            OvsAcquireDatapathRead(datapath, &dpLockState, dispatch);
-
-            flow = OvsLookupFlow(datapath, &key, &hash, FALSE);
-            if (flow) {
-                OvsFlowUsed(flow, curNbl, &layers);
-                datapath->hits++;
-                /* If successful, OvsActionsExecute() consumes the NBL.
-                 * Otherwise, it adds it to the completionList. No need to
-                 * check the return value. */
-                OvsActionsExecute(switchContext, &completionList, curNbl,
-                                portNo, SendFlags, &key, &hash, &layers,
-                                flow->actions, flow->actionsLen);
-                OvsReleaseDatapath(datapath, &dpLockState);
                 NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
                 continue;
-            } else {
-                OvsReleaseDatapath(datapath, &dpLockState);
-
-                datapath->misses++;
-                status = OvsCreateAndAddPackets(OVS_DEFAULT_PACKET_QUEUE,
-                                                NULL, 0, OVS_PACKET_CMD_MISS,
-                                                portNo,
-                                                key.tunKey.dst != 0 ?
-                                                (OvsIPv4TunnelKey *)&key.tunKey :
-                                                NULL, curNbl,
-                                                sourcePort ==
-                                                switchContext->externalPortId,
-                                                &layers, switchContext,
-                                                &missedPackets, &num);
-                if (status == NDIS_STATUS_SUCCESS) {
-                    /* Complete the packet since it was copied to user
-                     * buffer. */
-                    RtlInitUnicodeString(&filterReason,
-                        L"OVS-Dropped since packet was copied to userspace");
+            }
+
+            OVS_NBL_FOR_EACH(pCurNbl, pNewNbl)
+            {
+                status = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(pCurNbl),
+                    vport->portNo, &key, &layers, NULL, vlanTagValue);
+                if (status != NDIS_STATUS_SUCCESS) {
+                    RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed");
+                    goto dropit;
+                }
+
+                ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
+                OvsAcquireDatapathRead(datapath, &dpLockState, dispatch);
+
+                flow = OvsLookupFlow(datapath, &key, &hash, FALSE);
+                if (flow) {
+                    OvsFlowUsed(flow, NET_BUFFER_LIST_FIRST_NB(pCurNbl), &layers);
+                    datapath->hits++;
+
+                    /* If successful, OvsActionsExecute() consumes the NBL.
+                    * Otherwise, it adds it to the completionList. No need to
+                    * check the return value. */
+                    OvsActionsExecute(switchContext, &completionList, pCurNbl,
+                        portNo, SendFlags, &key, &hash, &layers,
+                        flow->actions, flow->actionsLen);
+
+                    OvsReleaseDatapath(datapath, &dpLockState);
+                    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+                    continue;
                 } else {
-                    RtlInitUnicodeString(&filterReason,
-                        L"OVS-Dropped due to failure to queue to userspace");
+                    OvsReleaseDatapath(datapath, &dpLockState);
+
+                    datapath->misses++;
+                    status = OvsCreateAndAddPackets(OVS_DEFAULT_PACKET_QUEUE,
+                        NULL, 0, OVS_PACKET_CMD_MISS,
+                        portNo,
+                        key.tunKey.dst != 0 ?
+                        (OvsIPv4TunnelKey *)&key.tunKey :
+                        NULL, curNbl,
+                        sourcePort ==
+                        switchContext->externalPortId,
+                        &layers, switchContext,
+                        &missedPackets, &num);
+                    if (status == NDIS_STATUS_SUCCESS) {
+                        /* Complete the packet since it was copied to user
+                        * buffer. */
+                        RtlInitUnicodeString(&filterReason,
+                            L"OVS-Dropped since packet was copied to userspace");
+                    } else {
+                        RtlInitUnicodeString(&filterReason,
+                            L"OVS-Dropped due to failure to queue to userspace");
+                    }
+                    goto dropit;
                 }
-                goto dropit;
             }

 dropit:
diff --git a/datapath-windows/ovsext/OvsPacketParser.c b/datapath-windows/ovsext/OvsPacketParser.c
index 0a93435..666d3d5 100644
--- a/datapath-windows/ovsext/OvsPacketParser.c
+++ b/datapath-windows/ovsext/OvsPacketParser.c
@@ -18,13 +18,12 @@

 //XXX consider moving to NdisGetDataBuffer.
 const VOID *
-OvsGetPacketBytes(const NET_BUFFER_LIST *nbl,
+OvsGetPacketBytes(const NET_BUFFER *netBuffer,
                   UINT32 len,
                   UINT32 srcOffset,
                   VOID *storage)
 {
     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
-    PNET_BUFFER netBuffer = NET_BUFFER_LIST_FIRST_NB(nbl);
     PMDL currentMdl;
     BOOLEAN firstMDL = TRUE;
     ULONG destOffset = 0;
@@ -83,7 +82,7 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl,
 }

 NDIS_STATUS
-OvsParseIPv6(const NET_BUFFER_LIST *packet,
+OvsParseIPv6(const NET_BUFFER *packet,
           OvsFlowKey *key,
           POVS_PACKET_HDR_INFO layers)
 {
@@ -142,7 +141,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet,
             len = extHdr->hdrExtLen;
             ofs += nextHdr == SOCKET_IPPROTO_AH ? (len + 2) * 4 : (len + 1) * 8;
             nextHdr = extHdr->nextHeader;
-            if (OvsPacketLenNBL(packet) < ofs) {
+            if (NET_BUFFER_DATA_LENGTH(packet) < ofs) {
                 return NDIS_STATUS_FAILURE;
              }
         } else if (nextHdr == SOCKET_IPPROTO_FRAGMENT) {
@@ -177,12 +176,12 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet,
 }

 VOID
-OvsParseTcp(const NET_BUFFER_LIST *packet,
+OvsParseTcp(const NET_BUFFER* pNb,
          L4Key *flow,
          POVS_PACKET_HDR_INFO layers)
 {
     TCPHdr tcpStorage;
-    const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage);
+    const TCPHdr *tcp = OvsGetTcp(pNb, layers->l4Offset, &tcpStorage);
     if (tcp) {
         flow->tpSrc = tcp->source;
         flow->tpDst = tcp->dest;
@@ -192,7 +191,7 @@ OvsParseTcp(const NET_BUFFER_LIST *packet,
 }

 VOID
-OvsParseUdp(const NET_BUFFER_LIST *packet,
+OvsParseUdp(const NET_BUFFER *packet,
          L4Key *flow,
          POVS_PACKET_HDR_INFO layers)
 {
@@ -210,7 +209,7 @@ OvsParseUdp(const NET_BUFFER_LIST *packet,
 }

 NDIS_STATUS
-OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
+OvsParseIcmpV6(const NET_BUFFER *packet,
             OvsFlowKey *key,
             POVS_PACKET_HDR_INFO layers)
 {
@@ -249,7 +248,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
         }
         flow->ndTarget = *ndTarget;

-        while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) {
+        while ((UINT32)(ofs + 8) <= NET_BUFFER_DATA_LENGTH(packet)) {
             /*
              * The minimum size of an option is 8 bytes, which also is
              * the size of Ethernet link-layer options.
@@ -264,7 +263,8 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
             }

             optLen = ndOpt->len * 8;
-            if (!optLen || (UINT32)(ofs + optLen) >  OvsPacketLenNBL(packet)) {
+            if (!optLen || (UINT32)(ofs + optLen) >
+                NET_BUFFER_DATA_LENGTH(packet)) {
                 goto invalid;
             }

diff --git a/datapath-windows/ovsext/OvsPacketParser.h b/datapath-windows/ovsext/OvsPacketParser.h
index ab3c613..aaf3877 100644
--- a/datapath-windows/ovsext/OvsPacketParser.h
+++ b/datapath-windows/ovsext/OvsPacketParser.h
@@ -20,15 +20,15 @@
 #include "precomp.h"
 #include "OvsNetProto.h"

-const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len,
+const VOID* OvsGetPacketBytes(const NET_BUFFER *pNB, UINT32 len,
                               UINT32 SrcOffset, VOID *storage);
-NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
+NDIS_STATUS OvsParseIPv6(const NET_BUFFER *packet, OvsFlowKey *key,
                         POVS_PACKET_HDR_INFO layers);
-VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow,
+VOID OvsParseTcp(const NET_BUFFER* pNb, L4Key *flow,
                  POVS_PACKET_HDR_INFO layers);
-VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow,
+VOID OvsParseUdp(const NET_BUFFER *packet, L4Key *flow,
                  POVS_PACKET_HDR_INFO layers);
-NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
+NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER *packet, OvsFlowKey *key,
                             POVS_PACKET_HDR_INFO layers);

 static __inline ULONG
@@ -58,7 +58,7 @@ OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB)
  * which C does not allow.
  */
 static UINT16
-OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN
+OvsGetTcpCtl(const NET_BUFFER *packet, // IN
              const POVS_PACKET_HDR_INFO layers) // IN
 {
 #define TCP_CTL_OFS 12                // Offset of "ctl" field in TCP header.
@@ -74,7 +74,7 @@ OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN


 static UINT8
-OvsGetTcpFlags(const NET_BUFFER_LIST *packet,    // IN
+OvsGetTcpFlags(const NET_BUFFER *packet,    // IN
                const OvsFlowKey *key,   // IN
                const POVS_PACKET_HDR_INFO layers) // IN
 {
@@ -88,7 +88,7 @@ OvsGetTcpFlags(const NET_BUFFER_LIST *packet,    // IN
 }

 static const EtherArp *
-OvsGetArp(const NET_BUFFER_LIST *packet,
+OvsGetArp(const NET_BUFFER *packet,
           UINT32 ofs,
           EtherArp *storage)
 {
@@ -96,14 +96,14 @@ OvsGetArp(const NET_BUFFER_LIST *packet,
 }

 static const IPHdr *
-OvsGetIp(const NET_BUFFER_LIST *packet,
+OvsGetIp(const NET_BUFFER* pNb,
          UINT32 ofs,
          IPHdr *storage)
 {
-    const IPHdr *ip = OvsGetPacketBytes(packet, sizeof *ip, ofs, storage);
+    const IPHdr *ip = OvsGetPacketBytes(pNb, sizeof *ip, ofs, storage);
     if (ip) {
         int ipLen = ip->ihl * 4;
-        if (ipLen >= sizeof *ip && OvsPacketLenNBL(packet) >= ofs + ipLen) {
+        if (ipLen >= sizeof *ip && NET_BUFFER_DATA_LENGTH(pNb) >= ofs + ipLen) {
             return ip;
         }
     }
@@ -111,14 +111,14 @@ OvsGetIp(const NET_BUFFER_LIST *packet,
 }

 static const TCPHdr *
-OvsGetTcp(const NET_BUFFER_LIST *packet,
+OvsGetTcp(const NET_BUFFER *packet,
           UINT32 ofs,
           TCPHdr *storage)
 {
     const TCPHdr *tcp = OvsGetPacketBytes(packet, sizeof *tcp, ofs, storage);
     if (tcp) {
         int tcpLen = tcp->doff * 4;
-        if (tcpLen >= sizeof *tcp && OvsPacketLenNBL(packet) >= ofs + tcpLen) {
+        if (tcpLen >= sizeof *tcp && NET_BUFFER_DATA_LENGTH(packet) >= ofs + tcpLen) {
             return tcp;
         }
     }
@@ -126,7 +126,7 @@ OvsGetTcp(const NET_BUFFER_LIST *packet,
 }

 static const UDPHdr *
-OvsGetUdp(const NET_BUFFER_LIST *packet,
+OvsGetUdp(const NET_BUFFER *packet,
           UINT32 ofs,
           UDPHdr *storage)
 {
@@ -134,7 +134,7 @@ OvsGetUdp(const NET_BUFFER_LIST *packet,
 }

 static const ICMPHdr *
-OvsGetIcmp(const NET_BUFFER_LIST *packet,
+OvsGetIcmp(const NET_BUFFER *packet,
            UINT32 ofs,
            ICMPHdr *storage)
 {
diff --git a/datapath-windows/ovsext/OvsTunnel.c b/datapath-windows/ovsext/OvsTunnel.c
index b5a369a..a1387cb 100644
--- a/datapath-windows/ovsext/OvsTunnel.c
+++ b/datapath-windows/ovsext/OvsTunnel.c
@@ -228,6 +228,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
     OVS_DATAPATH *datapath = &gOvsSwitchContext->datapath;

     ASSERT(gOvsSwitchContext);
+    ASSERT(pNbl->FirstNetBuffer->Next == NULL);

     /* Fill the tunnel key */
     status = OvsSlowPathDecapVxlan(pNbl, &tunnelKey);
@@ -265,6 +266,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
         UINT64 hash;
         PNET_BUFFER curNb;
         OvsFlow *flow;
+        VOID* vlanTagValue;

         fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);

@@ -295,20 +297,22 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
         ASSERT(vport->ovsType == OVSWIN_VPORT_TYPE_VXLAN);

         portNo = vport->portNo;
+        vlanTagValue = NET_BUFFER_LIST_INFO(pNbl, Ieee8021QNetBufferListInfo);

-        status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunnelKey);
+        status = OvsExtractFlow(curNb, portNo, &key, &layers,
+            &tunnelKey, vlanTagValue);
         if (status != NDIS_STATUS_SUCCESS) {
             goto unlockAndDrop;
         }

         flow = OvsLookupFlow(datapath, &key, &hash, FALSE);
         if (flow) {
-            OvsFlowUsed(flow, pNbl, &layers);
+            OvsFlowUsed(flow, curNb, &layers);
             datapath->hits++;

             OvsActionsExecute(gOvsSwitchContext, &completionList, pNbl,
-                            portNo, SendFlags, &key, &hash, &layers,
-                            flow->actions, flow->actionsLen);
+                portNo, SendFlags, &key, &hash, &layers,
+                flow->actions, flow->actionsLen);

             OvsReleaseDatapath(datapath, &dpLockState);
         } else {
@@ -316,8 +320,8 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,

             datapath->misses++;
             elem = OvsCreateQueuePacket(1, NULL, 0, OVS_PACKET_CMD_MISS,
-                                        portNo, &key.tunKey, pNbl, curNb,
-                                        TRUE, &layers);
+                portNo, &key.tunKey, pNbl, curNb,
+                TRUE, &layers);
             if (elem) {
                 /* Complete the packet since it was copied to user buffer. */
                 InsertTailList(&missedPackets, &elem->link);
diff --git a/datapath-windows/ovsext/OvsUser.c b/datapath-windows/ovsext/OvsUser.c
index 8271d52..267ef1b 100644
--- a/datapath-windows/ovsext/OvsUser.c
+++ b/datapath-windows/ovsext/OvsUser.c
@@ -314,6 +314,7 @@ OvsExecuteDpIoctl(PVOID inputBuffer,
     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
     OvsFlowKey key;
     OVS_PACKET_HDR_INFO layers;
+    VOID* vlanTagValue;

     if (inputLength < sizeof(*execute) || outputLength != 0) {
         return STATUS_INFO_LENGTH_MISMATCH;
@@ -355,17 +356,22 @@ OvsExecuteDpIoctl(PVOID inputBuffer,
     fwdDetail->SourceNicIndex = 0;
     // XXX: Figure out if any of the other members of fwdDetail need to be set.

-    ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
-                              NULL);
+    ASSERT(pNbl->FirstNetBuffer->Next == NULL);
+
+    vlanTagValue = NET_BUFFER_LIST_INFO(pNbl, Ieee8021QNetBufferListInfo);
+
+    ndisStatus = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(pNbl),
+        fwdDetail->SourcePortId, &key, &layers,
+        NULL, vlanTagValue);
     if (ndisStatus == NDIS_STATUS_SUCCESS) {
         ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
         NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
-                              NDIS_RWL_AT_DISPATCH_LEVEL);
+            NDIS_RWL_AT_DISPATCH_LEVEL);
         ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
-                                     0, // XXX: we are passing 0 for srcVportNo
-                                     NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
-                                     &key, NULL, &layers, actions,
-                                     execute->actionsLen);
+            0, // XXX: we are passing 0 for srcVportNo
+            NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
+            &key, NULL, &layers, actions,
+            execute->actionsLen);
         pNbl = NULL;
         NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
     }
@@ -820,6 +826,8 @@ OvsCreateAndAddPackets(UINT32 queueId,
     PNET_BUFFER_LIST newNbl = NULL;
     PNET_BUFFER nb;

+    ASSERT(nbl->FirstNetBuffer->Next == NULL);
+
     if (hdrInfo->isTcp) {
         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
         UINT32 packetLength;
diff --git a/datapath-windows/ovsext/OvsVxlan.c b/datapath-windows/ovsext/OvsVxlan.c
index 63909ae..953dfc7 100644
--- a/datapath-windows/ovsext/OvsVxlan.c
+++ b/datapath-windows/ovsext/OvsVxlan.c
@@ -116,6 +116,8 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
     UINT32 headRoom = OvsGetVxlanTunHdrSize();
     UINT32 packetLength;

+    ASSERT(curNbl->FirstNetBuffer->Next == NULL);
+
     /*
      * XXX: the assumption currently is that the NBL is owned by OVS, and
      * headroom has already been allocated as part of allocating the NBL and
@@ -286,13 +288,19 @@ OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl,
     OvsFlowKey key;
     NDIS_STATUS status;
     UNREFERENCED_PARAMETER(inPort);
-
-    status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL);
-    if (result == STATUS_SUCCESS) {
-        status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers,
+    VOID* vlanTagValue;
+
+    vlanTagValue = NET_BUFFER_LIST_INFO(curNbl, Ieee8021QNetBufferListInfo);
+    for (NET_BUFFER* pNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+        pNb != NULL; pNb = NET_BUFFER_NEXT_NB(pNb))
+    {
+        status = OvsExtractFlow(pNb, inPort, &key, &layers, NULL, vlanTagValue);
+        if (result == STATUS_SUCCESS) {
+            status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers,
                 (POVS_SWITCH_CONTEXT)cbData1, NULL);
-    } else {
-        status = NDIS_STATUS_FAILURE;
+        } else {
+            status = NDIS_STATUS_FAILURE;
+        }
     }

     if (status != NDIS_STATUS_SUCCESS) {
@@ -457,9 +465,16 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
     OVS_PACKET_HDR_INFO layers;

     layers.value = 0;
+    ASSERT(packet->FirstNetBuffer->Next == NULL);

     do {
-        nh = OvsGetIp(packet, layers.l3Offset, &ip_storage);
+        NET_BUFFER* pNb = NET_BUFFER_LIST_FIRST_NB(packet);
+
+        /*
+         * Given that this is an encapsulated (UDP) packet,
+         * the first NB can be used for the header checks.
+         */
+        nh = OvsGetIp(pNb, layers.l3Offset, &ip_storage);
         if (nh) {
             layers.l4Offset = layers.l3Offset + nh->ihl * 4;
         } else {
@@ -467,7 +482,7 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
         }

         /* make sure it's a VXLAN packet */
-        udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage);
+        udp = OvsGetUdp(pNb, layers.l4Offset, &udpStorage);
         if (udp) {
             layers.l7Offset = layers.l4Offset + sizeof *udp;
         } else {
@@ -477,7 +492,11 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
         /* XXX Should be tested against the dynamic port # in the VXLAN vport */
         ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT));

-        VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet,
+        /*
+         * Multiple VXLAN packets may need to be decapsulated here, but only
+         * the first NET_BUFFER is needed to read the VXLAN header.
+         */
+        VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(pNb,
                                                     sizeof(*VxlanHeader),
                                                     layers.l7Offset,
                                                     &VxlanHeaderBuffer);
--
1.8.3.msysgit.0




More information about the dev mailing list