[ovs-dev] [PATCH v2] Create a NBL for each NB when required

Samuel Ghinet sghinet at cloudbasesolutions.com
Mon Sep 8 18:53:26 UTC 2014


I have tested:
- vxlan
- vlan (using patch ports, as specified in INSTALL.Windows)

using:
- ping
- tcp
- tcp LSO (tcp segmentation)

I have put both the "each NB -> NBL" change and the related refactoring in the same patch.

Thanks,
Sam
________________________________________
From: Samuel Ghinet
Sent: Monday, September 08, 2014 9:33 PM
To: dev at openvswitch.org
Cc: Alin Serdean; nithin at vmware.com; eliahue at vmware.com; ssaurabh at vmware.com; Ankur Sharma
Subject: [PATCH v2] Create a NBL for each NB when required

ovs/ovs-issues#15

All NET_BUFFERs of a NET_BUFFER_LIST must go through
the pipeline: extract, find flow, execute. Previously,
only the first NET_BUFFER of a NET_BUFFER_LIST was going
through this pipeline, which was erroneous.

OvsPartialCopyToMultipleNBLs is used to make each NET_BUFFER
have its own NET_BUFFER_LIST.

Some functions that used to take as argument a NET_BUFFER_LIST
now take as argument a NET_BUFFER. I have also added a few
ASSERTs where the NET_BUFFER_LIST is expected to have
only one NET_BUFFER.

Also refactored OvsInitNBLContext and OvsInitExternalNBLContext
and OvsStartNBLIngress, addressing issues:
o) clearer variable names
o) merge OvsExtSendNBL and OvsStartNBLIngress into OvsExtSendNBL
(there is no reason to keep the functionality of these two functions
separate)
o) all the processing for one NBL in the original NBL chain is now
done in a separate function: OvsProcessOneNbl
o) all the processing for one NB (where the NB is wrapped inside a
NBL) is now done in a separate function: OvsProcessOneNb
o) OvsSplitNblByNB was created to return either:
- the original NBL (if the original NBL has only one NB)
or
- a chain of partial-copy NBLs, one NBL per NB, if the original NBL
has more than one NB.

Signed-off-by: Samuel Ghinet <sghinet at cloudbasesolutions.com>
Co-authored-by: Alin Gabriel Serdean <aserdean at cloudbasesolutions.com>
---
 datapath-windows/ovsext/Actions.c      |  60 +++++-
 datapath-windows/ovsext/BufferMgmt.c   |  78 +++++---
 datapath-windows/ovsext/BufferMgmt.h   |  19 ++
 datapath-windows/ovsext/Checksum.c     |   3 +-
 datapath-windows/ovsext/Flow.c         |  46 ++---
 datapath-windows/ovsext/Flow.h         |   6 +-
 datapath-windows/ovsext/PacketIO.c     | 345 ++++++++++++++++++++-------------
 datapath-windows/ovsext/PacketParser.c |  36 ++--
 datapath-windows/ovsext/PacketParser.h |  30 +--
 datapath-windows/ovsext/Tunnel.c       |  13 +-
 datapath-windows/ovsext/User.c         |  16 +-
 datapath-windows/ovsext/Vxlan.c        |  30 ++-
 12 files changed, 442 insertions(+), 240 deletions(-)

diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
index 35ebfdf..1f43c24 100644
--- a/datapath-windows/ovsext/Actions.c
+++ b/datapath-windows/ovsext/Actions.c
@@ -515,6 +515,9 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
     OvsFlow *flow;
     UINT64 hash;
     NDIS_STATUS status;
+    PVOID vlanTagValue;
+    PNET_BUFFER pNb;
+
     POVS_VPORT_ENTRY vport =
         OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
     if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
@@ -525,12 +528,26 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
         return NDIS_STATUS_SUCCESS;
     }
     ASSERT(vport->nicState == NdisSwitchNicStateConnected);
+    ASSERT(ovsFwdCtx->curNbl->FirstNetBuffer->Next == NULL);

     /* Assert that in the Rx direction, key is always setup. */
     ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
-    status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
-                          &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ?
-                                         &ovsFwdCtx->tunKey : NULL);
+    /*
+    * XXX: we should normally not have a vlan tag NBL info here.
+    * For a decapsulate packet: we never have.
+    * For an encapsulated packet: we shouldn't have.
+    */
+    vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl,
+                                        Ieee8021QNetBufferListInfo);
+    ASSERT(vlanTagValue == NULL);
+
+    pNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
+
+    status = OvsExtractFlow(pNb, ovsFwdCtx->srcVportNo,
+                            &key, &ovsFwdCtx->layers,
+                            ovsFwdCtx->tunKey.dst != 0 ?
+                            &ovsFwdCtx->tunKey : NULL,
+                            vlanTagValue);
     if (status != NDIS_STATUS_SUCCESS) {
         OvsCompleteNBLForwardingCtx(ovsFwdCtx,
                                     L"OVS-Flow extract failed");
@@ -540,7 +557,7 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)

     flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
     if (flow) {
-        OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
+        OvsFlowUsed(flow, pNb, &ovsFwdCtx->layers);
         ovsFwdCtx->switchContext->datapath.hits++;
         status = OvsActionsExecute(ovsFwdCtx->switchContext,
                                  ovsFwdCtx->completionList, ovsFwdCtx->curNbl,
@@ -630,12 +647,37 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
     OvsClearTunTxCtx(ovsFwdCtx);

     if (status == NDIS_STATUS_SUCCESS) {
+        PNET_BUFFER_LIST splitNbl, nextNbl;
+
         ASSERT(newNbl);
+
         OvsCompleteNBLForwardingCtx(ovsFwdCtx,
-                                    L"Complete after cloning NBL for encapsulation");
-        ovsFwdCtx->curNbl = newNbl;
-        status = OvsDoFlowLookupOutput(ovsFwdCtx);
-        ASSERT(ovsFwdCtx->curNbl == NULL);
+                                    L"Complete after cloning NBL for "
+                                    L"encapsulation");
+
+        /*
+        * split NBLs: each NBL must contain only one NB.
+        * NOTE: if newNbl has only one NB => splitNbl will be newNbl
+        * NOTE: if we partial copy nbl, we decrement refcount now, so we would
+        * not need to add a Complete later.
+        */
+        splitNbl = OvsSplitNblByNB(newNbl, ovsFwdCtx->switchContext, TRUE);
+        if (!splitNbl) {
+            return NDIS_STATUS_RESOURCES;
+        }
+
+        OVS_NBL_FOR_EACH_NEXT(curNbl, splitNbl, nextNbl) {
+            nextNbl = NET_BUFFER_LIST_NEXT_NBL(curNbl);
+            NET_BUFFER_LIST_NEXT_NBL(curNbl) = NULL;
+
+            ovsFwdCtx->curNbl = curNbl;
+            status = OvsDoFlowLookupOutput(ovsFwdCtx);
+            ASSERT(ovsFwdCtx->curNbl == NULL);
+
+            if (status != STATUS_SUCCESS) {
+                break;
+            }
+        }
     } else {
         /*
         * XXX: Temporary freeing of the packet until we register a
@@ -1368,6 +1410,8 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail =
         NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);

+    ASSERT(curNbl->FirstNetBuffer->Next == NULL);
+
     /* XXX: ASSERT that the flow table lock is held. */
     status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo,
                                   sendFlags, fwdDetail, completionList,
diff --git a/datapath-windows/ovsext/BufferMgmt.c b/datapath-windows/ovsext/BufferMgmt.c
index e0377c1..d04f9a4 100644
--- a/datapath-windows/ovsext/BufferMgmt.c
+++ b/datapath-windows/ovsext/BufferMgmt.c
@@ -254,16 +254,16 @@ OvsCleanupBufferPool(PVOID ovsContext)


 static VOID
-OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx,
-                  UINT16 flags,
+OvsInitNBLContext(POVS_BUFFER_CONTEXT bufferContext,
+                  UINT16 bufferFlags,
                   UINT32 origDataLength,
                   UINT32 srcPortNo)
 {
-    ctx->magic = OVS_CTX_MAGIC;
-    ctx->refCount = 1;
-    ctx->flags = flags;
-    ctx->srcPortNo = srcPortNo;
-    ctx->origDataLength = origDataLength;
+    bufferContext->magic = OVS_CTX_MAGIC;
+    bufferContext->refCount = 1;
+    bufferContext->flags = bufferFlags;
+    bufferContext->srcPortNo = srcPortNo;
+    bufferContext->origDataLength = origDataLength;
 }


@@ -552,15 +552,15 @@ OvsInitExternalNBLContext(PVOID ovsContext,
                           BOOLEAN isRecv)
 {
     NDIS_HANDLE poolHandle;
-    POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
-    POVS_BUFFER_CONTEXT ctx;
-    PNET_BUFFER nb;
+    POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)ovsContext;
+    POVS_BUFFER_CONTEXT bufferContext;
+    ULONG nbLen;
     NDIS_STATUS status;
-    UINT16 flags;
+    UINT16 bufferFlags;

     poolHandle = NdisGetPoolFromNetBufferList(nbl);

-    if (poolHandle == context->ovsPool.ndisHandle) {
+    if (poolHandle == switchContext->ovsPool.ndisHandle) {
         return (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
     }
     status = NdisAllocateNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT),
@@ -571,19 +571,20 @@ OvsInitExternalNBLContext(PVOID ovsContext,
     }
 #ifdef DBG
     OvsDumpNBLContext(nbl);
-    InterlockedIncrement((LONG volatile *)&context->ovsPool.sysNBLCount);
+    InterlockedIncrement((LONG volatile *)&switchContext->ovsPool.sysNBLCount);
 #endif
-    flags = isRecv ? OVS_BUFFER_RECV_BUFFER : OVS_BUFFER_SEND_BUFFER;
-    flags |= OVS_BUFFER_NEED_COMPLETE | OVS_BUFFER_PRIVATE_CONTEXT;
-    ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);

-    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+    bufferFlags = isRecv ? OVS_BUFFER_RECV_BUFFER : OVS_BUFFER_SEND_BUFFER;
+    bufferFlags |= OVS_BUFFER_NEED_COMPLETE | OVS_BUFFER_PRIVATE_CONTEXT;
+    bufferContext = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+
+    nbLen = NET_BUFFER_DATA_LENGTH(nbl->FirstNetBuffer);
     /*
      * we use first nb to decide whether we need advance or retreat during
      * complete.
      */
-    OvsInitNBLContext(ctx, flags, NET_BUFFER_DATA_LENGTH(nb), OVS_DEFAULT_PORT_NO);
-    return ctx;
+    OvsInitNBLContext(bufferContext, bufferFlags, nbLen, OVS_DEFAULT_PORT_NO);
+    return bufferContext;
 }

 /*
@@ -863,7 +864,7 @@ OvsPartialCopyToMultipleNBLs(PVOID ovsContext,
         if (prevNbl == NULL) {
             firstNbl = newNbl;
         } else {
-            NET_BUFFER_LIST_NEXT_NBL(prevNbl) = nbl;
+            NET_BUFFER_LIST_NEXT_NBL(prevNbl) = newNbl;
             NET_BUFFER_NEXT_NB(prevNb) = nb;
         }
         prevNbl = newNbl;
@@ -1085,7 +1086,7 @@ nblcopy_error:
  * --------------------------------------------------------------------------
  */
 static NDIS_STATUS
-GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
+GetSegmentHeaderInfo(PNET_BUFFER nb,
                      const POVS_PACKET_HDR_INFO hdrInfo,
                      UINT32 *hdrSize, UINT32 *seqNumber)
 {
@@ -1093,7 +1094,7 @@ GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
     const TCPHdr *tcp;

     /* Parse the orginal Eth/IP/TCP header */
-    tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage);
+    tcp = OvsGetPacketBytes(nb, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage);
     if (tcp == NULL) {
         return NDIS_STATUS_FAILURE;
     }
@@ -1199,7 +1200,7 @@ OvsTcpSegmentNBL(PVOID ovsContext,
     ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL);

     /* Figure out the segment header size */
-    status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber);
+    status = GetSegmentHeaderInfo(nb, hdrInfo, &hdrSize, &seqNumber);
     if (status != NDIS_STATUS_SUCCESS) {
         OVS_LOG_INFO("Cannot parse NBL header");
         return NULL;
@@ -1533,3 +1534,34 @@ OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl,
     *portNo = ctx->srcPortNo;
     return NDIS_STATUS_SUCCESS;
 }
+
+PNET_BUFFER_LIST
+OvsSplitNblByNB(PNET_BUFFER_LIST nblList,
+                POVS_SWITCH_CONTEXT switchContext,
+                BOOLEAN decrementBufRefCount)
+{
+    PNET_BUFFER curNb;
+    PNET_BUFFER_LIST newNblList;
+    POVS_BUFFER_CONTEXT bufferContext;
+
+    curNb = NET_BUFFER_LIST_FIRST_NB(nblList);
+
+    if (curNb->Next == NULL) {
+        return nblList;
+    }
+
+    bufferContext = (POVS_BUFFER_CONTEXT)
+                     NET_BUFFER_LIST_CONTEXT_DATA_START(nblList);
+
+    if (decrementBufRefCount) {
+        InterlockedDecrement((volatile LONG*)&bufferContext->refCount);
+    }
+
+    newNblList = OvsPartialCopyToMultipleNBLs(switchContext, nblList, 0, 0,
+                                              TRUE);
+    if (!newNblList) {
+        return NULL;
+    }
+
+    return newNblList;
+}
\ No newline at end of file
diff --git a/datapath-windows/ovsext/BufferMgmt.h b/datapath-windows/ovsext/BufferMgmt.h
index 915d7f5..a71d100 100644
--- a/datapath-windows/ovsext/BufferMgmt.h
+++ b/datapath-windows/ovsext/BufferMgmt.h
@@ -17,6 +17,10 @@
 #ifndef __BUFFER_MGMT_H_
 #define __BUFFER_MGMT_H_ 1

+#include "precomp.h"
+
+typedef struct _OVS_SWITCH_CONTEXT *POVS_SWITCH_CONTEXT;
+
 #define MEM_ALIGN                       MEMORY_ALLOCATION_ALIGNMENT
 #define MEM_ALIGN_SIZE(_x)  ((MEM_ALIGN - 1 + (_x))/MEM_ALIGN * MEM_ALIGN)
 #define OVS_CTX_MAGIC                   0xabcd
@@ -81,6 +85,16 @@ typedef struct _OVS_NBL_POOL {
 #endif
 } OVS_NBL_POOL, *POVS_NBL_POOL;

+#define OVS_NBL_FOR_EACH(_curNbl, _nblList)       \
+for (PNET_BUFFER_LIST _curNbl = _nblList;         \
+     _curNbl != NULL;                             \
+     _curNbl = NET_BUFFER_LIST_NEXT_NBL(_curNbl))
+
+#define OVS_NBL_FOR_EACH_NEXT(_curNbl, _nblList, _nextNbl)   \
+for (PNET_BUFFER_LIST _curNbl = _nblList;                    \
+     _curNbl != NULL;                                        \
+     _curNbl = _nextNbl)
+

 NDIS_STATUS OvsInitBufferPool(PVOID context);
 VOID OvsCleanupBufferPool(PVOID context);
@@ -121,4 +135,9 @@ NDIS_STATUS OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl, UINT32 portNo);

 NDIS_STATUS OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl, UINT32 *portNo);

+PNET_BUFFER_LIST
+OvsSplitNblByNB(PNET_BUFFER_LIST nblList,
+                POVS_SWITCH_CONTEXT switchContext,
+                BOOLEAN decrementBufRefCount);
+
 #endif /* __BUFFER_MGMT_H_ */
diff --git a/datapath-windows/ovsext/Checksum.c b/datapath-windows/ovsext/Checksum.c
index 510a094..5ec2436 100644
--- a/datapath-windows/ovsext/Checksum.c
+++ b/datapath-windows/ovsext/Checksum.c
@@ -536,7 +536,8 @@ OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl,

     /* Next, check if the NIC did not validate the RX checksum. */
     if (!csumInfo.Receive.IpChecksumSucceeded) {
-        ipHdr = OvsGetIp(curNbl, hdrInfo->l3Offset, &ip_storage);
+        ipHdr = OvsGetIp(NET_BUFFER_LIST_FIRST_NB(curNbl),
+                         hdrInfo->l3Offset, &ip_storage);
         if (ipHdr) {
             ip_storage = *ipHdr;
             hdrChecksum = ipHdr->check;
diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c
index dae1dca..57e7b47 100644
--- a/datapath-windows/ovsext/Flow.c
+++ b/datapath-windows/ovsext/Flow.c
@@ -105,7 +105,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath,

 /*
  *----------------------------------------------------------------------------
- *  GetStartAddrNBL --
+ *  GetStartAddrNB --
  *    Get the virtual address of the frame.
  *
  *  Results:
@@ -113,7 +113,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath,
  *----------------------------------------------------------------------------
  */
 static __inline VOID *
-GetStartAddrNBL(const NET_BUFFER_LIST *_pNB)
+GetStartAddrNB(const NET_BUFFER *_pNB)
 {
     PMDL curMdl;
     PUINT8 curBuffer;
@@ -122,21 +122,20 @@ GetStartAddrNBL(const NET_BUFFER_LIST *_pNB)
     ASSERT(_pNB);

     // Ethernet Header is a guaranteed safe access.
-    curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl;
+    curMdl = (_pNB)->CurrentMdl;
     curBuffer =  MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
     if (!curBuffer) {
         return NULL;
     }

-    curHeader = (PEthHdr)
-    (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset);
+    curHeader = (PEthHdr)(curBuffer + (_pNB)->CurrentMdlOffset);

     return (VOID *) curHeader;
 }

 VOID
 OvsFlowUsed(OvsFlow *flow,
-            const NET_BUFFER_LIST *packet,
+            const NET_BUFFER *packet,
             const POVS_PACKET_HDR_INFO layers)
 {
     LARGE_INTEGER tickCount;
@@ -144,7 +143,7 @@ OvsFlowUsed(OvsFlow *flow,
     KeQueryTickCount(&tickCount);
     flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick;
     flow->packetCount++;
-    flow->byteCount += OvsPacketLenNBL(packet);
+    flow->byteCount += NET_BUFFER_DATA_LENGTH(packet);
     flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers);
 }

@@ -191,15 +190,15 @@ DeleteAllFlows(OVS_DATAPATH *datapath)
  *----------------------------------------------------------------------------
  */
 NDIS_STATUS
-OvsExtractFlow(const NET_BUFFER_LIST *packet,
+OvsExtractFlow(const NET_BUFFER *pNb,
                UINT32 inPort,
                OvsFlowKey *flow,
                POVS_PACKET_HDR_INFO layers,
-               OvsIPv4TunnelKey *tunKey)
+               OvsIPv4TunnelKey *tunKey,
+               PVOID vlanTagValue)
 {
     struct Eth_Header *eth;
     UINT8 offset = 0;
-    PVOID vlanTagValue;

     layers->value = 0;

@@ -214,20 +213,19 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,

     flow->l2.inPort = inPort;

-    if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) {
+    if (NET_BUFFER_DATA_LENGTH(pNb) < ETH_HEADER_LEN_DIX) {
         flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset;
         return NDIS_STATUS_SUCCESS;
     }

     /* Link layer. */
-    eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet);
+    eth = (Eth_Header *)GetStartAddrNB(pNb);
     memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH);
     memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH);

     /*
      * vlan_tci.
      */
-    vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo);
     if (vlanTagValue) {
         PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag =
             (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
@@ -262,7 +260,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
     if (ETH_TYPENOT8023(eth->dix.typeNBO)) {
         flow->l2.dlType = eth->dix.typeNBO;
         layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
-    } else if (OvsPacketLenNBL(packet)  >= ETH_HEADER_LEN_802_3 &&
+    } else if (NET_BUFFER_DATA_LENGTH(pNb)  >= ETH_HEADER_LEN_802_3 &&
               eth->e802_3.llc.dsap == 0xaa &&
               eth->e802_3.llc.ssap == 0xaa &&
               eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME &&
@@ -285,7 +283,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,

         flow->l2.keyLen += OVS_IP_KEY_SIZE;
         layers->isIPv4 = 1;
-        nh = OvsGetIp(packet, layers->l3Offset, &ip_storage);
+        nh = OvsGetIp(pNb, layers->l3Offset, &ip_storage);
         if (nh) {
             layers->l4Offset = layers->l3Offset + nh->ihl * 4;

@@ -309,14 +307,14 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,

             if (!(nh->frag_off & htons(IP_OFFSET))) {
                 if (ipKey->nwProto == SOCKET_IPPROTO_TCP) {
-                    OvsParseTcp(packet, &ipKey->l4, layers);
+                    OvsParseTcp(pNb, &ipKey->l4, layers);
                 } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) {
-                    OvsParseUdp(packet, &ipKey->l4, layers);
+                    OvsParseUdp(pNb, &ipKey->l4, layers);
                 } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) {
                     ICMPHdr icmpStorage;
                     const ICMPHdr *icmp;

-                    icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage);
+                    icmp = OvsGetIcmp(pNb, layers->l4Offset, &icmpStorage);
                     if (icmp) {
                         ipKey->l4.tpSrc = htons(icmp->type);
                         ipKey->l4.tpDst = htons(icmp->code);
@@ -331,7 +329,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
     } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) {
         NDIS_STATUS status;
         flow->l2.keyLen += OVS_IPV6_KEY_SIZE;
-        status = OvsParseIPv6(packet, flow, layers);
+        status = OvsParseIPv6(pNb, flow, layers);
         if (status != NDIS_STATUS_SUCCESS) {
             memset(&flow->ipv6Key, 0, sizeof (Ipv6Key));
             return status;
@@ -342,11 +340,11 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
         flow->ipv6Key.pad = 0;

         if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) {
-            OvsParseTcp(packet, &(flow->ipv6Key.l4), layers);
+            OvsParseTcp(pNb, &(flow->ipv6Key.l4), layers);
         } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) {
-            OvsParseUdp(packet, &(flow->ipv6Key.l4), layers);
+            OvsParseUdp(pNb, &(flow->ipv6Key.l4), layers);
         } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) {
-            OvsParseIcmpV6(packet, flow, layers);
+            OvsParseIcmpV6(pNb, flow, layers);
             flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE);
         }
     } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) {
@@ -357,7 +355,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
         ((UINT64 *)arpKey)[1] = 0;
         ((UINT64 *)arpKey)[2] = 0;
         flow->l2.keyLen += OVS_ARP_KEY_SIZE;
-        arp = OvsGetArp(packet, layers->l3Offset, &arpStorage);
+        arp = OvsGetArp(pNb, layers->l3Offset, &arpStorage);
         if (arp && arp->ea_hdr.ar_hrd == htons(1) &&
             arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) &&
             arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH &&
@@ -420,9 +418,7 @@ AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow)
      */
     KeMemoryBarrier();

-    //KeAcquireSpinLock(&FilterDeviceExtension->NblQueueLock, &oldIrql);
     InsertTailList(head, &flow->ListEntry);
-    //KeReleaseSpinLock(&FilterDeviceExtension->NblQueueLock, oldIrql);

     datapath->nFlows++;

diff --git a/datapath-windows/ovsext/Flow.h b/datapath-windows/ovsext/Flow.h
index 3964c54..4b99a42 100644
--- a/datapath-windows/ovsext/Flow.h
+++ b/datapath-windows/ovsext/Flow.h
@@ -50,13 +50,13 @@ NDIS_STATUS OvsDeleteFlowTable(OVS_DATAPATH *datapath);
 NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath,
                                  POVS_SWITCH_CONTEXT switchContext);

-NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort,
+NDIS_STATUS OvsExtractFlow(const NET_BUFFER *pNb, UINT32 inPort,
                            OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers,
-                           OvsIPv4TunnelKey *tunKey);
+                           OvsIPv4TunnelKey *tunKey, PVOID vlanTagValue);
 OvsFlow *OvsLookupFlow(OVS_DATAPATH *datapath, const OvsFlowKey *key,
                        UINT64 *hash, BOOLEAN hashValid);
 UINT64 OvsHashFlow(const OvsFlowKey *key);
-VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER_LIST *pkt,
+VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER *pkt,
                  const POVS_PACKET_HDR_INFO layers);

 NTSTATUS OvsDumpFlowIoctl(PVOID inputBuffer, UINT32 inputLength,
diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c
index ac7862d..e9ad5c0 100644
--- a/datapath-windows/ovsext/PacketIO.c
+++ b/datapath-windows/ovsext/PacketIO.c
@@ -28,6 +28,7 @@
 #include "Flow.h"
 #include "Event.h"
 #include "User.h"
+#include "BufferMgmt.h"

 /* Due to an imported header file */
 #pragma warning( disable:4505 )
@@ -177,148 +178,183 @@ OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext,
 }

 static VOID
-OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext,
-                   PNET_BUFFER_LIST netBufferLists,
-                   ULONG SendFlags)
+OvsProcessOneNb(PNET_BUFFER_LIST curNbl,
+                POVS_SWITCH_CONTEXT switchContext,
+                POVS_VPORT_ENTRY sourceVPort,
+                OvsCompletionList *completionList,
+                ULONG sendFlags,
+                PLIST_ENTRY missedPackets,
+                PUINT32 countMissedPackets,
+                PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTagValue)
 {
-    NDIS_SWITCH_PORT_ID sourcePort = 0;
-    NDIS_SWITCH_NIC_INDEX sourceIndex = 0;
-    PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
-    PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL;
-    ULONG sendCompleteFlags;
-    UCHAR dispatch;
-    LOCK_STATE_EX lockState, dpLockState;
     NDIS_STATUS status;
+    OvsFlowKey key;
+    OVS_PACKET_HDR_INFO layers;
     NDIS_STRING filterReason;
-    LIST_ENTRY missedPackets;
-    UINT32 num = 0;
-    OvsCompletionList completionList;
+    LOCK_STATE_EX dpLockState;
+    OvsFlow *flow;
+    UINT64 hash;
+    BOOLEAN atDispatch;
+    POVS_DATAPATH datapath = &(switchContext->datapath);
+
+    atDispatch = NDIS_TEST_SEND_AT_DISPATCH_LEVEL(sendFlags) ?
+                    NDIS_RWL_AT_DISPATCH_LEVEL : 0;
+
+    /* 1. Extract flow key*/
+    status = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(curNbl),
+                            sourceVPort->portNo, &key, &layers, NULL,
+                            vlanTagValue);
+
+    if (status != NDIS_STATUS_SUCCESS) {
+        RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed");
+        OvsAddPktCompletionList(completionList, TRUE, sourceVPort->portId,
+                                curNbl, 0, &filterReason);
+        return;
+    }

-    dispatch = NDIS_TEST_SEND_AT_DISPATCH_LEVEL(SendFlags)?
-                                            NDIS_RWL_AT_DISPATCH_LEVEL : 0;
-    sendCompleteFlags = OvsGetSendCompleteFlags(SendFlags);
-    SendFlags |= NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP;
+    ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
+    OvsAcquireDatapathRead(datapath, &dpLockState, atDispatch);
+
+    /* 2. Find flow matching key */
+    flow = OvsLookupFlow(datapath, &key, &hash, FALSE);
+    if (flow) {
+        OvsFlowUsed(flow, NET_BUFFER_LIST_FIRST_NB(curNbl), &layers);
+        datapath->hits++;
+
+        /*
+        * 2.a) execute actions on packet.
+        * If successful, OvsActionsExecute() consumes the NBL.
+        * Otherwise, it adds it to the completionList. No need to
+        * check the return value.
+        */
+        OvsActionsExecute(switchContext, completionList, curNbl,
+                          sourceVPort->portNo, sendFlags, &key, &hash,
+                          &layers, flow->actions, flow->actionsLen);
+
+        OvsReleaseDatapath(datapath, &dpLockState);
+        return;
+    }

-    InitializeListHead(&missedPackets);
-    OvsInitCompletionList(&completionList, switchContext, sendCompleteFlags);
+    OvsReleaseDatapath(datapath, &dpLockState);
+
+    /* 2.b) no matching flow found => queue packet to userspace */
+    datapath->misses++;
+    status = OvsCreateAndAddPackets(OVS_DEFAULT_PACKET_QUEUE,
+                                    /*user data*/ NULL, /*user data length*/ 0,
+                                    OVS_PACKET_CMD_MISS, sourceVPort->portNo,
+                                    (key.tunKey.dst != 0 ? &key.tunKey : NULL),
+                                    curNbl,
+                                    (sourceVPort->portId ==
+                                    switchContext->externalPortId),
+                                    &layers, switchContext, missedPackets,
+                                    countMissedPackets);
+    if (status == NDIS_STATUS_SUCCESS) {
+        /* Complete the packet since it was copied to user
+        * buffer.
+        */
+        RtlInitUnicodeString(&filterReason,
+                             L"OVS-Dropped since packet was copied to "
+                             L"userspace");
+    }
+    else {
+        RtlInitUnicodeString(&filterReason,
+                             L"OVS-Dropped due to failure to queue to "
+                             L"userspace");
+    }

-    for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) {
-        POVS_VPORT_ENTRY vport;
-        UINT32 portNo;
-        OVS_DATAPATH *datapath = &switchContext->datapath;
-        OVS_PACKET_HDR_INFO layers;
-        OvsFlowKey key;
-        UINT64 hash;
-        PNET_BUFFER curNb;
+    OvsAddPktCompletionList(completionList, TRUE, sourceVPort->portId, curNbl,
+                            0, &filterReason);
+}

-        nextNbl = curNbl->Next;
-        curNbl->Next = NULL;
+static NDIS_STATUS
+OvsProcessOneNbl(PNET_BUFFER_LIST origNbl,
+                 POVS_SWITCH_CONTEXT switchContext,
+                 ULONG sendFlags,
+                 OvsCompletionList *completionList,
+                 PLIST_ENTRY missedPackets,
+                 PUINT32 countMissedPackets,
+                 PWCHAR* failReason,
+                 PNDIS_SWITCH_PORT_ID outSourcePortId)
+{
+    POVS_VPORT_ENTRY sourceVPort;
+    PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTagValue;
+    PNET_BUFFER_LIST nextNbl, newNbl;
+    POVS_BUFFER_CONTEXT bufferContext;
+    LOCK_STATE_EX lockState;
+    BOOLEAN atDispatch;
+    NDIS_STATUS status = STATUS_SUCCESS;

-        /* Ethernet Header is a guaranteed safe access. */
-        curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
-        if (curNb->Next != NULL) {
-            /* XXX: This case is not handled yet. */
-            ASSERT(FALSE);
-        } else {
-            POVS_BUFFER_CONTEXT ctx;
-            OvsFlow *flow;
-
-            fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
-            sourcePort = fwdDetail->SourcePortId;
-            sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex;
-
-            /* Take the DispatchLock so none of the VPORTs disconnect while
-             * we are setting destination ports.
-             *
-             * XXX: acquire/release the dispatch lock for a "batch" of packets
-             * rather than for each packet. */
-            NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState,
-                                  dispatch);
-
-            ctx = OvsInitExternalNBLContext(switchContext, curNbl,
-                                  sourcePort == switchContext->externalPortId);
-            if (ctx == NULL) {
-                RtlInitUnicodeString(&filterReason,
-                                     L"Cannot allocate external NBL context.");
-
-                OvsStartNBLIngressError(switchContext, curNbl,
-                                        sendCompleteFlags, &filterReason,
-                                        NDIS_STATUS_RESOURCES);
-                NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
-                continue;
-            }
-
-            vport = OvsFindVportByPortIdAndNicIndex(switchContext, sourcePort,
-                                                    sourceIndex);
-            if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
-                RtlInitUnicodeString(&filterReason,
-                    L"OVS-Cannot forward packet from unknown source port");
-                goto dropit;
-            } else {
-                portNo = vport->portNo;
-            }
-
-            vport->stats.rxPackets++;
-            vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb);
-
-            status = OvsExtractFlow(curNbl, vport->portNo, &key, &layers, NULL);
-            if (status != NDIS_STATUS_SUCCESS) {
-                RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed");
-                goto dropit;
-            }
-
-            ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
-            OvsAcquireDatapathRead(datapath, &dpLockState, dispatch);
-
-            flow = OvsLookupFlow(datapath, &key, &hash, FALSE);
-            if (flow) {
-                OvsFlowUsed(flow, curNbl, &layers);
-                datapath->hits++;
-                /* If successful, OvsActionsExecute() consumes the NBL.
-                 * Otherwise, it adds it to the completionList. No need to
-                 * check the return value. */
-                OvsActionsExecute(switchContext, &completionList, curNbl,
-                                portNo, SendFlags, &key, &hash, &layers,
-                                flow->actions, flow->actionsLen);
-                OvsReleaseDatapath(datapath, &dpLockState);
-                NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
-                continue;
-            } else {
-                OvsReleaseDatapath(datapath, &dpLockState);
-
-                datapath->misses++;
-                status = OvsCreateAndAddPackets(OVS_DEFAULT_PACKET_QUEUE,
-                                                NULL, 0, OVS_PACKET_CMD_MISS,
-                                                portNo,
-                                                key.tunKey.dst != 0 ?
-                                                (OvsIPv4TunnelKey *)&key.tunKey :
-                                                NULL, curNbl,
-                                                sourcePort ==
-                                                switchContext->externalPortId,
-                                                &layers, switchContext,
-                                                &missedPackets, &num);
-                if (status == NDIS_STATUS_SUCCESS) {
-                    /* Complete the packet since it was copied to user
-                     * buffer. */
-                    RtlInitUnicodeString(&filterReason,
-                        L"OVS-Dropped since packet was copied to userspace");
-                } else {
-                    RtlInitUnicodeString(&filterReason,
-                        L"OVS-Dropped due to failure to queue to userspace");
-                }
-                goto dropit;
-            }
-
-dropit:
-            OvsAddPktCompletionList(&completionList, TRUE, sourcePort, curNbl, 0,
-                                    &filterReason);
-            NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
-        }
+    PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
+    NDIS_SWITCH_PORT_ID hvSourcePort = NDIS_SWITCH_DEFAULT_PORT_ID;
+    NDIS_SWITCH_NIC_INDEX hvSourceIndex = NDIS_SWITCH_DEFAULT_NIC_INDEX;
+
+    atDispatch = (NDIS_TEST_SEND_AT_DISPATCH_LEVEL(sendFlags) ?
+                  NDIS_RWL_AT_DISPATCH_LEVEL : 0);
+    fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(origNbl);
+    hvSourcePort = fwdDetail->SourcePortId;
+    *outSourcePortId = hvSourcePort;
+    hvSourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex;
+
+    vlanTagValue = NET_BUFFER_LIST_INFO(origNbl, Ieee8021QNetBufferListInfo);
+
+    /* create buffer context */
+    bufferContext = OvsInitExternalNBLContext(switchContext, origNbl,
+                                              hvSourcePort ==
+                                              switchContext->externalPortId);
+    if (bufferContext == NULL) {
+        *failReason = L"Cannot allocate external NBL context.";
+        status = NDIS_STATUS_RESOURCES;
+        return status;
     }

-    /* Queue the missed packets. */
-    OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, num);
-    OvsFinalizeCompletionList(&completionList);
+    /*
+     * Split the NBL so that each resulting NBL contains exactly one NB.
+     * NOTE: if origNbl has only one NB, newNbl will be origNbl itself.
+     */
+    newNbl = OvsSplitNblByNB(origNbl, switchContext, TRUE);
+    if (!newNbl) {
+        *failReason = L"Cannot allocate new NBL: partial copy NB to "
+            L"multiple NBLs.";
+        status = NDIS_STATUS_RESOURCES;
+        goto Cleanup;
+    }
+
+    /*
+    * Take the DispatchLock so none of the VPORTs disconnect while
+    * we are setting destination ports.
+    */
+    NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState,
+                          atDispatch);
+
+    sourceVPort = OvsFindVportByPortIdAndNicIndex(switchContext, hvSourcePort,
+                                                  hvSourceIndex);
+    if (sourceVPort == NULL || sourceVPort->ovsState != OVS_STATE_CONNECTED) {
+        *failReason = L"OVS-Cannot forward packet from unknown source port";
+        status = NDIS_STATUS_INVALID_PORT;
+        goto Cleanup;
+    }
+
+    /* update vport stats */
+    OVS_NBL_FOR_EACH(nbl, newNbl) {
+        PNET_BUFFER firstNb = NET_BUFFER_LIST_FIRST_NB(nbl);
+
+        sourceVPort->stats.rxPackets++;
+        sourceVPort->stats.rxBytes += NET_BUFFER_DATA_LENGTH(firstNb);
+    }
+
+    OVS_NBL_FOR_EACH_NEXT(curNbl, newNbl, nextNbl) {
+        nextNbl = NET_BUFFER_LIST_NEXT_NBL(curNbl);
+        NET_BUFFER_LIST_NEXT_NBL(curNbl) = NULL;
+
+        OvsProcessOneNb(curNbl, switchContext, sourceVPort, completionList,
+                        sendFlags, missedPackets, countMissedPackets,
+                        vlanTagValue);
+    }
+
+Cleanup:
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+
+    return status;
 }


@@ -340,9 +376,17 @@ OvsExtSendNBL(NDIS_HANDLE filterModuleContext,
     POVS_SWITCH_CONTEXT switchContext;
     switchContext = (POVS_SWITCH_CONTEXT) filterModuleContext;

+    PNET_BUFFER_LIST nextNbl = NULL;
+    ULONG sendCompleteFlags;
+    LIST_ENTRY missedPackets;
+    UINT32 countMissedPackets = 0;
+    OvsCompletionList completionList;
+
+    sendCompleteFlags = OvsGetSendCompleteFlags(sendFlags);
+    sendFlags |= NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP;
+
     if (switchContext->dataFlowState == OvsSwitchPaused) {
         NDIS_STRING filterReason;
-        ULONG sendCompleteFlags = OvsGetSendCompleteFlags(sendFlags);

         RtlInitUnicodeString(&filterReason,
                              L"Switch state PAUSED, drop on ingress.");
@@ -354,7 +398,37 @@ OvsExtSendNBL(NDIS_HANDLE filterModuleContext,

     ASSERT(switchContext->dataFlowState == OvsSwitchRunning);

-    OvsStartNBLIngress(switchContext, netBufferLists, sendFlags);
+    InitializeListHead(&missedPackets);
+    OvsInitCompletionList(&completionList, switchContext, sendCompleteFlags);
+
+    OVS_NBL_FOR_EACH_NEXT(curNbl, netBufferLists, nextNbl) {
+        PWCHAR failReason = NULL;
+        NDIS_STATUS status;
+        NDIS_STRING filterReason;
+        NDIS_SWITCH_PORT_ID sourcePortId;
+
+        nextNbl = curNbl->Next;
+        curNbl->Next = NULL;
+
+        status = OvsProcessOneNbl(curNbl, switchContext, sendFlags,
+                                  &completionList, &missedPackets,
+                                  &countMissedPackets, &failReason,
+                                  &sourcePortId);
+        if (NT_ERROR(status)) {
+            RtlInitUnicodeString(&filterReason, failReason);
+
+            OvsAddPktCompletionList(&completionList, TRUE, sourcePortId,
+                                    curNbl, 0, &filterReason);
+
+            OvsReportNBLIngressError(switchContext, curNbl, &filterReason,
+                                     status);
+            continue;
+        }
+    }
+
+    /* Queue the missed packets. */
+    OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, countMissedPackets);
+    OvsFinalizeCompletionList(&completionList);
 }

 static VOID
@@ -382,7 +456,8 @@ OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext,

     /* Complete the NBL's that were sent by the upper layer. */
     if (newList.dropNbl != NULL) {
-        NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, newList.dropNbl,
+        NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle,
+                                        newList.dropNbl,
                                         sendCompleteFlags);
     }
 }
diff --git a/datapath-windows/ovsext/PacketParser.c b/datapath-windows/ovsext/PacketParser.c
index 2c955e1..9d94e58 100644
--- a/datapath-windows/ovsext/PacketParser.c
+++ b/datapath-windows/ovsext/PacketParser.c
@@ -18,13 +18,12 @@

 //XXX consider moving to NdisGetDataBuffer.
 const VOID *
-OvsGetPacketBytes(const NET_BUFFER_LIST *nbl,
+OvsGetPacketBytes(const NET_BUFFER *netBuffer,
                   UINT32 len,
                   UINT32 srcOffset,
                   VOID *storage)
 {
     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
-    PNET_BUFFER netBuffer = NET_BUFFER_LIST_FIRST_NB(nbl);
     PMDL currentMdl;
     BOOLEAN firstMDL = TRUE;
     ULONG destOffset = 0;
@@ -83,7 +82,7 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl,
 }

 NDIS_STATUS
-OvsParseIPv6(const NET_BUFFER_LIST *packet,
+OvsParseIPv6(const NET_BUFFER *packet,
           OvsFlowKey *key,
           POVS_PACKET_HDR_INFO layers)
 {
@@ -134,7 +133,8 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet,
             const IPv6ExtHdr *extHdr;
             UINT8 len;

-            extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs, &extHdrStorage);
+            extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs,
+                                       &extHdrStorage);
             if (!extHdr) {
                 return NDIS_STATUS_FAILURE;
             }
@@ -142,7 +142,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet,
             len = extHdr->hdrExtLen;
             ofs += nextHdr == SOCKET_IPPROTO_AH ? (len + 2) * 4 : (len + 1) * 8;
             nextHdr = extHdr->nextHeader;
-            if (OvsPacketLenNBL(packet) < ofs) {
+            if (NET_BUFFER_DATA_LENGTH(packet) < ofs) {
                 return NDIS_STATUS_FAILURE;
              }
         } else if (nextHdr == SOCKET_IPPROTO_FRAGMENT) {
@@ -177,9 +177,9 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet,
 }

 VOID
-OvsParseTcp(const NET_BUFFER_LIST *packet,
-         L4Key *flow,
-         POVS_PACKET_HDR_INFO layers)
+OvsParseTcp(const NET_BUFFER *packet,
+            L4Key *flow,
+            POVS_PACKET_HDR_INFO layers)
 {
     TCPHdr tcpStorage;
     const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage);
@@ -192,9 +192,9 @@ OvsParseTcp(const NET_BUFFER_LIST *packet,
 }

 VOID
-OvsParseUdp(const NET_BUFFER_LIST *packet,
-         L4Key *flow,
-         POVS_PACKET_HDR_INFO layers)
+OvsParseUdp(const NET_BUFFER *packet,
+            L4Key *flow,
+            POVS_PACKET_HDR_INFO layers)
 {
     UDPHdr udpStorage;
     const UDPHdr *udp = OvsGetUdp(packet, layers->l4Offset, &udpStorage);
@@ -210,9 +210,9 @@ OvsParseUdp(const NET_BUFFER_LIST *packet,
 }

 NDIS_STATUS
-OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
-            OvsFlowKey *key,
-            POVS_PACKET_HDR_INFO layers)
+OvsParseIcmpV6(const NET_BUFFER *packet,
+               OvsFlowKey *key,
+               POVS_PACKET_HDR_INFO layers)
 {
     UINT16 ofs = layers->l4Offset;
     ICMPHdr icmpStorage;
@@ -249,7 +249,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
         }
         flow->ndTarget = *ndTarget;

-        while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) {
+        while ((UINT32)(ofs + 8) <= NET_BUFFER_DATA_LENGTH(packet)) {
             /*
              * The minimum size of an option is 8 bytes, which also is
              * the size of Ethernet link-layer options.
@@ -258,13 +258,15 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
             const IPv6NdOptHdr *ndOpt;
             UINT16 optLen;

-            ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs, &ndOptStorage);
+            ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs,
+                                      &ndOptStorage);
             if (!ndOpt) {
                 return NDIS_STATUS_FAILURE;
             }

             optLen = ndOpt->len * 8;
-            if (!optLen || (UINT32)(ofs + optLen) >  OvsPacketLenNBL(packet)) {
+            if (!optLen || (UINT32)(ofs + optLen) >
+                NET_BUFFER_DATA_LENGTH(packet)) {
                 goto invalid;
             }

diff --git a/datapath-windows/ovsext/PacketParser.h b/datapath-windows/ovsext/PacketParser.h
index 55d110f..6f5d42c 100644
--- a/datapath-windows/ovsext/PacketParser.h
+++ b/datapath-windows/ovsext/PacketParser.h
@@ -20,15 +20,15 @@
 #include "precomp.h"
 #include "NetProto.h"

-const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len,
+const VOID* OvsGetPacketBytes(const NET_BUFFER *_pNB, UINT32 len,
                               UINT32 SrcOffset, VOID *storage);
-NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
+NDIS_STATUS OvsParseIPv6(const NET_BUFFER *packet, OvsFlowKey *key,
                         POVS_PACKET_HDR_INFO layers);
-VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow,
+VOID OvsParseTcp(const NET_BUFFER *packet, L4Key *flow,
                  POVS_PACKET_HDR_INFO layers);
-VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow,
+VOID OvsParseUdp(const NET_BUFFER *packet, L4Key *flow,
                  POVS_PACKET_HDR_INFO layers);
-NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
+NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER *packet, OvsFlowKey *key,
                             POVS_PACKET_HDR_INFO layers);

 static __inline ULONG
@@ -58,7 +58,7 @@ OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB)
  * which C does not allow.
  */
 static UINT16
-OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN
+OvsGetTcpCtl(const NET_BUFFER *packet, // IN
              const POVS_PACKET_HDR_INFO layers) // IN
 {
 #define TCP_CTL_OFS 12                // Offset of "ctl" field in TCP header.
@@ -74,7 +74,7 @@ OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN


 static UINT8
-OvsGetTcpFlags(const NET_BUFFER_LIST *packet,    // IN
+OvsGetTcpFlags(const NET_BUFFER *packet,    // IN
                const OvsFlowKey *key,   // IN
                const POVS_PACKET_HDR_INFO layers) // IN
 {
@@ -88,7 +88,7 @@ OvsGetTcpFlags(const NET_BUFFER_LIST *packet,    // IN
 }

 static const EtherArp *
-OvsGetArp(const NET_BUFFER_LIST *packet,
+OvsGetArp(const NET_BUFFER *packet,
           UINT32 ofs,
           EtherArp *storage)
 {
@@ -96,14 +96,15 @@ OvsGetArp(const NET_BUFFER_LIST *packet,
 }

 static const IPHdr *
-OvsGetIp(const NET_BUFFER_LIST *packet,
+OvsGetIp(const NET_BUFFER *packet,
          UINT32 ofs,
          IPHdr *storage)
 {
     const IPHdr *ip = OvsGetPacketBytes(packet, sizeof *ip, ofs, storage);
     if (ip) {
         int ipLen = ip->ihl * 4;
-        if (ipLen >= sizeof *ip && OvsPacketLenNBL(packet) >= ofs + ipLen) {
+        if (ipLen >= sizeof *ip &&
+            NET_BUFFER_DATA_LENGTH(packet) >= ofs + ipLen) {
             return ip;
         }
     }
@@ -111,14 +112,15 @@ OvsGetIp(const NET_BUFFER_LIST *packet,
 }

 static const TCPHdr *
-OvsGetTcp(const NET_BUFFER_LIST *packet,
+OvsGetTcp(const NET_BUFFER *packet,
           UINT32 ofs,
           TCPHdr *storage)
 {
     const TCPHdr *tcp = OvsGetPacketBytes(packet, sizeof *tcp, ofs, storage);
     if (tcp) {
         int tcpLen = tcp->doff * 4;
-        if (tcpLen >= sizeof *tcp && OvsPacketLenNBL(packet) >= ofs + tcpLen) {
+        if (tcpLen >= sizeof *tcp &&
+            NET_BUFFER_DATA_LENGTH(packet) >= ofs + tcpLen) {
             return tcp;
         }
     }
@@ -126,7 +128,7 @@ OvsGetTcp(const NET_BUFFER_LIST *packet,
 }

 static const UDPHdr *
-OvsGetUdp(const NET_BUFFER_LIST *packet,
+OvsGetUdp(const NET_BUFFER *packet,
           UINT32 ofs,
           UDPHdr *storage)
 {
@@ -134,7 +136,7 @@ OvsGetUdp(const NET_BUFFER_LIST *packet,
 }

 static const ICMPHdr *
-OvsGetIcmp(const NET_BUFFER_LIST *packet,
+OvsGetIcmp(const NET_BUFFER *packet,
            UINT32 ofs,
            ICMPHdr *storage)
 {
diff --git a/datapath-windows/ovsext/Tunnel.c b/datapath-windows/ovsext/Tunnel.c
index 2e7da10..d41f829 100644
--- a/datapath-windows/ovsext/Tunnel.c
+++ b/datapath-windows/ovsext/Tunnel.c
@@ -227,6 +227,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
     OVS_DATAPATH *datapath = &gOvsSwitchContext->datapath;

     ASSERT(gOvsSwitchContext);
+    ASSERT(pNbl->FirstNetBuffer->Next == NULL);

     /* Fill the tunnel key */
     status = OvsSlowPathDecapVxlan(pNbl, &tunnelKey);
@@ -264,6 +265,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
         UINT64 hash;
         PNET_BUFFER curNb;
         OvsFlow *flow;
+        PVOID vlanTagValue;

         fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);

@@ -275,7 +277,6 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
          */

         curNb = NET_BUFFER_LIST_FIRST_NB(pNbl);
-        ASSERT(curNb->Next == NULL);

         NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, dispatch);

@@ -294,20 +295,22 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
         ASSERT(vport->ovsType == OVSWIN_VPORT_TYPE_VXLAN);

         portNo = vport->portNo;
+        vlanTagValue = NET_BUFFER_LIST_INFO(pNbl, Ieee8021QNetBufferListInfo);

-        status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunnelKey);
+        status = OvsExtractFlow(curNb, portNo, &key, &layers,
+                                &tunnelKey, vlanTagValue);
         if (status != NDIS_STATUS_SUCCESS) {
             goto unlockAndDrop;
         }

         flow = OvsLookupFlow(datapath, &key, &hash, FALSE);
         if (flow) {
-            OvsFlowUsed(flow, pNbl, &layers);
+            OvsFlowUsed(flow, curNb, &layers);
             datapath->hits++;

             OvsActionsExecute(gOvsSwitchContext, &completionList, pNbl,
-                            portNo, SendFlags, &key, &hash, &layers,
-                            flow->actions, flow->actionsLen);
+                              portNo, SendFlags, &key, &hash, &layers,
+                              flow->actions, flow->actionsLen);

             OvsReleaseDatapath(datapath, &dpLockState);
         } else {
diff --git a/datapath-windows/ovsext/User.c b/datapath-windows/ovsext/User.c
index 612a4bd..7ea5ee0 100644
--- a/datapath-windows/ovsext/User.c
+++ b/datapath-windows/ovsext/User.c
@@ -360,14 +360,20 @@ OvsExecuteDpIoctl(PVOID inputBuffer,
         fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
         fwdDetail->SourceNicIndex = 0;
     }
-    // XXX: Figure out if any of the other members of fwdDetail need to be set.
+    /*
+     * XXX: Figure out if any of the other members of fwdDetail need to be
+     * set.
+     */

-    ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
-                              NULL);
+    ASSERT(pNbl->FirstNetBuffer->Next == NULL);
+
+    ndisStatus = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(pNbl),
+                                fwdDetail->SourcePortId, &key, &layers, NULL,
+                                NULL);
     if (ndisStatus == NDIS_STATUS_SUCCESS) {
         ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
         NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
-                              NDIS_RWL_AT_DISPATCH_LEVEL);
+            NDIS_RWL_AT_DISPATCH_LEVEL);
         ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
                                        vport ? vport->portNo :
                                                OVS_DEFAULT_PORT_NO,
@@ -828,6 +834,8 @@ OvsCreateAndAddPackets(UINT32 queueId,
     PNET_BUFFER_LIST newNbl = NULL;
     PNET_BUFFER nb;

+    ASSERT(nbl->FirstNetBuffer->Next == NULL);
+
     if (hdrInfo->isTcp) {
         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
         UINT32 packetLength;
diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c
index 3a1291c..a192e96 100644
--- a/datapath-windows/ovsext/Vxlan.c
+++ b/datapath-windows/ovsext/Vxlan.c
@@ -115,6 +115,8 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
     UINT32 headRoom = OvsGetVxlanTunHdrSize();
     UINT32 packetLength;

+    ASSERT(curNbl->FirstNetBuffer->Next == NULL);
+
     /*
      * XXX: the assumption currently is that the NBL is owned by OVS, and
      * headroom has already been allocated as part of allocating the NBL and
@@ -285,11 +287,18 @@ OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl,
     OvsFlowKey key;
     NDIS_STATUS status;
     UNREFERENCED_PARAMETER(inPort);
+    PVOID vlanTagValue;
+
+    ASSERT(curNbl->FirstNetBuffer->Next == NULL);

-    status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL);
+    vlanTagValue = NET_BUFFER_LIST_INFO(curNbl, Ieee8021QNetBufferListInfo);
+
+    status = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(curNbl), inPort, &key,
+                            &layers, NULL, vlanTagValue);
     if (result == STATUS_SUCCESS) {
         status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers,
-                (POVS_SWITCH_CONTEXT)cbData1, NULL);
+                                 (POVS_SWITCH_CONTEXT)cbData1,
+                                 NULL);
     } else {
         status = NDIS_STATUS_FAILURE;
     }
@@ -456,9 +465,16 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
     OVS_PACKET_HDR_INFO layers;

     layers.value = 0;
+    ASSERT(packet->FirstNetBuffer->Next == NULL);

     do {
-        nh = OvsGetIp(packet, layers.l3Offset, &ip_storage);
+        PNET_BUFFER pNb = NET_BUFFER_LIST_FIRST_NB(packet);
+
+        /*
+         * This is an encapsulated (UDP) packet, so we can use the first NB
+         * to start the check.
+         */
+        nh = OvsGetIp(pNb, layers.l3Offset, &ip_storage);
         if (nh) {
             layers.l4Offset = layers.l3Offset + nh->ihl * 4;
         } else {
@@ -466,7 +482,7 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
         }

         /* make sure it's a VXLAN packet */
-        udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage);
+        udp = OvsGetUdp(pNb, layers.l4Offset, &udpStorage);
         if (udp) {
             layers.l7Offset = layers.l4Offset + sizeof *udp;
         } else {
@@ -476,7 +492,11 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
         /* XXX Should be tested against the dynamic port # in the VXLAN vport */
         ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT));

-        VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet,
+        /*
+         * There may be multiple VXLAN packets here that need decapsulating.
+         * For the VXLAN header, we need only the first NET_BUFFER.
+         */
+        VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(pNb,
                                                     sizeof(*VxlanHeader),
                                                     layers.l7Offset,
                                                     &VxlanHeaderBuffer);
--
1.8.3.msysgit.0




More information about the dev mailing list