[ovs-dev] [PATCH] Create a NBL for each NB when required

Alin Serdean aserdean at cloudbasesolutions.com
Tue Aug 19 12:31:48 UTC 2014


ovs/ovs-issues#15

All NET_BUFFERs of a NET_BUFFER_LIST must go through
the pipeline: extract, find flow, execute. Previously,
only the first NET_BUFFER of a NET_BUFFER_LIST was going
through this pipeline, which was erroneous.

OvsPartialCopyToMultipleNBLs is used to make each NET_BUFFER
have its own NET_BUFFER_LIST.

Some functions that used to take as argument a NET_BUFFER_LIST
now take as argument a NET_BUFFER. I have also added a few
ASSERTs where the NET_BUFFER_LIST is expected to have
only one NET_BUFFER."

Signed-off-by: Samuel Ghinet <sghinet at cloudbasesolutions.com>
Co-authored-by: Alin Gabriel Serdean <aserdean at cloudbasesolutions.com>
---
 datapath-windows/ovsext/OvsActions.c      |  20 ++++-
 datapath-windows/ovsext/OvsBufferMgmt.c   |   8 +-
 datapath-windows/ovsext/OvsBufferMgmt.h   |   5 ++
 datapath-windows/ovsext/OvsChecksum.c     |   3 +-
 datapath-windows/ovsext/OvsFlow.c         |  44 +++++----
 datapath-windows/ovsext/OvsFlow.h         |   6 +-
 datapath-windows/ovsext/OvsPacketIO.c     | 143 +++++++++++++++++++-----------
 datapath-windows/ovsext/OvsPacketParser.c |  24 ++---
 datapath-windows/ovsext/OvsPacketParser.h |  30 ++++---
 datapath-windows/ovsext/OvsTunnel.c       |  12 ++-
 datapath-windows/ovsext/OvsUser.c         |  19 +++-
 datapath-windows/ovsext/OvsVxlan.c        |  37 ++++++--
 12 files changed, 221 insertions(+), 130 deletions(-)

diff --git a/datapath-windows/ovsext/OvsActions.c b/datapath-windows/ovsext/OvsActions.c
index 635becc..c5c5116 100644
--- a/datapath-windows/ovsext/OvsActions.c
+++ b/datapath-windows/ovsext/OvsActions.c
@@ -517,6 +517,9 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
     OvsFlow *flow;
     UINT64 hash;
     NDIS_STATUS status;
+    VOID* vlanTagValue;
+    NET_BUFFER* pNb;
+
     POVS_VPORT_ENTRY vport =
         OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
     if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
@@ -527,12 +530,19 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
         return NDIS_STATUS_SUCCESS;
     }
     ASSERT(vport->nicState == NdisSwitchNicStateConnected);
+    ASSERT(ovsFwdCtx->curNbl->FirstNetBuffer->Next == NULL);
 
     /* Assert that in the Rx direction, key is always setup. */
     ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
-    status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
-                          &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ?
-                                         &ovsFwdCtx->tunKey : NULL);
+    vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl,
+                                        Ieee8021QNetBufferListInfo);
+
+    pNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
+
+    status = OvsExtractFlow(pNb, ovsFwdCtx->srcVportNo,
+                            &key, &ovsFwdCtx->layers,
+                            ovsFwdCtx->tunKey.dst != 0 ?
+                            &ovsFwdCtx->tunKey : NULL, vlanTagValue);
     if (status != NDIS_STATUS_SUCCESS) {
         OvsCompleteNBLForwardingCtx(ovsFwdCtx,
                                     L"OVS-Flow extract failed");
@@ -542,7 +552,7 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
 
     flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
     if (flow) {
-        OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
+        OvsFlowUsed(flow, pNb, &ovsFwdCtx->layers);
         ovsFwdCtx->switchContext->datapath.hits++;
         status = OvsActionsExecute(ovsFwdCtx->switchContext,
                                  ovsFwdCtx->completionList, ovsFwdCtx->curNbl,
@@ -1370,6 +1380,8 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail =
         NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
 
+    ASSERT(curNbl->FirstNetBuffer->Next == NULL);
+
     /* XXX: ASSERT that the flow table lock is held. */
     status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo,
                                   sendFlags, fwdDetail, completionList,
diff --git a/datapath-windows/ovsext/OvsBufferMgmt.c b/datapath-windows/ovsext/OvsBufferMgmt.c
index 8aa8060..c367d2f 100644
--- a/datapath-windows/ovsext/OvsBufferMgmt.c
+++ b/datapath-windows/ovsext/OvsBufferMgmt.c
@@ -863,7 +863,7 @@ OvsPartialCopyToMultipleNBLs(PVOID ovsContext,
         if (prevNbl == NULL) {
             firstNbl = newNbl;
         } else {
-            NET_BUFFER_LIST_NEXT_NBL(prevNbl) = nbl;
+            NET_BUFFER_LIST_NEXT_NBL(prevNbl) = newNbl;
             NET_BUFFER_NEXT_NB(prevNb) = nb;
         }
         prevNbl = newNbl;
@@ -1085,7 +1085,7 @@ nblcopy_error:
  * --------------------------------------------------------------------------
  */
 static NDIS_STATUS
-GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
+GetSegmentHeaderInfo(PNET_BUFFER nb,
                      const POVS_PACKET_HDR_INFO hdrInfo,
                      UINT32 *hdrSize, UINT32 *seqNumber)
 {
@@ -1093,7 +1093,7 @@ GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
     const TCPHdr *tcp;
 
     /* Parse the orginal Eth/IP/TCP header */
-    tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage);
+    tcp = OvsGetPacketBytes(nb, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage);
     if (tcp == NULL) {
         return NDIS_STATUS_FAILURE;
     }
@@ -1199,7 +1199,7 @@ OvsTcpSegmentNBL(PVOID ovsContext,
     ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL);
 
     /* Figure out the segment header size */
-    status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber);
+    status = GetSegmentHeaderInfo(nb, hdrInfo, &hdrSize, &seqNumber);
     if (status != NDIS_STATUS_SUCCESS) {
         OVS_LOG_INFO("Cannot parse NBL header");
         return NULL;
diff --git a/datapath-windows/ovsext/OvsBufferMgmt.h b/datapath-windows/ovsext/OvsBufferMgmt.h
index 9c00b1b..8e822e0 100644
--- a/datapath-windows/ovsext/OvsBufferMgmt.h
+++ b/datapath-windows/ovsext/OvsBufferMgmt.h
@@ -81,6 +81,11 @@ typedef struct _OVS_NBL_POOL {
 #endif
 } OVS_NBL_POOL, *POVS_NBL_POOL;
 
+#define OVS_NBL_FOR_EACH(pCurNbl, nblList)         \
+for (NET_BUFFER_LIST* pCurNbl = nblList;           \
+    pCurNbl != NULL;                               \
+    pCurNbl = NET_BUFFER_LIST_NEXT_NBL(pCurNbl))
+
 
 NDIS_STATUS OvsInitBufferPool(PVOID context);
 VOID OvsCleanupBufferPool(PVOID context);
diff --git a/datapath-windows/ovsext/OvsChecksum.c b/datapath-windows/ovsext/OvsChecksum.c
index e192373..0c65221 100644
--- a/datapath-windows/ovsext/OvsChecksum.c
+++ b/datapath-windows/ovsext/OvsChecksum.c
@@ -536,7 +536,8 @@ OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl,
 
     /* Next, check if the NIC did not validate the RX checksum. */
     if (!csumInfo.Receive.IpChecksumSucceeded) {
-        ipHdr = OvsGetIp(curNbl, hdrInfo->l3Offset, &ip_storage);
+        ipHdr = OvsGetIp(NET_BUFFER_LIST_FIRST_NB(curNbl),
+                         hdrInfo->l3Offset, &ip_storage);
         if (ipHdr) {
             ip_storage = *ipHdr;
             hdrChecksum = ipHdr->check;
diff --git a/datapath-windows/ovsext/OvsFlow.c b/datapath-windows/ovsext/OvsFlow.c
index daa64e0..19f6b2a 100644
--- a/datapath-windows/ovsext/OvsFlow.c
+++ b/datapath-windows/ovsext/OvsFlow.c
@@ -104,7 +104,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath,
 
 /*
  *----------------------------------------------------------------------------
- *  GetStartAddrNBL --
+ *  GetStartAddrNB --
  *    Get the virtual address of the frame.
  *
  *  Results:
@@ -112,7 +112,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath,
  *----------------------------------------------------------------------------
  */
 static __inline VOID *
-GetStartAddrNBL(const NET_BUFFER_LIST *_pNB)
+GetStartAddrNB(const NET_BUFFER *_pNB)
 {
     PMDL curMdl;
     PUINT8 curBuffer;
@@ -121,21 +121,21 @@ GetStartAddrNBL(const NET_BUFFER_LIST *_pNB)
     ASSERT(_pNB);
 
     // Ethernet Header is a guaranteed safe access.
-    curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl;
+    curMdl = (_pNB)->CurrentMdl;
     curBuffer =  MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
     if (!curBuffer) {
         return NULL;
     }
 
     curHeader = (PEthHdr)
-    (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset);
+        (curBuffer + (_pNB)->CurrentMdlOffset);
 
     return (VOID *) curHeader;
 }
 
 VOID
 OvsFlowUsed(OvsFlow *flow,
-            const NET_BUFFER_LIST *packet,
+            const NET_BUFFER *packet,
             const POVS_PACKET_HDR_INFO layers)
 {
     LARGE_INTEGER tickCount;
@@ -144,7 +144,7 @@ OvsFlowUsed(OvsFlow *flow,
     flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick;
     flow->used += ovsUserTimestampDelta;
     flow->packetCount++;
-    flow->byteCount += OvsPacketLenNBL(packet);
+    flow->byteCount += NET_BUFFER_DATA_LENGTH(packet);
     flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers);
 }
 
@@ -191,15 +191,14 @@ DeleteAllFlows(OVS_DATAPATH *datapath)
  *----------------------------------------------------------------------------
  */
 NDIS_STATUS
-OvsExtractFlow(const NET_BUFFER_LIST *packet,
+OvsExtractFlow(const NET_BUFFER* pNb,
                UINT32 inPort,
                OvsFlowKey *flow,
                POVS_PACKET_HDR_INFO layers,
-               OvsIPv4TunnelKey *tunKey)
+               OvsIPv4TunnelKey *tunKey, VOID* vlanTagValue)
 {
     struct Eth_Header *eth;
     UINT8 offset = 0;
-    PVOID vlanTagValue;
 
     layers->value = 0;
 
@@ -214,20 +213,19 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
 
     flow->l2.inPort = inPort;
 
-    if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) {
+    if (NET_BUFFER_DATA_LENGTH(pNb) < ETH_HEADER_LEN_DIX) {
         flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset;
         return NDIS_STATUS_SUCCESS;
     }
 
     /* Link layer. */
-    eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet);
+    eth = (Eth_Header *)GetStartAddrNB(pNb);
     memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH);
     memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH);
 
     /*
      * vlan_tci.
      */
-    vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo);
     if (vlanTagValue) {
         PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag =
             (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
@@ -262,7 +260,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
     if (ETH_TYPENOT8023(eth->dix.typeNBO)) {
         flow->l2.dlType = eth->dix.typeNBO;
         layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
-    } else if (OvsPacketLenNBL(packet)  >= ETH_HEADER_LEN_802_3 &&
+    } else if (NET_BUFFER_DATA_LENGTH(pNb)  >= ETH_HEADER_LEN_802_3 &&
               eth->e802_3.llc.dsap == 0xaa &&
               eth->e802_3.llc.ssap == 0xaa &&
               eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME &&
@@ -285,7 +283,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
 
         flow->l2.keyLen += OVS_IP_KEY_SIZE;
         layers->isIPv4 = 1;
-        nh = OvsGetIp(packet, layers->l3Offset, &ip_storage);
+        nh = OvsGetIp(pNb, layers->l3Offset, &ip_storage);
         if (nh) {
             layers->l4Offset = layers->l3Offset + nh->ihl * 4;
 
@@ -309,14 +307,14 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
 
             if (!(nh->frag_off & htons(IP_OFFSET))) {
                 if (ipKey->nwProto == SOCKET_IPPROTO_TCP) {
-                    OvsParseTcp(packet, &ipKey->l4, layers);
+                    OvsParseTcp(pNb, &ipKey->l4, layers);
                 } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) {
-                    OvsParseUdp(packet, &ipKey->l4, layers);
+                    OvsParseUdp(pNb, &ipKey->l4, layers);
                 } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) {
                     ICMPHdr icmpStorage;
                     const ICMPHdr *icmp;
 
-                    icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage);
+                    icmp = OvsGetIcmp(pNb, layers->l4Offset, &icmpStorage);
                     if (icmp) {
                         ipKey->l4.tpSrc = htons(icmp->type);
                         ipKey->l4.tpDst = htons(icmp->code);
@@ -331,7 +329,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
     } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) {
         NDIS_STATUS status;
         flow->l2.keyLen += OVS_IPV6_KEY_SIZE;
-        status = OvsParseIPv6(packet, flow, layers);
+        status = OvsParseIPv6(pNb, flow, layers);
         if (status != NDIS_STATUS_SUCCESS) {
             memset(&flow->ipv6Key, 0, sizeof (Ipv6Key));
             return status;
@@ -342,11 +340,11 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
         flow->ipv6Key.pad = 0;
 
         if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) {
-            OvsParseTcp(packet, &(flow->ipv6Key.l4), layers);
+            OvsParseTcp(pNb, &(flow->ipv6Key.l4), layers);
         } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) {
-            OvsParseUdp(packet, &(flow->ipv6Key.l4), layers);
+            OvsParseUdp(pNb, &(flow->ipv6Key.l4), layers);
         } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) {
-            OvsParseIcmpV6(packet, flow, layers);
+            OvsParseIcmpV6(pNb, flow, layers);
             flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE);
         }
     } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) {
@@ -357,7 +355,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
         ((UINT64 *)arpKey)[1] = 0;
         ((UINT64 *)arpKey)[2] = 0;
         flow->l2.keyLen += OVS_ARP_KEY_SIZE;
-        arp = OvsGetArp(packet, layers->l3Offset, &arpStorage);
+        arp = OvsGetArp(pNb, layers->l3Offset, &arpStorage);
         if (arp && arp->ea_hdr.ar_hrd == htons(1) &&
             arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) &&
             arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH &&
@@ -420,9 +418,7 @@ AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow)
      */
     KeMemoryBarrier();
 
-    //KeAcquireSpinLock(&FilterDeviceExtension->NblQueueLock, &oldIrql);
     InsertTailList(head, &flow->ListEntry);
-    //KeReleaseSpinLock(&FilterDeviceExtension->NblQueueLock, oldIrql);
 
     datapath->nFlows++;
 
diff --git a/datapath-windows/ovsext/OvsFlow.h b/datapath-windows/ovsext/OvsFlow.h
index 93368b3..aeec803 100644
--- a/datapath-windows/ovsext/OvsFlow.h
+++ b/datapath-windows/ovsext/OvsFlow.h
@@ -50,13 +50,13 @@ NDIS_STATUS OvsDeleteFlowTable(OVS_DATAPATH *datapath);
 NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath,
                                  POVS_SWITCH_CONTEXT switchContext);
 
-NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort,
+NDIS_STATUS OvsExtractFlow(const NET_BUFFER *pNb, UINT32 inPort,
                            OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers,
-                           OvsIPv4TunnelKey *tunKey);
+                           OvsIPv4TunnelKey *tunKey, VOID* vlanTagValue);
 OvsFlow *OvsLookupFlow(OVS_DATAPATH *datapath, const OvsFlowKey *key,
                        UINT64 *hash, BOOLEAN hashValid);
 UINT64 OvsHashFlow(const OvsFlowKey *key);
-VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER_LIST *pkt,
+VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER *pkt,
                  const POVS_PACKET_HDR_INFO layers);
 
 NTSTATUS OvsDumpFlowIoctl(PVOID inputBuffer, UINT32 inputLength,
diff --git a/datapath-windows/ovsext/OvsPacketIO.c b/datapath-windows/ovsext/OvsPacketIO.c
index 39e5703..d6d2e0e 100644
--- a/datapath-windows/ovsext/OvsPacketIO.c
+++ b/datapath-windows/ovsext/OvsPacketIO.c
@@ -211,61 +211,97 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext,
         OvsFlowKey key;
         UINT64 hash;
         PNET_BUFFER curNb;
+        VOID* vlanTagValue;
+        NET_BUFFER_LIST* pNextNbl;
 
         nextNbl = curNbl->Next;
         curNbl->Next = NULL;
 
         /* Ethernet Header is a guaranteed safe access. */
-        curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
-        if (curNb->Next != NULL) {
-            /* XXX: This case is not handled yet. */
-            ASSERT(FALSE);
+        POVS_BUFFER_CONTEXT ctx;
+        OvsFlow *flow;
+        NET_BUFFER_LIST* pNewNbl;
+        BOOLEAN dropNbl;
+
+        fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
+        sourcePort = fwdDetail->SourcePortId;
+        sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex;
+
+        /* Take the DispatchLock so none of the VPORTs disconnect while
+        * we are setting destination ports.
+        *
+        * XXX: acquire/release the dispatch lock for a "batch" of packets
+        * rather than for each packet. */
+        NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState,
+            dispatch);
+
+        ctx = OvsInitExternalNBLContext(switchContext, curNbl,
+            sourcePort == switchContext->externalPortId);
+        if (ctx == NULL) {
+            RtlInitUnicodeString(&filterReason,
+                L"Cannot allocate external NBL context.");
+
+            OvsStartNBLIngressError(switchContext, curNbl,
+                sendCompleteFlags, &filterReason,
+                NDIS_STATUS_RESOURCES);
+            NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+            continue;
+        }
+
+        vport = OvsFindVportByPortIdAndNicIndex(switchContext, sourcePort,
+            sourceIndex);
+        if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
+            RtlInitUnicodeString(&filterReason,
+                L"OVS-Cannot forward packet from unknown source port");
+            goto dropit;
         } else {
-            POVS_BUFFER_CONTEXT ctx;
-            OvsFlow *flow;
-
-            fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
-            sourcePort = fwdDetail->SourcePortId;
-            sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex;
-
-            /* Take the DispatchLock so none of the VPORTs disconnect while
-             * we are setting destination ports.
-             *
-             * XXX: acquire/release the dispatch lock for a "batch" of packets
-             * rather than for each packet. */
-            NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState,
-                                  dispatch);
-
-            ctx = OvsInitExternalNBLContext(switchContext, curNbl,
-                                  sourcePort == switchContext->externalPortId);
-            if (ctx == NULL) {
+            portNo = vport->portNo;
+        }
+
+        curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+        vport->stats.rxPackets++;
+        vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb);
+        vlanTagValue = NET_BUFFER_LIST_INFO(curNbl,
+                                            Ieee8021QNetBufferListInfo);
+
+        if (curNb->Next == NULL)
+        {
+            pNewNbl = curNbl;
+        }
+        else
+        {
+            InterlockedDecrement((volatile LONG*)&ctx->refCount);
+            pNewNbl = OvsPartialCopyToMultipleNBLs(switchContext, curNbl,
+                0, 0, TRUE);
+            if (!pNewNbl) {
                 RtlInitUnicodeString(&filterReason,
-                                     L"Cannot allocate external NBL context.");
+                    L"Cannot allocate new NBL: partial copy NB to "
+                    L"multiple NBLs.");
 
                 OvsStartNBLIngressError(switchContext, curNbl,
                                         sendCompleteFlags, &filterReason,
                                         NDIS_STATUS_RESOURCES);
+
                 NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
                 continue;
             }
+        }
 
-            vport = OvsFindVportByPortIdAndNicIndex(switchContext, sourcePort,
-                                                    sourceIndex);
-            if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
-                RtlInitUnicodeString(&filterReason,
-                    L"OVS-Cannot forward packet from unknown source port");
-                goto dropit;
-            } else {
-                portNo = vport->portNo;
-            }
-
-            vport->stats.rxPackets++;
-            vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb);
+        dropNbl = FALSE;
+        for (NET_BUFFER_LIST* pCurNbl = pNewNbl; pCurNbl != NULL; pCurNbl = pNextNbl)
+        {
+            pNextNbl = NET_BUFFER_LIST_NEXT_NBL(pCurNbl);
+            NET_BUFFER_LIST_NEXT_NBL(pCurNbl) = NULL;
 
-            status = OvsExtractFlow(curNbl, vport->portNo, &key, &layers, NULL);
+            status = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(pCurNbl),
+                                    vport->portNo, &key, &layers,
+                                    NULL, vlanTagValue);
             if (status != NDIS_STATUS_SUCCESS) {
                 RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed");
-                goto dropit;
+                dropNbl = TRUE;
+                OvsAddPktCompletionList(&completionList, TRUE, sourcePort,
+                                        pCurNbl, 0, &filterReason);
+                continue;
             }
 
             ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
@@ -273,16 +309,17 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext,
 
             flow = OvsLookupFlow(datapath, &key, &hash, FALSE);
             if (flow) {
-                OvsFlowUsed(flow, curNbl, &layers);
+                OvsFlowUsed(flow, NET_BUFFER_LIST_FIRST_NB(pCurNbl), &layers);
                 datapath->hits++;
+
                 /* If successful, OvsActionsExecute() consumes the NBL.
-                 * Otherwise, it adds it to the completionList. No need to
-                 * check the return value. */
-                OvsActionsExecute(switchContext, &completionList, curNbl,
-                                portNo, SendFlags, &key, &hash, &layers,
-                                flow->actions, flow->actionsLen);
+                * Otherwise, it adds it to the completionList. No need to
+                * check the return value. */
+                OvsActionsExecute(switchContext, &completionList, pCurNbl,
+                                  portNo, SendFlags, &key, &hash, &layers,
+                                  flow->actions, flow->actionsLen);
+
                 OvsReleaseDatapath(datapath, &dpLockState);
-                NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
                 continue;
             } else {
                 OvsReleaseDatapath(datapath, &dpLockState);
@@ -293,28 +330,30 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext,
                                                 portNo,
                                                 key.tunKey.dst != 0 ?
                                                 (OvsIPv4TunnelKey *)&key.tunKey :
-                                                NULL, curNbl,
+                                                NULL, pCurNbl,
                                                 sourcePort ==
                                                 switchContext->externalPortId,
                                                 &layers, switchContext,
                                                 &missedPackets, &num);
                 if (status == NDIS_STATUS_SUCCESS) {
                     /* Complete the packet since it was copied to user
-                     * buffer. */
+                     * buffer.
+                     */
                     RtlInitUnicodeString(&filterReason,
                         L"OVS-Dropped since packet was copied to userspace");
                 } else {
                     RtlInitUnicodeString(&filterReason,
                         L"OVS-Dropped due to failure to queue to userspace");
                 }
-                goto dropit;
+                dropNbl = TRUE;
+                OvsAddPktCompletionList(&completionList, TRUE, sourcePort,
+                                        pCurNbl, 0, &filterReason);
+                continue;
             }
-
-dropit:
-            OvsAddPktCompletionList(&completionList, TRUE, sourcePort, curNbl, 0,
-                                    &filterReason);
-            NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
         }
+
+    dropit:
+        NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
     }
 
     /* Queue the missed packets. */
diff --git a/datapath-windows/ovsext/OvsPacketParser.c b/datapath-windows/ovsext/OvsPacketParser.c
index 0a93435..0b971a7 100644
--- a/datapath-windows/ovsext/OvsPacketParser.c
+++ b/datapath-windows/ovsext/OvsPacketParser.c
@@ -18,13 +18,12 @@
 
 //XXX consider moving to NdisGetDataBuffer.
 const VOID *
-OvsGetPacketBytes(const NET_BUFFER_LIST *nbl,
+OvsGetPacketBytes(const NET_BUFFER *netBuffer,
                   UINT32 len,
                   UINT32 srcOffset,
                   VOID *storage)
 {
     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
-    PNET_BUFFER netBuffer = NET_BUFFER_LIST_FIRST_NB(nbl);
     PMDL currentMdl;
     BOOLEAN firstMDL = TRUE;
     ULONG destOffset = 0;
@@ -83,7 +82,7 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl,
 }
 
 NDIS_STATUS
-OvsParseIPv6(const NET_BUFFER_LIST *packet,
+OvsParseIPv6(const NET_BUFFER *packet,
           OvsFlowKey *key,
           POVS_PACKET_HDR_INFO layers)
 {
@@ -134,7 +133,8 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet,
             const IPv6ExtHdr *extHdr;
             UINT8 len;
 
-            extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs, &extHdrStorage);
+            extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs,
+                                       &extHdrStorage);
             if (!extHdr) {
                 return NDIS_STATUS_FAILURE;
             }
@@ -142,7 +142,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet,
             len = extHdr->hdrExtLen;
             ofs += nextHdr == SOCKET_IPPROTO_AH ? (len + 2) * 4 : (len + 1) * 8;
             nextHdr = extHdr->nextHeader;
-            if (OvsPacketLenNBL(packet) < ofs) {
+            if (NET_BUFFER_DATA_LENGTH(packet) < ofs) {
                 return NDIS_STATUS_FAILURE;
              }
         } else if (nextHdr == SOCKET_IPPROTO_FRAGMENT) {
@@ -177,7 +177,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet,
 }
 
 VOID
-OvsParseTcp(const NET_BUFFER_LIST *packet,
+OvsParseTcp(const NET_BUFFER *packet,
          L4Key *flow,
          POVS_PACKET_HDR_INFO layers)
 {
@@ -192,7 +192,7 @@ OvsParseTcp(const NET_BUFFER_LIST *packet,
 }
 
 VOID
-OvsParseUdp(const NET_BUFFER_LIST *packet,
+OvsParseUdp(const NET_BUFFER *packet,
          L4Key *flow,
          POVS_PACKET_HDR_INFO layers)
 {
@@ -210,7 +210,7 @@ OvsParseUdp(const NET_BUFFER_LIST *packet,
 }
 
 NDIS_STATUS
-OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
+OvsParseIcmpV6(const NET_BUFFER *packet,
             OvsFlowKey *key,
             POVS_PACKET_HDR_INFO layers)
 {
@@ -249,7 +249,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
         }
         flow->ndTarget = *ndTarget;
 
-        while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) {
+        while ((UINT32)(ofs + 8) <= NET_BUFFER_DATA_LENGTH(packet)) {
             /*
              * The minimum size of an option is 8 bytes, which also is
              * the size of Ethernet link-layer options.
@@ -258,13 +258,15 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
             const IPv6NdOptHdr *ndOpt;
             UINT16 optLen;
 
-            ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs, &ndOptStorage);
+            ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs,
+                                      &ndOptStorage);
             if (!ndOpt) {
                 return NDIS_STATUS_FAILURE;
             }
 
             optLen = ndOpt->len * 8;
-            if (!optLen || (UINT32)(ofs + optLen) >  OvsPacketLenNBL(packet)) {
+            if (!optLen || (UINT32)(ofs + optLen) >
+                NET_BUFFER_DATA_LENGTH(packet)) {
                 goto invalid;
             }
 
diff --git a/datapath-windows/ovsext/OvsPacketParser.h b/datapath-windows/ovsext/OvsPacketParser.h
index ab3c613..0928932 100644
--- a/datapath-windows/ovsext/OvsPacketParser.h
+++ b/datapath-windows/ovsext/OvsPacketParser.h
@@ -20,15 +20,15 @@
 #include "precomp.h"
 #include "OvsNetProto.h"
 
-const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len,
+const VOID* OvsGetPacketBytes(const NET_BUFFER *_pNB, UINT32 len,
                               UINT32 SrcOffset, VOID *storage);
-NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
+NDIS_STATUS OvsParseIPv6(const NET_BUFFER *packet, OvsFlowKey *key,
                         POVS_PACKET_HDR_INFO layers);
-VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow,
+VOID OvsParseTcp(const NET_BUFFER *packet, L4Key *flow,
                  POVS_PACKET_HDR_INFO layers);
-VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow,
+VOID OvsParseUdp(const NET_BUFFER *packet, L4Key *flow,
                  POVS_PACKET_HDR_INFO layers);
-NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
+NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER *packet, OvsFlowKey *key,
                             POVS_PACKET_HDR_INFO layers);
 
 static __inline ULONG
@@ -58,7 +58,7 @@ OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB)
  * which C does not allow.
  */
 static UINT16
-OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN
+OvsGetTcpCtl(const NET_BUFFER *packet, // IN
              const POVS_PACKET_HDR_INFO layers) // IN
 {
 #define TCP_CTL_OFS 12                // Offset of "ctl" field in TCP header.
@@ -74,7 +74,7 @@ OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN
 
 
 static UINT8
-OvsGetTcpFlags(const NET_BUFFER_LIST *packet,    // IN
+OvsGetTcpFlags(const NET_BUFFER *packet,    // IN
                const OvsFlowKey *key,   // IN
                const POVS_PACKET_HDR_INFO layers) // IN
 {
@@ -88,7 +88,7 @@ OvsGetTcpFlags(const NET_BUFFER_LIST *packet,    // IN
 }
 
 static const EtherArp *
-OvsGetArp(const NET_BUFFER_LIST *packet,
+OvsGetArp(const NET_BUFFER *packet,
           UINT32 ofs,
           EtherArp *storage)
 {
@@ -96,14 +96,15 @@ OvsGetArp(const NET_BUFFER_LIST *packet,
 }
 
 static const IPHdr *
-OvsGetIp(const NET_BUFFER_LIST *packet,
+OvsGetIp(const NET_BUFFER *packet,
          UINT32 ofs,
          IPHdr *storage)
 {
     const IPHdr *ip = OvsGetPacketBytes(packet, sizeof *ip, ofs, storage);
     if (ip) {
         int ipLen = ip->ihl * 4;
-        if (ipLen >= sizeof *ip && OvsPacketLenNBL(packet) >= ofs + ipLen) {
+        if (ipLen >= sizeof *ip &&
+            NET_BUFFER_DATA_LENGTH(packet) >= ofs + ipLen) {
             return ip;
         }
     }
@@ -111,14 +112,15 @@ OvsGetIp(const NET_BUFFER_LIST *packet,
 }
 
 static const TCPHdr *
-OvsGetTcp(const NET_BUFFER_LIST *packet,
+OvsGetTcp(const NET_BUFFER *packet,
           UINT32 ofs,
           TCPHdr *storage)
 {
     const TCPHdr *tcp = OvsGetPacketBytes(packet, sizeof *tcp, ofs, storage);
     if (tcp) {
         int tcpLen = tcp->doff * 4;
-        if (tcpLen >= sizeof *tcp && OvsPacketLenNBL(packet) >= ofs + tcpLen) {
+        if (tcpLen >= sizeof *tcp &&
+            NET_BUFFER_DATA_LENGTH(packet) >= ofs + tcpLen) {
             return tcp;
         }
     }
@@ -126,7 +128,7 @@ OvsGetTcp(const NET_BUFFER_LIST *packet,
 }
 
 static const UDPHdr *
-OvsGetUdp(const NET_BUFFER_LIST *packet,
+OvsGetUdp(const NET_BUFFER *packet,
           UINT32 ofs,
           UDPHdr *storage)
 {
@@ -134,7 +136,7 @@ OvsGetUdp(const NET_BUFFER_LIST *packet,
 }
 
 static const ICMPHdr *
-OvsGetIcmp(const NET_BUFFER_LIST *packet,
+OvsGetIcmp(const NET_BUFFER *packet,
            UINT32 ofs,
            ICMPHdr *storage)
 {
diff --git a/datapath-windows/ovsext/OvsTunnel.c b/datapath-windows/ovsext/OvsTunnel.c
index b5a369a..9c96aec 100644
--- a/datapath-windows/ovsext/OvsTunnel.c
+++ b/datapath-windows/ovsext/OvsTunnel.c
@@ -228,6 +228,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
     OVS_DATAPATH *datapath = &gOvsSwitchContext->datapath;
 
     ASSERT(gOvsSwitchContext);
+    ASSERT(pNbl->FirstNetBuffer->Next == NULL);
 
     /* Fill the tunnel key */
     status = OvsSlowPathDecapVxlan(pNbl, &tunnelKey);
@@ -265,6 +266,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
         UINT64 hash;
         PNET_BUFFER curNb;
         OvsFlow *flow;
+        VOID* vlanTagValue;
 
         fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
 
@@ -295,20 +297,22 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
         ASSERT(vport->ovsType == OVSWIN_VPORT_TYPE_VXLAN);
 
         portNo = vport->portNo;
+        vlanTagValue = NET_BUFFER_LIST_INFO(pNbl, Ieee8021QNetBufferListInfo);
 
-        status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunnelKey);
+        status = OvsExtractFlow(curNb, portNo, &key, &layers,
+                                &tunnelKey, vlanTagValue);
         if (status != NDIS_STATUS_SUCCESS) {
             goto unlockAndDrop;
         }
 
         flow = OvsLookupFlow(datapath, &key, &hash, FALSE);
         if (flow) {
-            OvsFlowUsed(flow, pNbl, &layers);
+            OvsFlowUsed(flow, curNb, &layers);
             datapath->hits++;
 
             OvsActionsExecute(gOvsSwitchContext, &completionList, pNbl,
-                            portNo, SendFlags, &key, &hash, &layers,
-                            flow->actions, flow->actionsLen);
+                              portNo, SendFlags, &key, &hash, &layers,
+                              flow->actions, flow->actionsLen);
 
             OvsReleaseDatapath(datapath, &dpLockState);
         } else {
diff --git a/datapath-windows/ovsext/OvsUser.c b/datapath-windows/ovsext/OvsUser.c
index 5093f20..9f717ee 100644
--- a/datapath-windows/ovsext/OvsUser.c
+++ b/datapath-windows/ovsext/OvsUser.c
@@ -315,6 +315,7 @@ OvsExecuteDpIoctl(PVOID inputBuffer,
     OvsFlowKey key;
     OVS_PACKET_HDR_INFO layers;
     POVS_VPORT_ENTRY vport;
+    VOID* vlanTagValue;
 
     if (inputLength < sizeof(*execute) || outputLength != 0) {
         return STATUS_INFO_LENGTH_MISMATCH;
@@ -360,14 +361,22 @@ OvsExecuteDpIoctl(PVOID inputBuffer,
         fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
         fwdDetail->SourceNicIndex = 0;
     }
-    // XXX: Figure out if any of the other members of fwdDetail need to be set.
+    /*
+     * XXX: Figure out if any of the other members of fwdDetail need to be
+     * set.
+     */
+
+    ASSERT(pNbl->FirstNetBuffer->Next == NULL);
 
-    ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
-                              NULL);
+    vlanTagValue = NET_BUFFER_LIST_INFO(pNbl, Ieee8021QNetBufferListInfo);
+
+    ndisStatus = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(pNbl),
+                                fwdDetail->SourcePortId, &key, &layers, NULL,
+                                vlanTagValue);
     if (ndisStatus == NDIS_STATUS_SUCCESS) {
         ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
         NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
-                              NDIS_RWL_AT_DISPATCH_LEVEL);
+            NDIS_RWL_AT_DISPATCH_LEVEL);
         ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
                                        vport ? vport->portNo : 0,
                                        NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
@@ -827,6 +836,8 @@ OvsCreateAndAddPackets(UINT32 queueId,
     PNET_BUFFER_LIST newNbl = NULL;
     PNET_BUFFER nb;
 
+    ASSERT(nbl->FirstNetBuffer->Next == NULL);
+
     if (hdrInfo->isTcp) {
         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
         UINT32 packetLength;
diff --git a/datapath-windows/ovsext/OvsVxlan.c b/datapath-windows/ovsext/OvsVxlan.c
index 63909ae..c50d2d9 100644
--- a/datapath-windows/ovsext/OvsVxlan.c
+++ b/datapath-windows/ovsext/OvsVxlan.c
@@ -116,6 +116,8 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
     UINT32 headRoom = OvsGetVxlanTunHdrSize();
     UINT32 packetLength;
 
+    ASSERT(curNbl->FirstNetBuffer->Next == NULL);
+
     /*
      * XXX: the assumption currently is that the NBL is owned by OVS, and
      * headroom has already been allocated as part of allocating the NBL and
@@ -286,13 +288,19 @@ OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl,
     OvsFlowKey key;
     NDIS_STATUS status;
     UNREFERENCED_PARAMETER(inPort);
-
-    status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL);
-    if (result == STATUS_SUCCESS) {
-        status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers,
+    VOID* vlanTagValue;
+
+    vlanTagValue = NET_BUFFER_LIST_INFO(curNbl, Ieee8021QNetBufferListInfo);
+    for (NET_BUFFER* pNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+         pNb != NULL; pNb = NET_BUFFER_NEXT_NB(pNb)) {
+        status = OvsExtractFlow(pNb, inPort, &key, &layers, NULL,
+                                vlanTagValue);
+        if (result == STATUS_SUCCESS) {
+            status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers,
                 (POVS_SWITCH_CONTEXT)cbData1, NULL);
-    } else {
-        status = NDIS_STATUS_FAILURE;
+        } else {
+            status = NDIS_STATUS_FAILURE;
+        }
     }
 
     if (status != NDIS_STATUS_SUCCESS) {
@@ -457,9 +465,16 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
     OVS_PACKET_HDR_INFO layers;
 
     layers.value = 0;
+    ASSERT(packet->FirstNetBuffer->Next == NULL);
 
     do {
-        nh = OvsGetIp(packet, layers.l3Offset, &ip_storage);
+        NET_BUFFER* pNb = NET_BUFFER_LIST_FIRST_NB(packet);
+
+        /*
+         * It is an encapsulated packet (UDP), we can use the first NB
+         * to start the check
+         */
+        nh = OvsGetIp(pNb, layers.l3Offset, &ip_storage);
         if (nh) {
             layers.l4Offset = layers.l3Offset + nh->ihl * 4;
         } else {
@@ -467,7 +482,7 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
         }
 
         /* make sure it's a VXLAN packet */
-        udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage);
+        udp = OvsGetUdp(pNb, layers.l4Offset, &udpStorage);
         if (udp) {
             layers.l7Offset = layers.l4Offset + sizeof *udp;
         } else {
@@ -477,7 +492,11 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
         /* XXX Should be tested against the dynamic port # in the VXLAN vport */
         ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT));
 
-        VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet,
+        /*
+         * We may have multiple VXLAN packets here that need to be decapsulated
+         * For the VXLAN header, we need only the first NET_BUFFER
+         */
+        VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(pNb,
                                                     sizeof(*VxlanHeader),
                                                     layers.l7Offset,
                                                     &VxlanHeaderBuffer);
-- 
1.9.0.msysgit.0





More information about the dev mailing list