[ovs-dev] [PATCH] datapath-windows: Avoid double flow lookup in Tx tunnel function

Nithin Raju nithin at vmware.com
Tue Oct 13 17:35:53 UTC 2015


Hi Sorin/Alin,
Is this an optimization or a fix?

Can you pls. add more details?

We do two flow lookups for a reason - once for the inner packet from the
VIF, and once more after encapsulation. There¹s nothing to avoid here.

The only way to avoid the second lookup is by handing off the packet to
the Windows IP stack by using a kernel socket, and letting Windows do the
routing. For this, we¹d need to use a ³Private² Hyper-V switch.

-- Nithin

-----Original Message-----
From: Sorin Vinturis <svinturis at cloudbasesolutions.com>
Date: Wednesday, September 23, 2015 at 5:09 AM
To: "dev at openvswitch.org" <dev at openvswitch.org>
Subject: [ovs-dev] [PATCH] datapath-windows: Avoid double flow lookup in
Tx tunnel function

>Signed-off-by: Sorin Vinturis <svinturis at cloudbasesolutions.com>
>Co-authored-by: Alin Gabriel Serdean <aserdean at cloudbasesolutions.com>
>---
> datapath-windows/ovsext/Actions.c | 264
>+++++++++++++++++++++++++-------------
> datapath-windows/ovsext/Vport.c   |   1 -
> datapath-windows/ovsext/Vport.h   |   2 +-
> 3 files changed, 173 insertions(+), 94 deletions(-)
>
>diff --git a/datapath-windows/ovsext/Actions.c
>b/datapath-windows/ovsext/Actions.c
>index bfe5d7f..2a482b9 100644
>--- a/datapath-windows/ovsext/Actions.c
>+++ b/datapath-windows/ovsext/Actions.c
>@@ -322,65 +322,18 @@ OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx,
>     return FALSE;
> }
> 
>-
>-/*
>- * 
>--------------------------------------------------------------------------
>- * OvsAddPorts --
>- *     Add the specified destination vport into the forwarding context.
>If the
>- *     vport is a VIF/external port, it is added directly to the NBL. If
>it is
>- *     a tunneling port, it is NOT added to the NBL.
>- *
>- * Result:
>- *     NDIS_STATUS_SUCCESS on success
>- *     Other NDIS_STATUS upon failure.
>- * 
>--------------------------------------------------------------------------
>- */
>-static __inline NDIS_STATUS
>-OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
>-            OvsFlowKey *flowKey,
>-            NDIS_SWITCH_PORT_ID dstPortId,
>-            BOOLEAN preserveVLAN,
>-            BOOLEAN preservePriority)
>+static NTSTATUS
>+OvsGrowNblDestinations(OvsForwardingContext *ovsFwdCtx)
> {
>-    POVS_VPORT_ENTRY vport;
>-    PNDIS_SWITCH_PORT_DESTINATION fwdPort;
>-    NDIS_STATUS status;
>+    NDIS_STATUS status = STATUS_SUCCESS;
>     POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
>+    BOOLEAN error = TRUE;
> 
>-    /*
>-     * We hold the dispatch lock that protects the list of vports, so
>vports
>-     * validated here can be added as destinations safely before we call
>into
>-     * NDIS.
>-     *
>-     * Some of the vports can be tunnelled ports as well in which case
>-     * they should be added to a separate list of tunnelled destination
>ports
>-     * instead of the VIF ports. The context for the tunnel is settable
>-     * in OvsForwardingContext.
>-     */
>-    vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId);
>-    if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
>-        /*
>-         * There may be some latency between a port disappearing, and
>userspace
>-         * updating the recalculated flows. In the meantime, handle
>invalid
>-         * ports gracefully.
>-         */
>-        ovsActionStats.noVport++;
>-        return NDIS_STATUS_SUCCESS;
>-    }
>-    ASSERT(vport->nicState == NdisSwitchNicStateConnected);
>-    vport->stats.txPackets++;
>-    vport->stats.txBytes +=
>-        
>NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl));
>-
>-    if (OvsIsBridgeInternalVport(vport)) {
>-        return NDIS_STATUS_SUCCESS;
>-    }
>-
>-    if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) {
>-        return NDIS_STATUS_SUCCESS;
>-    }
>+    do {
>+        if (ovsFwdCtx->destPortsSizeOut != ovsFwdCtx->destPortsSizeIn) {
>+            break;
>+        }
> 
>-    if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) {
>         if (ovsFwdCtx->destPortsSizeIn == 0) {
>             ASSERT(ovsFwdCtx->destinationPorts == NULL);
>             ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
>@@ -391,13 +344,14 @@ OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
>                     &ovsFwdCtx->destinationPorts);
>             if (status != NDIS_STATUS_SUCCESS) {
>                 ovsActionStats.cannotGrowDest++;
>-                return status;
>+                break;
>             }
>             ovsFwdCtx->destPortsSizeIn =
>                 ovsFwdCtx->fwdDetail->NumAvailableDestinations;
>             ASSERT(ovsFwdCtx->destinationPorts);
>         } else {
>             ASSERT(ovsFwdCtx->destinationPorts != NULL);
>+
>             /*
>              * NumElements:
>              * A ULONG value that specifies the total number of
>@@ -420,6 +374,7 @@ OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
>                    ovsFwdCtx->destPortsSizeOut -
>                    ovsFwdCtx->fwdDetail->NumAvailableDestinations);
>             ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0);
>+
>             /*
>              * Before we grow the array of destination ports, the
>current set
>              * of ports needs to be committed. Only the ports added
>since the
>@@ -431,7 +386,7 @@ OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
>                 ovsFwdCtx->destinationPorts);
>             if (status != NDIS_STATUS_SUCCESS) {
>                 ovsActionStats.cannotGrowDest++;
>-                return status;
>+                break;
>             }
>             ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
>                    ovsFwdCtx->destPortsSizeIn);
>@@ -444,26 +399,96 @@ OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
>                 ovsFwdCtx->destPortsSizeIn,
>&ovsFwdCtx->destinationPorts);
>             if (status != NDIS_STATUS_SUCCESS) {
>                 ovsActionStats.cannotGrowDest++;
>-                return status;
>+                break;
>             }
>             ASSERT(ovsFwdCtx->destinationPorts != NULL);
>             ovsFwdCtx->destPortsSizeIn <<= 1;
>         }
>-    }
> 
>-    ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
>-    fwdPort =
>-        
>NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
>-                 
>ovsFwdCtx->destPortsSizeOut);
>+        error = FALSE;
>+    } while (error);
>+
>+    return status;
>+}
> 
>-    fwdPort->PortId = vport->portId;
>-    fwdPort->NicIndex = vport->nicIndex;
>-    fwdPort->IsExcluded = 0;
>-    fwdPort->PreserveVLAN = preserveVLAN;
>-    fwdPort->PreservePriority = preservePriority;
>-    ovsFwdCtx->destPortsSizeOut += 1;
> 
>-    return NDIS_STATUS_SUCCESS;
>+/*
>+ * 
>--------------------------------------------------------------------------
>+ * OvsAddPorts --
>+ *     Add the specified destination vport into the forwarding context.
>If the
>+ *     vport is a VIF/external port, it is added directly to the NBL. If
>it is
>+ *     a tunneling port, it is NOT added to the NBL.
>+ *
>+ * Result:
>+ *     NDIS_STATUS_SUCCESS on success
>+ *     Other NDIS_STATUS upon failure.
>+ * 
>--------------------------------------------------------------------------
>+ */
>+static __inline NDIS_STATUS
>+OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
>+            OvsFlowKey *flowKey,
>+            NDIS_SWITCH_PORT_ID dstPortId,
>+            BOOLEAN preserveVLAN,
>+            BOOLEAN preservePriority)
>+{
>+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
>+    POVS_VPORT_ENTRY vport;
>+    BOOLEAN error = TRUE;
>+    PNDIS_SWITCH_PORT_DESTINATION fwdPort;
>+
>+    do {
>+        /*
>+         * We hold the dispatch lock that protects the list of vports,
>so vports
>+         * validated here can be added as destinations safely before we
>call into
>+         * NDIS.
>+         *
>+         * Some of the vports can be tunnelled ports as well in which
>case
>+         * they should be added to a separate list of tunnelled
>destination ports
>+         * instead of the VIF ports. The context for the tunnel is
>settable
>+         * in OvsForwardingContext.
>+         */
>+        vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext,
>dstPortId);
>+        if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
>+            /*
>+             * There may be some latency between a port disappearing,
>and userspace
>+             * updating the recalculated flows. In the meantime, handle
>invalid
>+             * ports gracefully.
>+             */
>+            ovsActionStats.noVport++;
>+            break;
>+        }
>+
>+        ASSERT(vport->nicState == NdisSwitchNicStateConnected);
>+        vport->stats.txPackets++;
>+        vport->stats.txBytes +=
>+            
>NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl));
>+
>+        if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) {
>+            break;
>+        }
>+
>+        status = OvsGrowNblDestinations(ovsFwdCtx);
>+        if (status != NDIS_STATUS_SUCCESS) {
>+            ovsActionStats.cannotGrowDest++;
>+            break;
>+        }
>+
>+        ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
>+        fwdPort =
>+            
>NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
>+            ovsFwdCtx->destPortsSizeOut);
>+
>+        fwdPort->PortId = vport->portId;
>+        fwdPort->NicIndex = vport->nicIndex;
>+        fwdPort->IsExcluded = 0;
>+        fwdPort->PreserveVLAN = preserveVLAN;
>+        fwdPort->PreservePriority = preservePriority;
>+        ovsFwdCtx->destPortsSizeOut += 1;
>+
>+        error = FALSE;
>+    } while (error);
>+
>+    return status;
> }
> 
> 
>@@ -581,10 +606,11 @@ OvsDoFlowLookupOutput(OvsForwardingContext
>*ovsFwdCtx)
>         OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
>         ovsFwdCtx->switchContext->datapath.hits++;
>         status = OvsActionsExecute(ovsFwdCtx->switchContext,
>-                                 ovsFwdCtx->completionList,
>ovsFwdCtx->curNbl,
>-                                 ovsFwdCtx->srcVportNo,
>ovsFwdCtx->sendFlags,
>-                                 &key, &hash, &ovsFwdCtx->layers,
>-                                 flow->actions, flow->actionsLen);
>+                                   ovsFwdCtx->completionList,
>+                                   ovsFwdCtx->curNbl,
>+                                   ovsFwdCtx->srcVportNo,
>ovsFwdCtx->sendFlags,
>+                                   &key, &hash, &ovsFwdCtx->layers,
>+                                   flow->actions, flow->actionsLen);
>         ovsFwdCtx->curNbl = NULL;
>     } else {
>         LIST_ENTRY missedPackets;
>@@ -631,35 +657,26 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
> {
>     NDIS_STATUS status = NDIS_STATUS_FAILURE;
>     PNET_BUFFER_LIST newNbl = NULL;
>+    POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
> 
>-    /*
>-     * Setup the source port to be the internal port to as to facilitate
>the
>-     * second OvsLookupFlow.
>-     */
>-    if (ovsFwdCtx->switchContext->internalVport == NULL ||
>-        ovsFwdCtx->switchContext->virtualExternalVport == NULL) {
>+    if (switchContext->internalVport == NULL ||
>+        switchContext->virtualExternalVport == NULL) {
>         OvsClearTunTxCtx(ovsFwdCtx);
>         OvsCompleteNBLForwardingCtx(ovsFwdCtx,
>             L"OVS-Dropped since either internal or external port is
>absent");
>         return NDIS_STATUS_FAILURE;
>     }
>-    ovsFwdCtx->srcVportNo =
>-        
>((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo;
>-
>-    ovsFwdCtx->fwdDetail->SourcePortId =
>ovsFwdCtx->switchContext->internalPortId;
>-    ovsFwdCtx->fwdDetail->SourceNicIndex =
>-        
>((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex;
> 
>     /* Do the encap. Encap function does not consume the NBL. */
>     switch(ovsFwdCtx->tunnelTxNic->ovsType) {
>     case OVS_VPORT_TYPE_VXLAN:
>         status = OvsEncapVxlan(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
>-                               &ovsFwdCtx->tunKey,
>ovsFwdCtx->switchContext,
>+                               &ovsFwdCtx->tunKey, switchContext,
>                                &ovsFwdCtx->layers, &newNbl);
>         break;
>     case OVS_VPORT_TYPE_STT:
>         status = OvsEncapStt(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
>-                             &ovsFwdCtx->tunKey,
>ovsFwdCtx->switchContext,
>+                             &ovsFwdCtx->tunKey, switchContext,
>                              &ovsFwdCtx->layers, &newNbl);
>         break;
>     default:
>@@ -670,15 +687,78 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
>     OvsClearTunTxCtx(ovsFwdCtx);
> 
>     if (status == NDIS_STATUS_SUCCESS) {
>+        PNDIS_SWITCH_PORT_DESTINATION fwdPort;
>+        PCWSTR dropReason = L"";
>+
>         ASSERT(newNbl);
>         OvsCompleteNBLForwardingCtx(ovsFwdCtx,
>                                     L"Complete after cloning NBL for
>encapsulation");
>         ovsFwdCtx->curNbl = newNbl;
>-        status = OvsDoFlowLookupOutput(ovsFwdCtx);
>+
>+        OvsInitForwardingCtx(ovsFwdCtx, switchContext,
>+                             ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
>+                             ovsFwdCtx->sendFlags, ovsFwdCtx->fwdDetail,
>+                             ovsFwdCtx->completionList, NULL, TRUE);
>+
>+        status = OvsGrowNblDestinations(ovsFwdCtx);
>+        if (status != NDIS_STATUS_SUCCESS) {
>+            ovsActionStats.cannotGrowDest++;
>+            return status;
>+        }
>+
>+        ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
>+        fwdPort =
>+            
>NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
>+            ovsFwdCtx->destPortsSizeOut);
>+        fwdPort->PortId =
>ovsFwdCtx->switchContext->virtualExternalPortId;
>+        fwdPort->NicIndex = 1;
>+        fwdPort->IsExcluded = 0;
>+        fwdPort->PreserveVLAN = TRUE;
>+        fwdPort->PreservePriority = TRUE;
>+        ovsFwdCtx->destPortsSizeOut += 1;
>+
>+        if (ovsFwdCtx->destPortsSizeOut > 0) {
>+            PNET_BUFFER_LIST newNbl = NULL;
>+            UINT32 portsToUpdate =
>+                ovsFwdCtx->fwdDetail->NumAvailableDestinations -
>+                (ovsFwdCtx->destPortsSizeIn -
>ovsFwdCtx->destPortsSizeOut);
>+
>+            ASSERT(ovsFwdCtx->destinationPorts != NULL);
>+            ASSERT(portsToUpdate > 0);
>+
>+            status =
>switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
>+                switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
>+                portsToUpdate, ovsFwdCtx->destinationPorts);
>+            if (status == NDIS_STATUS_SUCCESS) {
>+                OvsSendNBLIngress(switchContext, ovsFwdCtx->curNbl,
>+                    ovsFwdCtx->sendFlags);
>+                ovsFwdCtx->destPortsSizeOut = 0;
>+                ovsFwdCtx->curNbl = NULL;
>+                if (newNbl) {
>+                    status = OvsInitForwardingCtx(ovsFwdCtx,
>switchContext,
>+                        newNbl, ovsFwdCtx->srcVportNo, 0,
>+                        NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
>+                        ovsFwdCtx->completionList,
>+                        &ovsFwdCtx->layers, FALSE);
>+                    if (status != NDIS_STATUS_SUCCESS) {
>+                        dropReason = L"Dropped due to resources.";
>+                    }
>+                }
>+            } else {
>+                OvsCompleteNBL(switchContext, newNbl, TRUE);
>+                ovsActionStats.cannotGrowDest++;
>+                dropReason = L"Dropped due to failure to update
>destinations.";
>+            }
>+        }
>+
>+        if (status != NDIS_STATUS_SUCCESS) {
>+            OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
>+        }
>+
>         ASSERT(ovsFwdCtx->curNbl == NULL);
>     } else {
>         /*
>-        * XXX: Temporary freeing of the packet until we register a
>+         * XXX: Temporary freeing of the packet until we register a
>          * callback to IP helper.
>          */
>         OvsCompleteNBLForwardingCtx(ovsFwdCtx,
>@@ -1433,7 +1513,7 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
>         case OVS_ACTION_ATTR_OUTPUT:
>             dstPortID = NlAttrGetU32(a);
>             status = OvsAddPorts(&ovsFwdCtx, key, dstPortID,
>-                                              TRUE, TRUE);
>+                                 TRUE, TRUE);
>             if (status != NDIS_STATUS_SUCCESS) {
>                 dropReason = L"OVS-adding destination port failed";
>                 goto dropit;
>@@ -1515,7 +1595,7 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
>             BOOLEAN isRecv = FALSE;
> 
>             POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(switchContext,
>-                portNo);
>+                                                          portNo);
> 
>             if (vport) {
>                 if (vport->isExternal ||
>diff --git a/datapath-windows/ovsext/Vport.c
>b/datapath-windows/ovsext/Vport.c
>index baab056..61a9379 100644
>--- a/datapath-windows/ovsext/Vport.c
>+++ b/datapath-windows/ovsext/Vport.c
>@@ -1010,7 +1010,6 @@ AssignNicNameSpecial(POVS_VPORT_ENTRY vport)
>     }
> }
> 
>-
> /*
>  * 
>--------------------------------------------------------------------------
>  * Functionality common to any port on the Hyper-V switch. This function
>is not
>diff --git a/datapath-windows/ovsext/Vport.h
>b/datapath-windows/ovsext/Vport.h
>index ead55a9..89099f2 100644
>--- a/datapath-windows/ovsext/Vport.h
>+++ b/datapath-windows/ovsext/Vport.h
>@@ -73,7 +73,7 @@ typedef struct _OVS_VPORT_FULL_STATS {
>     OVS_VPORT_ERR_STATS;
> }OVS_VPORT_FULL_STATS;
> /*
>- * Each internal, external adapter or vritual adapter has
>+ * Each internal, external adapter or virtual adapter has
>  * one vport entry. In addition, we have one vport for each
>  * tunnel type, such as vxlan, gre
>  */
>-- 
>1.9.0.msysgit.0
>_______________________________________________
>dev mailing list
>dev at openvswitch.org
>https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma
>n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=pN
>HQcdr7B40b4h6Yb7FIedI1dnBsxdDuTLBYD3JqV80&m=lPR4aO2DeQ7LHo8aCnpeHaZsgHDHdG
>6GfLKqPjGuULQ&s=4gdQ1BpakSNM0qCIs4cyMxIiVrlUS8G0S2LhhLzOdtc&e= 




More information about the dev mailing list