[ovs-dev] [PATCH 3/3 v2] datapath-windows: NAT integration with conntrack

Anand Kumar kumaranand at vmware.com
Thu Mar 23 19:01:08 UTC 2017


Hi Yin,

Thank you for the patches. Overall the patch looks good except for a few minor issues. 
Please find my comments inline prefixed with [AK]

Regards,
Anand Kumar

On 3/22/17, 3:12 PM, "ovs-dev-bounces at openvswitch.org on behalf of Yin Lin" <ovs-dev-bounces at openvswitch.org on behalf of linyi at vmware.com> wrote:

    Signed-off-by: Yin Lin <linyi at vmware.com>
    ---
     datapath-windows/ovsext/Actions.c       | 179 ++++++++++++++++++++-----------
     datapath-windows/ovsext/Actions.h       |  77 +++++++++++++
     datapath-windows/ovsext/Conntrack-nat.c |   3 +-
     datapath-windows/ovsext/Conntrack.c     | 184 ++++++++++++++++++++------------
     datapath-windows/ovsext/Conntrack.h     |  25 +++--
     datapath-windows/ovsext/ovsext.vcxproj  |   2 +
     6 files changed, 331 insertions(+), 139 deletions(-)
    
    diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
    index 46f84bc..45b3841 100644
    --- a/datapath-windows/ovsext/Actions.c
    +++ b/datapath-windows/ovsext/Actions.c
    @@ -71,63 +71,6 @@ typedef struct _OVS_ACTION_STATS {
     OVS_ACTION_STATS ovsActionStats;
     
     /*
    - * There a lot of data that needs to be maintained while executing the pipeline
    - * as dictated by the actions of a flow, across different functions at different
    - * levels. Such data is put together in a 'context' structure. Care should be
    - * exercised while adding new members to the structure - only add ones that get
    - * used across multiple stages in the pipeline/get used in multiple functions.
    - */
    -typedef struct OvsForwardingContext {
    -    POVS_SWITCH_CONTEXT switchContext;
    -    /* The NBL currently used in the pipeline. */
    -    PNET_BUFFER_LIST curNbl;
    -    /* NDIS forwarding detail for 'curNbl'. */
    -    PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
    -    /* Array of destination ports for 'curNbl'. */
    -    PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
    -    /* send flags while sending 'curNbl' into NDIS. */
    -    ULONG sendFlags;
    -    /* Total number of output ports, used + unused, in 'curNbl'. */
    -    UINT32 destPortsSizeIn;
    -    /* Total number of used output ports in 'curNbl'. */
    -    UINT32 destPortsSizeOut;
    -    /*
    -     * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
    -     * be freed/completed.
    -     */
    -    OvsCompletionList *completionList;
    -    /*
    -     * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
    -     * bridge. ie. during tunneling on the Rx side.
    -     */
    -    UINT32 srcVportNo;
    -
    -    /*
    -     * Tunnel key:
    -     * - specified in actions during tunneling Tx
    -     * - extracted from an NBL during tunneling Rx
    -     */
    -    OvsIPv4TunnelKey tunKey;
    -
    -    /*
    -     * Tunneling - Tx:
    -     * To store the output port, when it is a tunneled port. We don't foresee
    -     * multiple tunneled ports as outport for any given NBL.
    -     */
    -    POVS_VPORT_ENTRY tunnelTxNic;
    -
    -    /*
    -     * Tunneling - Rx:
    -     * Points to the Internal port on the PIF Bridge, if the packet needs to be
    -     * de-tunneled.
    -     */
    -    POVS_VPORT_ENTRY tunnelRxNic;
    -
    -    /* header information */
    -    OVS_PACKET_HDR_INFO layers;
    -} OvsForwardingContext;
    -
    -/*
      * --------------------------------------------------------------------------
      * OvsInitForwardingCtx --
      *     Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
    @@ -1388,7 +1331,7 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
      *      based on the specified key.
      *----------------------------------------------------------------------------
      */
    -static __inline NDIS_STATUS
    +NDIS_STATUS
     OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
                       const struct ovs_key_udp *udpAttr)
     {
    @@ -1435,7 +1378,7 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
      *      based on the specified key.
      *----------------------------------------------------------------------------
      */
    -static __inline NDIS_STATUS
    +NDIS_STATUS
     OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
                       const struct ovs_key_tcp *tcpAttr)
     {
    @@ -1474,11 +1417,125 @@ OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
     /*
      *----------------------------------------------------------------------------
[AK] – change function name to  OvsUpdateAddressAndPort in the comments
      * OvsUpdateIPv4Header --
    + *      Updates the source/destination IP field of IPv4 header in
    + *      ovsFwdCtx.curNbl inline based on the specified key.
    + *----------------------------------------------------------------------------
    + */
    +OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
    +                        UINT32 newAddr, UINT16 newPort,
    +                        BOOLEAN isSource, BOOLEAN isTx)
    +{
    +    PUINT8 bufferStart;
    +    UINT32 hdrSize;
    +    OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
    +    IPHdr *ipHdr;
    +    TCPHdr *tcpHdr = NULL;
    +    UDPHdr *udpHdr = NULL;
    +    UINT32 *addrField = NULL;
    +    UINT16 *portField = NULL;
    +    UINT16 *checkField = NULL;
    +    BOOLEAN l4Offload = FALSE;
    +    NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
    +
    +    ASSERT(layers->value != 0);
    +
    +    if (layers->isTcp || layers->isUdp) {
    +        hdrSize = layers->l4Offset +
    +                  layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr);
    +    } else {
    +        hdrSize = layers->l3Offset + sizeof (*ipHdr);
    +    }
    +
    +    bufferStart = OvsGetHeaderBySize(ovsFwdCtx, hdrSize);
    +    if (!bufferStart) {
    +        return NDIS_STATUS_RESOURCES;
    +    }
    +
    +    ipHdr = (IPHdr *)(bufferStart + layers->l3Offset);
    +
    +    if (layers->isTcp) {
    +        tcpHdr = (TCPHdr *)(bufferStart + layers->l4Offset);
    +    } else if (layers->isUdp) {
    +        udpHdr = (UDPHdr *)(bufferStart + layers->l4Offset);
    +    }
    +
    +    csumInfo.Value = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl,
    +                                          TcpIpChecksumNetBufferListInfo);
    +    /*
    +     * Adjust the IP header inline as dictated by the action, and also update
    +     * the IP and the TCP checksum for the data modified.
    +     *
    +     * In the future, this could be optimized to make one call to
    +     * ChecksumUpdate32(). Ignoring this for now, since for the most common
    +     * case, we only update the TTL.
    +     */
    +
    +    if (isSource)
[AK] – Indentation opening flower bracket
    +    {
    +        addrField = &ipHdr->saddr;
    +        if (tcpHdr) {
    +            portField = &tcpHdr->source;
    +            checkField = &tcpHdr->check;
    +            if (isTx) {
    +                l4Offload = (csumInfo.Transmit.TcpChecksum != 0);
    +            }
    +        }
    +        else if (udpHdr) {
    +            portField = &udpHdr->source;
    +            checkField = &udpHdr->check;
    +            if (isTx) {
    +                l4Offload = (csumInfo.Transmit.UdpChecksum != 0);
    +            }
    +        }
    +    } else {
    +        addrField = &ipHdr->daddr;
    +        if (tcpHdr) {
    +            portField = &tcpHdr->dest;
    +            checkField = &tcpHdr->check;
    +        } else if (udpHdr) {
    +            portField = &udpHdr->dest;
    +            checkField = &udpHdr->check;
    +        }
    +    }
    +    if (*addrField != newAddr) {
    +        UINT32 oldAddr = *addrField;
    +        if (checkField && *checkField != 0) {
    +            if (l4Offload) {
    +                /* Recompute IP pseudo checksum */
    +                *checkField = ~(*checkField);
    +                *checkField = ChecksumUpdate32(*checkField, oldAddr,
    +                                               newAddr);
    +                *checkField = ~(*checkField);
    +            } else {
    +                *checkField = ChecksumUpdate32(*checkField, oldAddr,
    +                                               newAddr);
    +            }
    +        }
    +        if (ipHdr->check != 0) {
    +            ipHdr->check = ChecksumUpdate32(ipHdr->check, oldAddr,
    +                                            newAddr);
    +        }
    +        *addrField = newAddr;
    +    }
    +
    +    if (portField && *portField != newPort) {
    +        if (checkField && !l4Offload) {
    +            *checkField = ChecksumUpdate16(*checkField, *portField,
    +                                           newPort);
    +        }
    +        *portField = newPort;
    +    }
    +    return NDIS_STATUS_SUCCESS;
    +}
    +
    +/*
    + *----------------------------------------------------------------------------
    + * OvsUpdateIPv4Header --
      *      Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
      *      specified key.
      *----------------------------------------------------------------------------
      */
    -static __inline NDIS_STATUS
    +NDIS_STATUS
     OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
                         const struct ovs_key_ipv4 *ipAttr)
     {
    @@ -2032,7 +2089,7 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext,
                     }
                 }
     
    -            status = OvsExecuteConntrackAction(ovsFwdCtx.curNbl, layers,
    +            status = OvsExecuteConntrackAction(&ovsFwdCtx, layers,
                                                    key, (const PNL_ATTR)a);
                 if (status != NDIS_STATUS_SUCCESS) {
                     OVS_LOG_ERROR("CT Action failed");
    diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h
    index c56c260..85d9dcb 100644
    --- a/datapath-windows/ovsext/Actions.h
    +++ b/datapath-windows/ovsext/Actions.h
    @@ -52,4 +52,81 @@ OvsDoRecirc(POVS_SWITCH_CONTEXT switchContext,
                 UINT32 srcPortNo,
                 OVS_PACKET_HDR_INFO *layers);
     
    +/*
    + * There a lot of data that needs to be maintained while executing the pipeline
    + * as dictated by the actions of a flow, across different functions at different
    + * levels. Such data is put together in a 'context' structure. Care should be
    + * exercised while adding new members to the structure - only add ones that get
    + * used across multiple stages in the pipeline/get used in multiple functions.
    + */
    +typedef struct OvsForwardingContext {
    +    POVS_SWITCH_CONTEXT switchContext;
    +    /* The NBL currently used in the pipeline. */
    +    PNET_BUFFER_LIST curNbl;
    +    /* NDIS forwarding detail for 'curNbl'. */
    +    PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
    +    /* Array of destination ports for 'curNbl'. */
    +    PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
    +    /* send flags while sending 'curNbl' into NDIS. */
    +    ULONG sendFlags;
    +    /* Total number of output ports, used + unused, in 'curNbl'. */
    +    UINT32 destPortsSizeIn;
    +    /* Total number of used output ports in 'curNbl'. */
    +    UINT32 destPortsSizeOut;
    +    /*
    +     * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
    +     * be freed/completed.
    +     */
    +    OvsCompletionList *completionList;
    +    /*
    +     * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
    +     * bridge. ie. during tunneling on the Rx side.
    +     */
    +    UINT32 srcVportNo;
    +
    +    /*
    +     * Tunnel key:
    +     * - specified in actions during tunneling Tx
    +     * - extracted from an NBL during tunneling Rx
    +     */
    +    OvsIPv4TunnelKey tunKey;
    +
    +    /*
    +     * Tunneling - Tx:
    +     * To store the output port, when it is a tunneled port. We don't foresee
    +     * multiple tunneled ports as outport for any given NBL.
    +     */
    +    POVS_VPORT_ENTRY tunnelTxNic;
    +
    +    /*
    +     * Tunneling - Rx:
    +     * Points to the Internal port on the PIF Bridge, if the packet needs to be
    +     * de-tunneled.
    +     */
    +    POVS_VPORT_ENTRY tunnelRxNic;
    +
    +    /* header information */
    +    OVS_PACKET_HDR_INFO layers;
    +} OvsForwardingContext;
    +
    +PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
    +                          UINT32 size);
    +
    +NDIS_STATUS
    +OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
    +                  const struct ovs_key_udp *udpAttr);
    +
    +NDIS_STATUS
    +OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
    +                  const struct ovs_key_tcp *tcpAttr);
    +
    +NDIS_STATUS
    +OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
    +                    const struct ovs_key_ipv4 *ipAttr);
    +
    +NDIS_STATUS
    +OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
    +                        UINT32 newAddr, UINT16 newPort,
    +                        BOOLEAN isSource, BOOLEAN isTx);
    +
     #endif /* __ACTIONS_H_ */
    diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c
    index 4930694..17b312c 100644
    --- a/datapath-windows/ovsext/Conntrack-nat.c
    +++ b/datapath-windows/ovsext/Conntrack-nat.c
    @@ -197,7 +197,8 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
         if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) {
             OvsUpdateAddressAndPort(ovsFwdCtx,
                                     endpoint->addr.ipv4_aligned,
    -                                endpoint->port, isSrcNat);
    +                                endpoint->port, isSrcNat,
    +                                !reverse);
             if (isSrcNat) {
                 key->ipKey.nwSrc = endpoint->addr.ipv4_aligned;
             } else {
    diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
    index 924f260..167e13e 100644
    --- a/datapath-windows/ovsext/Conntrack.c
    +++ b/datapath-windows/ovsext/Conntrack.c
    @@ -18,6 +18,7 @@
     #include "Jhash.h"
     #include "PacketParser.h"
     #include "Event.h"
    +#include "Conntrack-nat.h"
     
     #pragma warning(push)
     #pragma warning(disable:4311)
    @@ -26,7 +27,7 @@
     #define SEC_TO_UNIX_EPOCH 11644473600LL
     #define SEC_TO_NANOSEC 1000000000LL
     
    -KSTART_ROUTINE ovsConntrackEntryCleaner;
    +KSTART_ROUTINE OvsConntrackEntryCleaner;
     static PLIST_ENTRY ovsConntrackTable;
     static OVS_CT_THREAD_CTX ctThreadCtx;
     static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
    @@ -71,7 +72,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
         /* Init CT Cleaner Thread */
         KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE);
         status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
    -                                  NULL, ovsConntrackEntryCleaner,
    +                                  NULL, OvsConntrackEntryCleaner,
                                       &ctThreadCtx);
     
         if (status != STATUS_SUCCESS) {
    @@ -88,6 +89,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
                                   &ctThreadCtx.threadObject, NULL);
         ZwClose(threadHandle);
         threadHandle = NULL;
    +
    +    status = OvsNatInit(context);
    +
    +    if (status != STATUS_SUCCESS) {
    +        OvsCleanupConntrack();
    +        return status;
    +    }
         return STATUS_SUCCESS;
     }
     
    @@ -120,6 +128,7 @@ OvsCleanupConntrack(VOID)
     
         NdisFreeRWLock(ovsConntrackLockObj);
         ovsConntrackLockObj = NULL;
    +    OvsNatCleanup();
     }
     
     static __inline VOID
    @@ -159,25 +168,42 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
         OvsPostCtEvent(&ctEventEntry);
     }
     
    -static __inline VOID
    -OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, UINT64 now)
    +static __inline BOOLEAN
    +OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx,
    +              PNAT_ACTION_INFO natInfo, UINT64 now)
     {
    -    NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY));
    -    NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY));
    +    NdisMoveMemory(&entry->key, &ctx->key, sizeof(OVS_CT_KEY));
    +    NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof(OVS_CT_KEY));
         OvsCtKeyReverse(&entry->rev_key);
    +
    +    // NatInfo is always initialized to be disabled, so that if NAT action
    +    // fails, we will not end up deleting an non-existent NAT entry.
    +    if (natInfo != NULL && OvsIsForwardNat(natInfo->natAction)) {
    +        entry->natInfo = *natInfo;
    +        if (!OvsNatCtEntry(entry)) {
    +            return FALSE;
    +        }
[AK] – Doesn’t this override the values updated by OvsNatCtEntry(entry)?
    +        entry->natInfo = *natInfo;
    +        ctx->hash = OvsHashCtKey(&entry->key);
    +    } else {
    +        entry->natInfo.natAction = natInfo->natAction;
    +    }
    +
         entry->timestampStart = now;
         InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
                        &entry->link);
     
         ctTotalEntries++;
    +    return TRUE;
     }
     
     static __inline POVS_CT_ENTRY
    -OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
    +OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
                      UINT8 ipProto,
                      UINT32 l4Offset,
                      OvsConntrackKeyLookupCtx *ctx,
                      OvsFlowKey *key,
    +                 PNAT_ACTION_INFO natInfo,
                      BOOLEAN commit,
                      UINT64 currentTime,
                      BOOLEAN *entryCreated)
    @@ -185,6 +211,7 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
         POVS_CT_ENTRY entry = NULL;
         *entryCreated = FALSE;
         UINT32 state = 0;
    +    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
         switch (ipProto)
         {
             case IPPROTO_TCP:
    @@ -212,12 +239,9 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
                     if (parentEntry != NULL) {
                         entry->parent = parentEntry;
                     }
    -                OvsCtAddEntry(entry, ctx, currentTime);
                     *entryCreated = TRUE;
                 }
    -
    -            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
    -            return entry;
    +            break;
             }
             case IPPROTO_ICMP:
             {
    @@ -231,35 +255,31 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
                 state |= OVS_CS_F_NEW;
                 if (commit) {
                     entry = OvsConntrackCreateIcmpEntry(currentTime);
    -                if (!entry) {
    -                    return NULL;
    -                }
    -                OvsCtAddEntry(entry, ctx, currentTime);
                     *entryCreated = TRUE;
                 }
    -
    -            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
    -            return entry;
    +            break;
             }
             case IPPROTO_UDP:
             {
                 state |= OVS_CS_F_NEW;
                 if (commit) {
                     entry = OvsConntrackCreateOtherEntry(currentTime);
    -                if (!entry) {
    -                    return NULL;
    -                }
    -                OvsCtAddEntry(entry, ctx, currentTime);
                     *entryCreated = TRUE;
                 }
    -
    -            OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
    -            return entry;
    +            break;
             }
             default:
                 goto invalid;
         }
     
    +    if (commit && !entry) {
    +        return NULL;
    +    }
    +    if (entry && !OvsCtAddEntry(entry, ctx, natInfo, currentTime)) {
    +        return NULL;
    +    }
    +    OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
    +    return entry;
     invalid:
         state |= OVS_CS_F_INVALID;
         OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
    @@ -268,11 +288,11 @@ invalid:
     
     static enum CT_UPDATE_RES
     OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
    -                        PNET_BUFFER_LIST nbl,
    -                        UINT8 ipProto,
    -                        UINT32 l4Offset,
    -                        BOOLEAN reply,
    -                        UINT64 now)
    +                 PNET_BUFFER_LIST nbl,
    +                 UINT8 ipProto,
    +                 UINT32 l4Offset,
    +                 BOOLEAN reply,
    +                 UINT64 now)
     {
         switch (ipProto)
         {
    @@ -298,6 +318,12 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
     static __inline VOID
     OvsCtEntryDelete(POVS_CT_ENTRY entry)
     {
    +    if (entry == NULL) {
    +        return;
    +    }
    +    if (entry->natInfo.natAction) {
    +        OvsNatDeleteKey(&entry->key);
    +    }
         OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
         RemoveEntryList(&entry->link);
         OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
    @@ -307,10 +333,6 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry)
     static __inline BOOLEAN
     OvsCtEntryExpired(POVS_CT_ENTRY entry)
     {
    -    if (entry == NULL) {
    -        return TRUE;
    -    }
    -
         UINT64 currentTime;
         NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
         return entry->expiration < currentTime;
    @@ -338,7 +360,7 @@ OvsDetectCtPacket(OvsFlowKey *key)
         return NDIS_STATUS_NOT_SUPPORTED;
     }
     
    -static __inline BOOLEAN
    +BOOLEAN
     OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
     {
         return ((ctxKey.src.addr.ipv4 == entryKey.src.addr.ipv4) &&
    @@ -364,13 +386,14 @@ OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
         }
     }
     
    -static __inline POVS_CT_ENTRY
    +POVS_CT_ENTRY
     OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
     {
         PLIST_ENTRY link;
         POVS_CT_ENTRY entry;
         BOOLEAN reply = FALSE;
         POVS_CT_ENTRY found = NULL;
    +    OVS_CT_KEY key = ctx->key;
     
         if (!ctTotalEntries) {
             return found;
    @@ -379,13 +402,18 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
         LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) {
             entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
     
    -        if (OvsCtKeyAreSame(ctx->key,entry->key)) {
    +        if (OvsCtKeyAreSame(key,entry->key)) {
                 found = entry;
                 reply = FALSE;
                 break;
             }
     
    -        if (OvsCtKeyAreSame(ctx->key,entry->rev_key)) {
    +        /* Reverse NAT must be performed before OvsCtLookup, so here
    +         * we simply need to flip the src and dst in key and compare
    +         * they are equal. Note that flipped key is not equal to
    +         * rev_key due to NAT effect. */
    +        OvsCtKeyReverse(&key);
    +        if (OvsCtKeyAreSame(key, entry->key)) {
                 found = entry;
                 reply = TRUE;
                 break;
    @@ -404,17 +432,18 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
         return found;
     }
     
    -static __inline UINT32
    -OvsExtractLookupCtxHash(OvsConntrackKeyLookupCtx *ctx)
    +UINT32
    +OvsHashCtKey(const OVS_CT_KEY *key)
     {
    -    UINT32 hsrc, hdst,hash;
    -    hsrc = OvsJhashBytes((UINT32*) &ctx->key.src, sizeof(ctx->key.src), 0);
    -    hdst = OvsJhashBytes((UINT32*) &ctx->key.dst, sizeof(ctx->key.dst), 0);
    +    UINT32 hsrc, hdst, hash;
    +    hsrc = OvsJhashBytes((UINT32*) &key->src, sizeof(key->src), 0);
    +    hdst = OvsJhashBytes((UINT32*) &key->dst, sizeof(key->dst), 0);
         hash = hsrc ^ hdst; /* TO identify reverse traffic */
    -    return OvsJhashBytes((uint32_t *) &ctx->key.dst + 1,
    -                         ((uint32_t *) (&ctx->key + 1) -
    -                         (uint32_t *) (&ctx->key.dst + 1)),
    +    hash = OvsJhashBytes((uint32_t *) &key->dst + 1,
    +                         ((uint32_t *) (key + 1) -
    +                         (uint32_t *) (&key->dst + 1)),
                              hash);
    +    return hash;
     }
     
     static UINT8
    @@ -445,6 +474,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
                         PNET_BUFFER_LIST curNbl,
                         UINT32 l4Offset)
     {
    +    const OVS_NAT_ENTRY *natEntry;
         ctx->key.zone = zone;
         ctx->key.dl_type = flowKey->l2.dlType;
         ctx->related = FALSE;
    @@ -506,7 +536,14 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
             return NDIS_STATUS_INVALID_PACKET;
         }
     
    -    ctx->hash = OvsExtractLookupCtxHash(ctx);
    +    natEntry = OvsNatLookup(&ctx->key, TRUE);
    +    if (natEntry) {
    +        // Translate address first for reverse NAT
    +        ctx->key = natEntry->ctEntry->key;
    +        OvsCtKeyReverse(&ctx->key);
    +    }
    +
    +    ctx->hash = OvsHashCtKey(&ctx->key);
         return NDIS_STATUS_SUCCESS;
     }
     
    @@ -524,17 +561,19 @@ OvsDetectFtpPacket(OvsFlowKey *key) {
      *----------------------------------------------------------------------------
      */
     static __inline POVS_CT_ENTRY
    -OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
    +OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
                              UINT32 l4Offset,
                              OvsConntrackKeyLookupCtx *ctx,
                              OvsFlowKey *key,
                              UINT16 zone,
    +                         NAT_ACTION_INFO *natInfo,
                              BOOLEAN commit,
                              UINT64 currentTime,
                              BOOLEAN *entryCreated)
     {
         POVS_CT_ENTRY entry = ctx->entry;
         UINT32 state = 0;
    +    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
         *entryCreated = FALSE;
     
         /* If an entry was found, update the state based on TCP flags */
    @@ -561,8 +600,8 @@ OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
                 //Delete and update the Conntrack
                 OvsCtEntryDelete(ctx->entry);
                 ctx->entry = NULL;
    -            entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto, l4Offset,
    -                                     ctx, key, commit, currentTime,
    +            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset,
    +                                     ctx, key, natInfo, commit, currentTime,
                                          entryCreated);
                 if (!entry) {
                     return NULL;
    @@ -629,7 +668,7 @@ OvsConntrackSetLabels(OvsFlowKey *key,
     }
     
     static __inline NDIS_STATUS
    -OvsCtExecute_(PNET_BUFFER_LIST curNbl,
    +OvsCtExecute_(OvsForwardingContext *fwdCtx,
                   OvsFlowKey *key,
                   OVS_PACKET_HDR_INFO *layers,
                   BOOLEAN commit,
    @@ -641,13 +680,12 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
     {
         NDIS_STATUS status = NDIS_STATUS_SUCCESS;
         POVS_CT_ENTRY entry = NULL;
    +    PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
         OvsConntrackKeyLookupCtx ctx = { 0 };
         LOCK_STATE_EX lockState;
         UINT64 currentTime;
         NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
     
    -    /* XXX: Not referenced for now */
    -    UNREFERENCED_PARAMETER(natInfo);
     
         /* Retrieve the Conntrack Key related fields from packet */
         OvsCtSetupLookupCtx(key, zone, &ctx, curNbl, layers->l4Offset);
    @@ -659,18 +697,29 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
         BOOLEAN entryCreated = FALSE;
         if (!entry) {
             /* If no matching entry was found, create one and add New state */
    -        entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto,
    +        entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
                                      layers->l4Offset, &ctx,
    -                                 key, commit, currentTime,
    +                                 key, natInfo, commit, currentTime,
                                      &entryCreated);
         } else {
             /* Process the entry and update CT flags */
             OvsCtIncrementCounters(entry, ctx.reply, curNbl);
    -        entry = OvsProcessConntrackEntry(curNbl, layers->l4Offset, &ctx, key,
    -                                         zone, commit, currentTime,
    +        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
    +                                         zone, natInfo, commit, currentTime,
                                              &entryCreated);
         }
     
    +    /* Note that natInfo is not the same as entry->natInfo here. natInfo
    +       is decided by action in the openflow rule, entry->natInfo is decided
    +       when the entry is created. In the reverse NAT case, natInfo is
    +       NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or
    +       NAT_ACTION_DST without NAT_ACTION_REVERSE */
    +    if (entry && natInfo->natAction != NAT_ACTION_NONE)
    +    {
    +        OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
    +                     key, ctx.reply);
    +    }
    +
         if (entry && mark) {
             OvsConntrackSetMark(key, entry, mark->value, mark->mask);
         }
    @@ -706,7 +755,7 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
      *---------------------------------------------------------------------------
      */
     NDIS_STATUS
    -OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
    +OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
                               OVS_PACKET_HDR_INFO *layers,
                               OvsFlowKey *key,
                               const PNL_ATTR a)
    @@ -753,12 +802,12 @@ OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
             BOOLEAN hasMaxIp = FALSE;
             BOOLEAN hasMaxPort = FALSE;
             NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) {
    -            enum ovs_nat_attr sub_type_nest = NlAttrType(natAttr);
    -            switch(sub_type_nest) {
    +            enum ovs_nat_attr subtype = NlAttrType(natAttr);
    +            switch(subtype) {
                 case OVS_NAT_ATTR_SRC:
                 case OVS_NAT_ATTR_DST:
                     natActionInfo.natAction |=
    -                    ((sub_type_nest == OVS_NAT_ATTR_SRC)
    +                    ((subtype == OVS_NAT_ATTR_SRC)
                             ? NAT_ACTION_SRC : NAT_ACTION_DST);
                     break;
                 case OVS_NAT_ATTR_IP_MIN:
    @@ -816,19 +865,19 @@ OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
             }
         }
     
    -    status = OvsCtExecute_(curNbl, key, layers,
    +    status = OvsCtExecute_(fwdCtx, key, layers,
                                commit, zone, mark, labels, helper, &natActionInfo);
         return status;
     }
     
     /*
      *----------------------------------------------------------------------------
    - * ovsConntrackEntryCleaner
    + * OvsConntrackEntryCleaner
      *     Runs periodically and cleans up the connection tracker
      *----------------------------------------------------------------------------
      */
     VOID
    -ovsConntrackEntryCleaner(PVOID data)
    +OvsConntrackEntryCleaner(PVOID data)
     {
     
         POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data;
    @@ -845,15 +894,13 @@ ovsConntrackEntryCleaner(PVOID data)
             }
     
             /* Set the timeout for the thread and cleanup */
    -        UINT64 currentTime, threadSleepTimeout;
    -        NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
    -        threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL;
    +        INT64 threadSleepTimeout = -CT_CLEANUP_INTERVAL;
     
             if (ctTotalEntries) {
                 for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
                     LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) {
                         entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
    -                    if (entry->expiration < currentTime) {
    +                    if (entry && OvsCtEntryExpired(entry)) {
                             OvsCtEntryDelete(entry);
                         }
                     }
    @@ -894,6 +941,7 @@ OvsCtFlush(UINT16 zone)
         }
     
         NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
    +    OvsNatFlush(zone);
         return NDIS_STATUS_SUCCESS;
     }
     
    diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
    index 875c434..584c8ff 100644
    --- a/datapath-windows/ovsext/Conntrack.h
    +++ b/datapath-windows/ovsext/Conntrack.h
    @@ -20,6 +20,7 @@
     #include "precomp.h"
     #include "Flow.h"
     #include "Debug.h"
    +#include "Actions.h"
     #include <stddef.h>
     
     #ifdef OVS_DBG_MOD
    @@ -86,6 +87,14 @@ typedef struct _OVS_CT_KEY {
         UINT64 byteCount;
     } OVS_CT_KEY, *POVS_CT_KEY;
     
    +typedef struct _NAT_ACTION_INFO {
    +    struct ct_addr minAddr;
    +    struct ct_addr maxAddr;
    +    uint16_t minPort;
    +    uint16_t maxPort;
    +    uint16_t natAction;
    +} NAT_ACTION_INFO, *PNAT_ACTION_INFO;
    +
     typedef struct OVS_CT_ENTRY {
         OVS_CT_KEY  key;
         OVS_CT_KEY  rev_key;
    @@ -94,6 +103,7 @@ typedef struct OVS_CT_ENTRY {
         UINT32      mark;
         UINT64      timestampStart;
         struct ovs_key_ct_labels labels;
    +    NAT_ACTION_INFO natInfo;
         PVOID       parent; /* Points to main connection */
     } OVS_CT_ENTRY, *POVS_CT_ENTRY;
     
    @@ -118,14 +128,6 @@ typedef struct OvsConntrackKeyLookupCtx {
         BOOLEAN         related;
     } OvsConntrackKeyLookupCtx;
     
    -typedef struct _NAT_ACTION_INFO {
    -    struct ct_addr minAddr;
    -    struct ct_addr maxAddr;
    -    uint16_t minPort;
    -    uint16_t maxPort;
    -    uint16_t natAction;
    -} NAT_ACTION_INFO, *PNAT_ACTION_INFO;
    -
     #define CT_HASH_TABLE_SIZE ((UINT32)1 << 10)
     #define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1)
     #define CT_INTERVAL_SEC 10000000LL //1s
    @@ -172,7 +174,7 @@ OvsGetTcpPayloadLength(PNET_BUFFER_LIST nbl)
     VOID OvsCleanupConntrack(VOID);
     NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context);
     
    -NDIS_STATUS OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
    +NDIS_STATUS OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
                                           OVS_PACKET_HDR_INFO *layers,
                                           OvsFlowKey *key,
                                           const PNL_ATTR a);
    @@ -225,4 +227,9 @@ NDIS_STATUS OvsCtHandleFtp(PNET_BUFFER_LIST curNbl,
                                POVS_CT_ENTRY entry,
                                BOOLEAN request);
     
    +UINT32 OvsHashCtKey(const OVS_CT_KEY *key);
    +BOOLEAN OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey);
    +POVS_CT_ENTRY OvsCtLookup(OvsConntrackKeyLookupCtx *ctx);
    +
    +
     #endif /* __OVS_CONNTRACK_H_ */
    diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
    index 44aea19..75754fb 100644
    --- a/datapath-windows/ovsext/ovsext.vcxproj
    +++ b/datapath-windows/ovsext/ovsext.vcxproj
    @@ -103,6 +103,7 @@
         <ClInclude Include="Actions.h" />
         <ClInclude Include="Atomic.h" />
         <ClInclude Include="BufferMgmt.h" />
    +    <ClInclude Include="Conntrack-nat.h" />
         <ClInclude Include="Conntrack.h" />
         <ClInclude Include="Datapath.h" />
         <ClInclude Include="Debug.h" />
    @@ -256,6 +257,7 @@
       <ItemGroup>
         <ClCompile Include="Actions.c" />
         <ClCompile Include="BufferMgmt.c" />
    +    <ClCompile Include="Conntrack-nat.c" />
         <ClCompile Include="Conntrack-related.c" />
         <ClCompile Include="Conntrack-ftp.c" />
         <ClCompile Include="Conntrack-icmp.c" />
    -- 
    2.10.2.windows.1
    
    















More information about the dev mailing list