[ovs-dev] [PATCH v4 2/4] datapath-windows: Remove NAT locks in conntrack.

Shashank Ram rams at vmware.com
Mon Jun 18 18:54:24 UTC 2018



On 06/17/2018 10:37 PM, Anand Kumar wrote:
> This patch primarily gets rid of NdisRWLock in conntrack for NAT
> functionality along with some conntrack optimization. The subsequent
> patch will have a lock implementation inside NAT module.
>
> - Introduce a new function OvsGetTcpHeader() to retrieve TCP header
>    and payload length, to optimize for TCP traffic.
> - Optimize conntrack look up.
> - Remove 'bucketlockRef' member from conntrack entry structure.
>
> Signed-off-by: Anand Kumar <kumaranand at vmware.com>
> ---
>   datapath-windows/ovsext/Conntrack-ftp.c |   4 +-
>   datapath-windows/ovsext/Conntrack-tcp.c |  15 ++---
>   datapath-windows/ovsext/Conntrack.c     | 110 +++++++++++++-------------------
>   datapath-windows/ovsext/Conntrack.h     |  36 +++++++----
>   4 files changed, 74 insertions(+), 91 deletions(-)
>
> diff --git a/datapath-windows/ovsext/Conntrack-ftp.c b/datapath-windows/ovsext/Conntrack-ftp.c
> index 6830dfa..ce09a65 100644
> --- a/datapath-windows/ovsext/Conntrack-ftp.c
> +++ b/datapath-windows/ovsext/Conntrack-ftp.c
> @@ -129,14 +129,14 @@ OvsCtHandleFtp(PNET_BUFFER_LIST curNbl,
>       char temp[256] = { 0 };
>       char ftpMsg[256] = { 0 };
>   
> +    UINT32 len;
>       TCPHdr tcpStorage;
>       const TCPHdr *tcp;
> -    tcp = OvsGetTcp(curNbl, layers->l4Offset, &tcpStorage);
> +    tcp = OvsGetTcpHeader(curNbl, layers, &tcpStorage, &len);
>       if (!tcp) {
>           return NDIS_STATUS_INVALID_PACKET;
>       }
>   
> -    UINT32 len = OvsGetTcpPayloadLength(curNbl);
>       if (len > sizeof(temp)) {
>           /* We only care up to 256 */
>           len = sizeof(temp);
> diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c
> index 8cbab24..eda42ac 100644
> --- a/datapath-windows/ovsext/Conntrack-tcp.c
> +++ b/datapath-windows/ovsext/Conntrack-tcp.c
> @@ -194,9 +194,9 @@ OvsCastConntrackEntryToTcpEntry(OVS_CT_ENTRY* conn)
>   enum CT_UPDATE_RES
>   OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
>                              const TCPHdr *tcp,
> -                           PNET_BUFFER_LIST nbl,
>                              BOOLEAN reply,
> -                           UINT64 now)
> +                           UINT64 now,
> +                           UINT32 tcpPayloadLen)
>   {
>       struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_);
>       /* The peer that sent 'pkt' */
> @@ -207,7 +207,6 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
>       UINT16 tcp_flags = ntohs(tcp->flags);
>       uint16_t win = ntohs(tcp->window);
>       uint32_t ack, end, seq, orig_seq;
> -    uint32_t p_len = OvsGetTcpPayloadLength(nbl);
>       int ackskew;
>   
>       if (OvsCtInvalidTcpFlags(tcp_flags)) {
> @@ -248,7 +247,7 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
>   
>           ack = ntohl(tcp->ack_seq);
>   
> -        end = seq + p_len;
> +        end = seq + tcpPayloadLen;
>           if (tcp_flags & TCP_SYN) {
>               end++;
>               if (dst->wscale & CT_WSCALE_FLAG) {
> @@ -287,7 +286,7 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
>   
>       } else {
>           ack = ntohl(tcp->ack_seq);
> -        end = seq + p_len;
> +        end = seq + tcpPayloadLen;
>           if (tcp_flags & TCP_SYN) {
>               end++;
>           }
> @@ -469,8 +468,8 @@ OvsConntrackValidateTcpPacket(const TCPHdr *tcp)
>   
>   OVS_CT_ENTRY *
>   OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
> -                           PNET_BUFFER_LIST nbl,
> -                           UINT64 now)
> +                           UINT64 now,
> +                           UINT32 tcpPayloadLen)
>   {
>       struct conn_tcp* newconn;
>       struct tcp_peer *src, *dst;
> @@ -486,7 +485,7 @@ OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
>       dst = &newconn->peer[1];
>   
>       src->seqlo = ntohl(tcp->seq);
> -    src->seqhi = src->seqlo + OvsGetTcpPayloadLength(nbl) + 1;
> +    src->seqhi = src->seqlo + tcpPayloadLen + 1;
>   
>       if (tcp->flags & TCP_SYN) {
>           src->seqhi++;
> diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
> index 7b54fba..2a85e57 100644
> --- a/datapath-windows/ovsext/Conntrack.c
> +++ b/datapath-windows/ovsext/Conntrack.c
> @@ -32,7 +32,6 @@ KSTART_ROUTINE OvsConntrackEntryCleaner;
>   static PLIST_ENTRY ovsConntrackTable;
>   static OVS_CT_THREAD_CTX ctThreadCtx;
>   static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL;
> -static PNDIS_RW_LOCK_EX ovsCtNatLockObj;
>   extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
>   static ULONG ctTotalEntries;
>   
> @@ -54,19 +53,11 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
>       ctTotalEntries = 0;
>       UINT32 numBucketLocks = CT_HASH_TABLE_SIZE;
>   
> -    /* Init the sync-lock */
> -    ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
> -    if (ovsCtNatLockObj == NULL) {
> -        return STATUS_INSUFFICIENT_RESOURCES;
> -    }
> -
>       /* Init the Hash Buffer */
>       ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
>                                                    * CT_HASH_TABLE_SIZE,
>                                                    OVS_CT_POOL_TAG);
>       if (ovsConntrackTable == NULL) {
> -        NdisFreeRWLock(ovsCtNatLockObj);
> -        ovsCtNatLockObj = NULL;
>           return STATUS_INSUFFICIENT_RESOURCES;
>       }
>   
> @@ -121,8 +112,6 @@ freeBucketLock:
>   freeTable:
>       OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
>       ovsConntrackTable = NULL;
> -    NdisFreeRWLock(ovsCtNatLockObj);
> -    ovsCtNatLockObj = NULL;
>       return status;
>   }
>   
> @@ -135,7 +124,6 @@ freeTable:
>   VOID
>   OvsCleanupConntrack(VOID)
>   {
> -    LOCK_STATE_EX lockStateNat;
>       ctThreadCtx.exit = 1;
>       KeSetEvent(&ctThreadCtx.event, 0, FALSE);
>       KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
> @@ -160,12 +148,7 @@ OvsCleanupConntrack(VOID)
>       }
>       OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG);
>       ovsCtBucketLock = NULL;
> -
> -    NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
>       OvsNatCleanup();
> -    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
> -    NdisFreeRWLock(ovsCtNatLockObj);
> -    ovsCtNatLockObj = NULL;
>   }
>   
>   static __inline VOID
> @@ -244,25 +227,20 @@ OvsCtAddEntry(POVS_CT_ENTRY entry,
>       if (natInfo == NULL) {
>           entry->natInfo.natAction = NAT_ACTION_NONE;
>       } else {
> -        LOCK_STATE_EX lockStateNat;
> -        NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
>           if (OvsIsForwardNat(natInfo->natAction)) {
>               entry->natInfo = *natInfo;
>               if (!OvsNatTranslateCtEntry(entry)) {
> -                NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>                   return FALSE;
>               }
>               ctx->hash = OvsHashCtKey(&entry->key);
>           } else {
>               entry->natInfo.natAction = natInfo->natAction;
>           }
> -        NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>       }
>   
>       entry->timestampStart = now;
>       NdisAllocateSpinLock(&(entry->lock));
>       UINT32 bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
> -    entry->bucketLockRef = ovsCtBucketLock[bucketIdx];
>       NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockState, 0);
>       InsertHeadList(&ovsConntrackTable[bucketIdx],
>                      &entry->link);
> @@ -275,7 +253,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry,
>   static __inline POVS_CT_ENTRY
>   OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
>                    UINT8 ipProto,
> -                 UINT32 l4Offset,
> +                 OVS_PACKET_HDR_INFO *layers,
>                    OvsConntrackKeyLookupCtx *ctx,
>                    OvsFlowKey *key,
>                    PNAT_ACTION_INFO natInfo,
> @@ -293,16 +271,18 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
>       switch (ipProto) {
>       case IPPROTO_TCP:
>       {
> +        UINT32 tcpPayloadLen;
>           TCPHdr tcpStorage;
>           const TCPHdr *tcp;
> -        tcp = OvsGetTcp(curNbl, l4Offset, &tcpStorage);
> +        tcp = OvsGetTcpHeader(curNbl, layers, &tcpStorage, &tcpPayloadLen);
>           if (!OvsConntrackValidateTcpPacket(tcp)) {
>               state = OVS_CS_F_INVALID;
>               break;
>           }
>   
>           if (commit) {
> -            entry = OvsConntrackCreateTcpEntry(tcp, curNbl, currentTime);
> +            entry = OvsConntrackCreateTcpEntry(tcp, currentTime,
> +                                               tcpPayloadLen);
>           }
>           break;
>       }
> @@ -310,7 +290,7 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
>       {
>           ICMPHdr storage;
>           const ICMPHdr *icmp;
> -        icmp = OvsGetIcmp(curNbl, l4Offset, &storage);
> +        icmp = OvsGetIcmp(curNbl, layers->l4Offset, &storage);
>           if (!OvsConntrackValidateIcmpPacket(icmp)) {
>               if(icmp) {
>                   OVS_LOG_TRACE("Invalid ICMP packet detected, icmp->type %u",
> @@ -371,7 +351,7 @@ static enum CT_UPDATE_RES
>   OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
>                    PNET_BUFFER_LIST nbl,
>                    UINT8 ipProto,
> -                 UINT32 l4Offset,
> +                 OVS_PACKET_HDR_INFO *layers,
>                    BOOLEAN reply,
>                    UINT64 now)
>   {
> @@ -380,15 +360,17 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
>       switch (ipProto) {
>       case IPPROTO_TCP:
>       {
> +        UINT32 tcpPayloadLen;
>           TCPHdr tcpStorage;
>           const TCPHdr *tcp;
> -        tcp = OvsGetTcp(nbl, l4Offset, &tcpStorage);
> +        tcp = OvsGetTcpHeader(nbl, layers, &tcpStorage, &tcpPayloadLen);
>           if (!tcp) {
>               status = CT_UPDATE_INVALID;
>               break;
>           }
>           OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
> -        status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now);
> +        status = OvsConntrackUpdateTcpEntry(entry, tcp, reply, now,
> +                                            tcpPayloadLen);
>           OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
>           break;
>       }
> @@ -433,10 +415,7 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
>       OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
>       if (forceDelete || OvsCtEntryExpired(entry)) {
>           if (entry->natInfo.natAction) {
> -            LOCK_STATE_EX lockStateNat;
> -            NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
>               OvsNatDeleteKey(&entry->key);
> -            NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>           }
>           OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
>           RemoveEntryList(&entry->link);
> @@ -479,15 +458,12 @@ OvsDetectCtPacket(OvsForwardingContext *fwdCtx,
>   }
>   
>   BOOLEAN
> -OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
> +OvsCtEndpointsAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
>   {
>       return ((NdisEqualMemory(&ctxKey.src, &entryKey.src,
>                                sizeof(struct ct_endpoint))) &&
>               (NdisEqualMemory(&ctxKey.dst, &entryKey.dst,
> -                             sizeof(struct ct_endpoint))) &&
> -            (ctxKey.dl_type == entryKey.dl_type) &&
> -            (ctxKey.nw_proto == entryKey.nw_proto) &&
> -            (ctxKey.zone == entryKey.zone));
> +                             sizeof(struct ct_endpoint))));
>   }
>   
>   POVS_CT_ENTRY
> @@ -499,6 +475,11 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>       POVS_CT_ENTRY found = NULL;
>       LOCK_STATE_EX lockStateTable;
>       UINT32 bucketIdx;
> +
> +    if (!ctTotalEntries) {
> +        return found;
> +    }
> +

The counter above needs protection to guard against change?
>       /* Reverse NAT must be performed before OvsCtLookup, so here
>        * we simply need to flip the src and dst in key and compare
>        * they are equal. Note that flipped key is not equal to
> @@ -507,10 +488,6 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>       OVS_CT_KEY revCtxKey = ctx->key;
>       OvsCtKeyReverse(&revCtxKey);
>   
> -    if (!ctTotalEntries) {
> -        return found;
> -    }
> -
>       KIRQL irql = KeGetCurrentIrql();
>       bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
>       NdisAcquireRWLockRead(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
> @@ -518,12 +495,19 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>           entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
>           OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
>   
> -        if (OvsCtKeyAreSame(ctx->key, entry->key)) {
> +        if ((ctx->key.dl_type != entry->key.dl_type) ||
> +            (ctx->key.nw_proto != entry->key.nw_proto) ||
> +            (ctx->key.zone != entry->key.zone)) {
> +            OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
> +            continue;
> +        }
> +
> +        if (OvsCtEndpointsAreSame(ctx->key, entry->key)) {
>               found = entry;
>               reply = FALSE;
>           }
>   
> -        if (!found && OvsCtKeyAreSame(revCtxKey, entry->key)) {
> +        if (!found && OvsCtEndpointsAreSame(revCtxKey, entry->key)) {
>               found = entry;
>               reply = TRUE;
>           }
> @@ -649,10 +633,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
>           return NDIS_STATUS_INVALID_PACKET;
>       }
>   
> -    LOCK_STATE_EX lockStateNat;
> -    NdisAcquireRWLockRead(ovsCtNatLockObj, &lockStateNat, 0);
>       natEntry = OvsNatLookup(&ctx->key, TRUE);
> -    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>       if (natEntry) {
>           /* Translate address first for reverse NAT */
>           ctx->key = natEntry->ctEntry->key;
> @@ -678,7 +659,7 @@ OvsDetectFtpPacket(OvsFlowKey *key) {
>    */
>   static __inline POVS_CT_ENTRY
>   OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
> -                         UINT32 l4Offset,
> +                         OVS_PACKET_HDR_INFO *layers,
>                            OvsConntrackKeyLookupCtx *ctx,
>                            OvsFlowKey *key,
>                            UINT16 zone,
> @@ -691,7 +672,7 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
>       UINT32 state = 0;
>       PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
>       LOCK_STATE_EX lockStateTable;
> -    PNDIS_RW_LOCK_EX bucketLockRef = NULL;
> +
>       *entryCreated = FALSE;
>   
>       /* If an entry was found, update the state based on TCP flags */
> @@ -702,8 +683,9 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
>           }
>       } else {
>           CT_UPDATE_RES result;
> -        result = OvsCtUpdateEntry(entry, curNbl, key->ipKey.nwProto,
> -                                  l4Offset, ctx->reply, currentTime);
> +        UINT32 bucketIdx;
> +        result = OvsCtUpdateEntry(entry, curNbl, key->ipKey.nwProto, layers,
> +                                  ctx->reply, currentTime);
>           switch (result) {
>           case CT_UPDATE_VALID:
>               state |= OVS_CS_F_ESTABLISHED;
> @@ -716,12 +698,12 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
>               break;
>           case CT_UPDATE_NEW:
>               //Delete and update the Conntrack
> -            bucketLockRef = entry->bucketLockRef;
> -            NdisAcquireRWLockWrite(bucketLockRef, &lockStateTable, 0);
> +            bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
> +            NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
>               OvsCtEntryDelete(ctx->entry, TRUE);
> -            NdisReleaseRWLock(bucketLockRef, &lockStateTable);
> +            NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockStateTable);
>               ctx->entry = NULL;
> -            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset,
> +            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, layers,
>                                        ctx, key, natInfo, commit, currentTime,
>                                        entryCreated);
>               if (!entry) {
> @@ -869,10 +851,10 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>   
>       /* Delete entry in reverse direction if 'force' is specified */
>       if (force && ctx.reply && entry) {
> -        PNDIS_RW_LOCK_EX bucketLockRef = entry->bucketLockRef;
> -        NdisAcquireRWLockWrite(bucketLockRef, &lockStateTable, 0);
> +        UINT32 bucketIdx = ctx.hash & CT_HASH_TABLE_MASK;
> +        NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
>           OvsCtEntryDelete(entry, TRUE);
> -        NdisReleaseRWLock(bucketLockRef, &lockStateTable);
> +        NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockStateTable);
>           entry = NULL;
>       }
>   
> @@ -885,7 +867,7 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>               OvsCtIncrementCounters(entry, ctx.reply, curNbl);
>           }
>           /* Process the entry and update CT flags */
> -        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
> +        entry = OvsProcessConntrackEntry(fwdCtx, layers, &ctx, key,
>                                            zone, natInfo, commit, currentTime,
>                                            &entryCreated);
>   
> @@ -900,7 +882,7 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>           }
>           /* If no matching entry was found, create one and add New state */
>           entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
> -                                 layers->l4Offset, &ctx,
> +                                 layers, &ctx,
>                                    key, natInfo, commit, currentTime,
>                                    &entryCreated);
>       }
> @@ -918,13 +900,9 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>        */
>       KIRQL irql = KeGetCurrentIrql();
>       OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
> -    if (natInfo->natAction != NAT_ACTION_NONE)
> -    {
> -        LOCK_STATE_EX lockStateNat;
> -        NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
> +    if (natInfo->natAction != NAT_ACTION_NONE) {
>           OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
>                        key, ctx.reply);
> -        NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>       }
>   
>       OvsCtSetMarkLabel(key, entry, mark, labels, &triggerUpdateEvent);
> @@ -1156,7 +1134,7 @@ OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple)
>   {
>       PLIST_ENTRY link, next;
>       POVS_CT_ENTRY entry;
> -    LOCK_STATE_EX lockState, lockStateNat;
> +    LOCK_STATE_EX lockState;
>   
>       if (ctTotalEntries) {
>           for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
> @@ -1188,9 +1166,7 @@ OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple)
>           }
>       }
>   
> -    NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
>       OvsNatFlush(zone);
> -    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>       return NDIS_STATUS_SUCCESS;
>   }
>   
> diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
> index 7dc92a1..7a80eea 100644
> --- a/datapath-windows/ovsext/Conntrack.h
> +++ b/datapath-windows/ovsext/Conntrack.h
> @@ -99,8 +99,6 @@ typedef struct _NAT_ACTION_INFO {
>   } NAT_ACTION_INFO, *PNAT_ACTION_INFO;
>   
>   typedef struct OVS_CT_ENTRY {
> -    /* Reference to ovsCtBucketLock of ovsConntrackTable.*/
> -    PNDIS_RW_LOCK_EX bucketLockRef;
>       NDIS_SPIN_LOCK lock;       /* Protects OVS_CT_ENTRY. */
>       OVS_CT_KEY  key;
>       OVS_CT_KEY  rev_key;
> @@ -156,23 +154,33 @@ OvsConntrackUpdateExpiration(OVS_CT_ENTRY *ctEntry,
>       ctEntry->expiration = now + interval;
>   }
>   
> -static __inline UINT32
> -OvsGetTcpPayloadLength(PNET_BUFFER_LIST nbl)
> +static const TCPHdr*
> +OvsGetTcpHeader(PNET_BUFFER_LIST nbl,
> +                OVS_PACKET_HDR_INFO *layers,
> +                VOID *storage,
> +                UINT32 *tcpPayloadLen)
>   {
>       IPHdr *ipHdr;
>       TCPHdr *tcp;
> -    char *ipBuf[sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr)];
> +    VOID *dest = storage;
>   
> -    ipHdr = NdisGetDataBuffer(NET_BUFFER_LIST_FIRST_NB(nbl), sizeof *ipBuf,
> -                              (PVOID)&ipBuf, 1 /*no align*/, 0);
> +    ipHdr = NdisGetDataBuffer(NET_BUFFER_LIST_FIRST_NB(nbl),
> +                              layers->l4Offset + sizeof(TCPHdr),
> +                              NULL, 1 /*no align*/, 0);
>       if (ipHdr == NULL) {
> -        return 0;
> +        return NULL;
>       }
>   
> -    ipHdr = (IPHdr *)((PCHAR)ipHdr + sizeof(EthHdr));
> +    ipHdr = (IPHdr *)((PCHAR)ipHdr + layers->l3Offset);
>       tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
> +    if (tcp->doff * 4 >= sizeof *tcp) {
> +        NdisMoveMemory(dest, tcp, sizeof(TCPHdr));
> +        *tcpPayloadLen = ntohs((ipHdr->tot_len) - (ipHdr->ihl * 4) -
> +                               (TCP_HDR_LEN(tcp)));
> +        return storage;
> +    }
>   
> -    return (ntohs(ipHdr->tot_len) - (ipHdr->ihl * 4) - (TCP_HDR_LEN(tcp)));
> +    return NULL;
>   }
>   
>   VOID OvsCleanupConntrack(VOID);
> @@ -184,17 +192,17 @@ NDIS_STATUS OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
>   BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr *tcp);
>   BOOLEAN OvsConntrackValidateIcmpPacket(const ICMPHdr *icmp);
>   OVS_CT_ENTRY * OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
> -                                          PNET_BUFFER_LIST nbl,
> -                                          UINT64 now);
> +                                          UINT64 now,
> +                                          UINT32 tcpPayloadLen);
>   NDIS_STATUS OvsCtMapTcpProtoInfoToNl(PNL_BUFFER nlBuf,
>                                        OVS_CT_ENTRY *conn_);
>   OVS_CT_ENTRY * OvsConntrackCreateOtherEntry(UINT64 now);
>   OVS_CT_ENTRY * OvsConntrackCreateIcmpEntry(UINT64 now);
>   enum CT_UPDATE_RES OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
>                                                 const TCPHdr *tcp,
> -                                              PNET_BUFFER_LIST nbl,
>                                                 BOOLEAN reply,
> -                                              UINT64 now);
> +                                              UINT64 now,
> +                                              UINT32 tcpPayloadLen);
>   enum CT_UPDATE_RES OvsConntrackUpdateOtherEntry(OVS_CT_ENTRY *conn_,
>                                                   BOOLEAN reply,
>                                                   UINT64 now);



More information about the dev mailing list