[ovs-dev] [PATCH v2 1/5] datapath-windows: Added a new file to support Ipv4 fragments.

Guru Shetty guru at ovn.org
Wed Jan 18 18:54:15 UTC 2017


On 12 January 2017 at 13:13, Anand Kumar <kumaranand at vmware.com> wrote:

> This patch adds functionality to handle IPv4 fragments, which will be
> used by the Conntrack module.
>
> Added a new structure to hold the IPv4 fragments and a hash table to
> hold IPv4 datagram entries. Also added a cleanup thread that runs
> every minute to delete the expired IPv4 datagram entries.
>
> The individual fragments are ignored by conntrack. Once all the
> fragments are received, a new NBL is created out of the reassembled
> fragments and conntrack executes actions on the new NBL.
>
> Created new APIs: OvsProcessIpv4Fragment() to process individual fragments,
> and OvsIpv4Reassemble() to reassemble IPv4 fragments.
>

You should include your Signed-off-by line in all of your commits.


> ---
>  datapath-windows/automake.mk           |   2 +
>  datapath-windows/ovsext/Debug.h        |   3 +-
>  datapath-windows/ovsext/IpFragment.c   | 506
> +++++++++++++++++++++++++++++++++
>  datapath-windows/ovsext/IpFragment.h   |  74 +++++
>  datapath-windows/ovsext/Switch.c       |   9 +
>  datapath-windows/ovsext/ovsext.vcxproj |   2 +
>  6 files changed, 595 insertions(+), 1 deletion(-)
>  create mode 100644 datapath-windows/ovsext/IpFragment.c
>  create mode 100644 datapath-windows/ovsext/IpFragment.h
>
> diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
> index 53983ae..4f7b55a 100644
> --- a/datapath-windows/automake.mk
> +++ b/datapath-windows/automake.mk
> @@ -32,6 +32,8 @@ EXTRA_DIST += \
>         datapath-windows/ovsext/Flow.h \
>         datapath-windows/ovsext/Gre.h \
>         datapath-windows/ovsext/Gre.c \
> +       datapath-windows/ovsext/IpFragment.c \
> +       datapath-windows/ovsext/IpFragment.h \
>         datapath-windows/ovsext/IpHelper.c \
>         datapath-windows/ovsext/IpHelper.h \
>         datapath-windows/ovsext/Jhash.c \
> diff --git a/datapath-windows/ovsext/Debug.h b/datapath-windows/ovsext/
> Debug.h
> index cae6ac9..6de1812 100644
> --- a/datapath-windows/ovsext/Debug.h
> +++ b/datapath-windows/ovsext/Debug.h
> @@ -42,8 +42,9 @@
>  #define OVS_DBG_STT      BIT32(22)
>  #define OVS_DBG_CONTRK   BIT32(23)
>  #define OVS_DBG_GENEVE   BIT32(24)
> +#define OVS_DBG_IPFRAG   BIT32(25)
>
> -#define OVS_DBG_LAST     24  /* Set this to the last defined module
> number. */
> +#define OVS_DBG_LAST     25  /* Set this to the last defined module
> number. */
>  /* Please add above OVS_DBG_LAST. */
>
>  #define OVS_DBG_ERROR    DPFLTR_ERROR_LEVEL
> diff --git a/datapath-windows/ovsext/IpFragment.c
> b/datapath-windows/ovsext/IpFragment.c
> new file mode 100644
> index 0000000..2ce3932
> --- /dev/null
> +++ b/datapath-windows/ovsext/IpFragment.c
> @@ -0,0 +1,506 @@
> +/*
> + * Copyright (c) 2017 VMware, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include "Conntrack.h"
> +#include "Debug.h"
> +#include "IpFragment.h"
> +#include "Jhash.h"
> +#include "Offload.h"
> +#include "PacketParser.h"
> +
> +#ifdef OVS_DBG_MOD
> +#undef OVS_DBG_MOD
> +#endif
> +#define OVS_DBG_MOD OVS_DBG_IPFRAG
> +
> +/* Function declarations */
> +static VOID OvsIpFragmentEntryCleaner(PVOID data);
> +static VOID OvsIpFragmentEntryDelete(POVS_IPFRAG_ENTRY entry);
> +
> +/* Global and static variables */
> +static OVS_IPFRAG_THREAD_CTX ipFragThreadCtx;
> +static PNDIS_RW_LOCK_EX ovsIpFragmentHashLockObj;
> +static UINT64 ipTotalEntries;
> +static PLIST_ENTRY OvsIpFragTable;
> +
> +NDIS_STATUS
> +OvsInitIpFragment(POVS_SWITCH_CONTEXT context)
> +{
> +
> +    NDIS_STATUS status;
> +    HANDLE threadHandle = NULL;
> +
> +    /* Init the sync-lock */
> +    ovsIpFragmentHashLockObj = NdisAllocateRWLock(context->
> NdisFilterHandle);
> +    if (ovsIpFragmentHashLockObj == NULL) {
> +        return STATUS_INSUFFICIENT_RESOURCES;
> +    }
> +
> +    /* Init the Hash Buffer */
> +    OvsIpFragTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
> +                                              * IP_FRAG_HASH_TABLE_SIZE,
> +                                              OVS_MEMORY_TAG);
> +    if (OvsIpFragTable == NULL) {
> +        NdisFreeRWLock(ovsIpFragmentHashLockObj);
> +        ovsIpFragmentHashLockObj = NULL;
> +        return STATUS_INSUFFICIENT_RESOURCES;
> +    }
> +
> +    for (int i = 0; i < IP_FRAG_HASH_TABLE_SIZE; i++) {
> +        InitializeListHead(&OvsIpFragTable[i]);
> +    }
> +
> +    /* Init Cleaner Thread */
> +    KeInitializeEvent(&ipFragThreadCtx.event, NotificationEvent, FALSE);
> +    status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
> +                                  NULL, OvsIpFragmentEntryCleaner,
> +                                  &ipFragThreadCtx);
> +
> +    if (status != STATUS_SUCCESS) {
> +        OvsFreeMemoryWithTag(OvsIpFragTable, OVS_MEMORY_TAG);
> +        OvsIpFragTable = NULL;
> +        NdisFreeRWLock(ovsIpFragmentHashLockObj);
> +        ovsIpFragmentHashLockObj = NULL;
> +        return status;
> +    }
> +
> +    ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL,
> KernelMode,
> +                              &ipFragThreadCtx.threadObject, NULL);
> +    ZwClose(threadHandle);
> +    threadHandle = NULL;
> +    return STATUS_SUCCESS;
> +}
> +
> +static __inline UINT32
> +OvsGetIPFragmentHash(POVS_IPFRAG_KEY fragKey)
> +{
> +    UINT32 arr[6];
> +    arr[0] = (UINT32)fragKey->protocol;
> +    arr[1] = (UINT32)fragKey->id;
> +    arr[2] = (UINT32)fragKey->sAddr;
> +    arr[3] = (UINT32)fragKey->dAddr;
> +    arr[4] = (UINT32)((fragKey->tunnelId & 0xFFFFFFFF00000000LL) >> 32);
> +    arr[5] = (UINT32)(fragKey->tunnelId & 0xFFFFFFFFLL);
> +    return OvsJhashWords(arr, 6, OVS_HASH_BASIS);
> +}
> +
> +static __inline POVS_IPFRAG_ENTRY
> +OvsLookupIPFrag(POVS_IPFRAG_KEY fragKey, UINT32 hash)
> +{
> +    POVS_IPFRAG_ENTRY entry;
> +    PLIST_ENTRY link;
> +    LOCK_STATE_EX lockState;
> +
> +    NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &lockState, 0);
> +    LIST_FORALL(&OvsIpFragTable[hash & IP_FRAG_HASH_TABLE_MASK], link) {
> +        entry = CONTAINING_RECORD(link, OVS_IPFRAG_ENTRY, link);
> +        if (entry->fragKey.dAddr == fragKey->dAddr &&
> +            entry->fragKey.sAddr == fragKey->sAddr &&
> +            entry->fragKey.id == fragKey->id &&
> +            entry->fragKey.protocol == fragKey->protocol &&
> +            entry->fragKey.tunnelId == fragKey->tunnelId) {
> +            NdisReleaseRWLock(ovsIpFragmentHashLockObj, &lockState);
> +            return entry;
> +        }
> +    }
> +    NdisReleaseRWLock(ovsIpFragmentHashLockObj, &lockState);
> +    return NULL;
> +}
> +
> +/*
> +*----------------------------------------------------------
> ------------------
> +* OvsIpv4Reassemble
> +*     Reassemble the ipv4 fragments and return newNbl on success.
> +*     Should be called after acquiring the lockObj for the entry.
> +*----------------------------------------------------------
> ------------------
> +*/
> +NDIS_STATUS
> +OvsIpv4Reassemble(POVS_SWITCH_CONTEXT switchContext,
> +                  PNET_BUFFER_LIST *curNbl,
> +                  OvsCompletionList *completionList,
> +                  NDIS_SWITCH_PORT_ID sourcePort,
> +                  POVS_IPFRAG_ENTRY entry,
> +                  PNET_BUFFER_LIST *newNbl)
> +{
> +    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
> +    NDIS_STRING filterReason;
> +    POVS_BUFFER_CONTEXT ctx;
> +    PNET_BUFFER curNb;
> +    EthHdr *eth;
> +    IPHdr *ipHdr, *newIpHdr;
> +    CHAR *ethBuf[sizeof(EthHdr)];
> +    CHAR *packetBuf;
> +    UINT16 ipHdrLen, packetLen, packetHeader;
> +    POVS_FRAGMENT_LIST head = NULL;
> +
> +    curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl);
> +    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
> +
> +    eth = (EthHdr*)NdisGetDataBuffer(curNb, ETH_HEADER_LENGTH,
> +                                     (PVOID)&ethBuf, 1, 0);
> +    if (eth == NULL) {
> +        return NDIS_STATUS_INVALID_PACKET;
> +    }
> +    ipHdr = (IPHdr *)((PCHAR)eth + ETH_HEADER_LENGTH);
> +    if (ipHdr == NULL) {
> +        return NDIS_STATUS_INVALID_PACKET;
> +    }
> +    ipHdrLen = (UINT16)(ipHdr->ihl * 4);
> +    packetLen = ETH_HEADER_LENGTH + ipHdrLen + entry->totalLen;
> +    packetBuf = (CHAR*)OvsAllocateMemoryWithTag(packetLen,
> +                                                OVS_MEMORY_TAG);
> +    if (packetBuf == NULL) {
> +        OVS_LOG_ERROR("Insufficient resources, failed to allocate
> packetBuf");
> +        return NDIS_STATUS_RESOURCES;
> +    }
> +
> +    /* copy Ethernet header */
> +    NdisMoveMemory(packetBuf, eth, ETH_HEADER_LENGTH);
> +    /* copy ipv4 header to packet buff */
> +    NdisMoveMemory(packetBuf + ETH_HEADER_LENGTH, ipHdr, ipHdrLen);
> +
> +    /* update new ip header */
> +    newIpHdr = (IPHdr *)(packetBuf + ETH_HEADER_LENGTH);
> +    newIpHdr->frag_off = 0;
> +    newIpHdr->tot_len = htons(packetLen - ETH_HEADER_LENGTH);
> +    newIpHdr->check = 0;
> +    newIpHdr->check = IPChecksum((UINT8 *)packetBuf + ETH_HEADER_LENGTH,
> +                                 ipHdrLen, 0);
> +    packetHeader = ETH_HEADER_LENGTH + ipHdrLen;
> +    head = entry->head;
> +    while (head) {
> +        ASSERT((packetHeader + head->offset) <= packetLen);
> +        NdisMoveMemory(packetBuf + packetHeader + head->offset,
> +                       head->pbuff, head->len);
> +        head = head->next;
> +    }
> +    /* Create new nbl from the flat buffer */
> +    *newNbl = OvsAllocateNBLFromBuffer(switchContext, packetBuf,
> packetLen);
> +    if (*newNbl == NULL) {
> +        OVS_LOG_ERROR("Insufficient resources, failed to allocate
> newNbl");
> +        status = NDIS_STATUS_RESOURCES;
> +    }
> +
> +    OvsFreeMemoryWithTag(packetBuf, OVS_MEMORY_TAG);
> +    /* Timeout the entry so that clean up thread deletes it .*/
> +    entry->expiration -= IPFRAG_ENTRY_TIMEOUT;
> +
> +    /* Complete the fragment NBL */
> +    ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_
> START(*curNbl);
> +    if (ctx->flags & OVS_BUFFER_NEED_COMPLETE) {
> +        RtlInitUnicodeString(&filterReason, L"Complete last fragment");
> +        OvsAddPktCompletionList(completionList, TRUE, sourcePort,
> *curNbl, 1,
> +                                &filterReason);
> +    } else {
> +        OvsCompleteNBL(switchContext, *curNbl, TRUE);
> +    }
> +    *curNbl = *newNbl;
> +    return status;
> +}
> +/*
> +*----------------------------------------------------------
> ------------------
> +* OvsProcessIpv4Fragment
> +*     Reassemble the fragments once all the fragments are received and
> +*     return NDIS_STATUS_PENDING for the pending fragments
> +*     XXX - Instead of copying NBls, Keep the NBLs in limbo state.
> +*----------------------------------------------------------
> ------------------
> +*/
> +NDIS_STATUS
> +OvsProcessIpv4Fragment(POVS_SWITCH_CONTEXT switchContext,
> +                       PNET_BUFFER_LIST *curNbl,
> +                       OvsCompletionList *completionList,
> +                       NDIS_SWITCH_PORT_ID sourcePort,
> +                       UINT16 *mru,
> +                       ovs_be64 tunnelId,
> +                       PNET_BUFFER_LIST *newNbl)
> +{
> +    NDIS_STATUS status = NDIS_STATUS_PENDING;
> +    PNET_BUFFER curNb;
> +    CHAR *ethBuf[sizeof(EthHdr)];
> +    UINT16 offset, flags;
> +    UINT16 payloadLen, ipHdrLen;
> +    UINT32 hash;
> +    UINT64 currentTime;
> +    EthHdr *eth;
> +    IPHdr *ipHdr;
> +    OVS_IPFRAG_KEY fragKey;
> +    POVS_IPFRAG_ENTRY entry;
> +    POVS_FRAGMENT_LIST fragStorage;
> +    LOCK_STATE_EX htLockState, entryLockState;
> +
> +    curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl);
> +    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
> +
> +    eth = (EthHdr*)NdisGetDataBuffer(curNb, ETH_HEADER_LENGTH,
> +                                     (PVOID)&ethBuf, 1, 0);
> +    if (eth == NULL) {
> +        return NDIS_STATUS_INVALID_PACKET;
> +    }
> +
> +    ipHdr = (IPHdr *)((PCHAR)eth + ETH_HEADER_LENGTH);
> +    if (ipHdr == NULL) {
> +        return NDIS_STATUS_INVALID_PACKET;
> +    }
> +    ipHdrLen = (UINT16)(ipHdr->ihl * 4);
> +    payloadLen = ntohs(ipHdr->tot_len) - ipHdrLen;
> +    offset = ntohs(ipHdr->frag_off) & IP_OFFSET;
> +    offset <<= 3;
> +    flags = ntohs(ipHdr->frag_off) & IP_MF;
> +
> +    /*Copy fragment specific fields. */
> +    fragKey.protocol = ipHdr->protocol;
> +    fragKey.id = ipHdr->id;
> +    fragKey.sAddr = ipHdr->saddr;
> +    fragKey.dAddr = ipHdr->daddr;
> +    fragKey.tunnelId = tunnelId;
> +    /* Padding. */
> +    NdisZeroMemory(&fragKey.pad_1, 3);
> +    fragKey.pad_2 = 0;
> +
> +    fragStorage = (POVS_FRAGMENT_LIST )
> +        OvsAllocateMemoryWithTag(sizeof(OVS_FRAGMENT_LIST),
> OVS_MEMORY_TAG);
> +    if (fragStorage == NULL) {
> +        OVS_LOG_ERROR("Insufficient resources, failed to allocate
> fragStorage");
> +        return NDIS_STATUS_RESOURCES;
> +    }
> +
> +    fragStorage->pbuff = (CHAR *)OvsAllocateMemoryWithTag(payloadLen,
> +                                                          OVS_MEMORY_TAG);
> +    if (fragStorage->pbuff == NULL) {
> +        OVS_LOG_ERROR("Insufficient resources, failed to allocate
> fragStorage");
> +        OvsFreeMemoryWithTag(fragStorage, OVS_MEMORY_TAG);
> +        return NDIS_STATUS_RESOURCES;
> +    }
> +
> +    /* Copy payload from nbl to fragment storage. */
> +    if (OvsGetPacketBytes(*curNbl, payloadLen, ETH_HEADER_LENGTH +
> ipHdrLen,
> +                          fragStorage->pbuff) == NULL) {
> +        status = NDIS_STATUS_RESOURCES;
> +        goto payload_copy_error;
> +    }
> +    fragStorage->len = payloadLen;
> +    fragStorage->offset = offset;
> +    fragStorage->next = NULL;
> +    hash = OvsGetIPFragmentHash(&fragKey);
> +    entry = OvsLookupIPFrag(&fragKey, hash);
> +    if (entry == NULL) {
> +        entry = (POVS_IPFRAG_ENTRY)
> +            OvsAllocateMemoryWithTag(sizeof(OVS_IPFRAG_ENTRY),
> +                                     OVS_MEMORY_TAG);
> +        if (entry == NULL) {
> +            status = NDIS_STATUS_RESOURCES;
> +            goto payload_copy_error;
> +        }
> +        /* Copy the fragment key. */
> +        NdisZeroMemory(entry, sizeof(OVS_IPFRAG_ENTRY));
> +        NdisMoveMemory(&(entry->fragKey), &fragKey,
> +                       sizeof(OVS_IPFRAG_KEY));
> +        /* Update maximum receiving unit. */
> +        entry->mru = payloadLen + ipHdrLen;
> +        entry->recvdLen += fragStorage->len;
> +        entry->head = entry->tail = fragStorage;
> +        if (!flags) {
> +            entry->totalLen = offset + payloadLen;
> +        }
> +        NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
> +        entry->expiration = currentTime + IPFRAG_ENTRY_TIMEOUT;
> +
> +        /* Init the sync-lock. */
> +        entry->lockObj = NdisAllocateRWLock(switchContext->
> NdisFilterHandle);
> +        if (entry->lockObj == NULL) {
> +            OvsFreeMemoryWithTag(entry, OVS_MEMORY_TAG);
> +            status = NDIS_STATUS_RESOURCES;
> +            goto payload_copy_error;
> +        }
> +
> +        NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &htLockState,
> 0);
> +        InsertHeadList(&OvsIpFragTable[hash & IP_FRAG_HASH_TABLE_MASK],
> +                       &entry->link);
> +
> +        ipTotalEntries++;
> +        NdisReleaseRWLock(ovsIpFragmentHashLockObj, &htLockState);
> +        return NDIS_STATUS_PENDING;
> +    } else {
> +        /* Acquire the entry lock. */
> +        NdisAcquireRWLockWrite(entry->lockObj, &entryLockState, 0);
> +        NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
> +        if (currentTime > entry->expiration) {
> +            /* Expired entry. */
> +            goto fragment_error;
> +        }
> +        POVS_FRAGMENT_LIST next = entry->head;
> +        POVS_FRAGMENT_LIST prev = entry->tail;
> +        if (prev != NULL || prev->offset < offset) {
> +            next = NULL;
> +            goto found;
> +        }
> +        prev = NULL;
> +        for (next = entry->head; next != NULL; next = next->next) {
> +            if (next->offset > fragStorage->offset) {
> +                break;
> +            }
> +            prev = next;
> +        }
> +found:
> +        /*Check for overlap. */
> +        if (prev) {
> +            /* i bytes overlap. */
> +            int i = (prev->offset + prev->len) - fragStorage->offset;
> +            if (i > 0) {
> +                goto fragment_error;
> +            }
> +        }
> +        if (next) {
> +        /* i bytes overlap. */
> +            int i = (fragStorage->offset + fragStorage->len) -
> next->offset;
> +            if (i > 0) {
> +                goto fragment_error;
> +            }
> +        }
> +        /*Insert. */
> +        if (prev) {
> +            prev->next = fragStorage;
> +            fragStorage->next = next;
> +        } else {
> +            fragStorage->next = next;
> +            entry->head = fragStorage;
> +        }
> +        if (!next) {
> +            entry->tail = fragStorage;
> +        }
> +
> +        entry->mru = entry->mru > (payloadLen + ipHdrLen) ?
> +                         entry->mru : (payloadLen + ipHdrLen);
> +        if (entry->recvdLen + fragStorage->len > entry->recvdLen) {
> +            entry->recvdLen += fragStorage->len;
> +        } else {
> +            /* Overflow, ignore the fragment.*/
> +            goto fragment_error;
> +        }
> +        if (!flags) {
> +            entry->totalLen = offset + payloadLen;
> +        }
> +        if (entry->recvdLen == entry->totalLen) {
> +            /* Update mru of the forwarding context. */
> +            *mru = entry->mru + ETH_HEADER_LENGTH;
> +            status = OvsIpv4Reassemble(switchContext, curNbl,
> completionList,
> +                                       sourcePort, entry, newNbl);
> +        }
> +        NdisReleaseRWLock(entry->lockObj, &entryLockState);
> +        return status;
> +    }
> +fragment_error:
> +    /* Release the entry lock. */
> +    NdisReleaseRWLock(entry->lockObj, &entryLockState);
> +payload_copy_error:
> +    OvsFreeMemoryWithTag(fragStorage->pbuff, OVS_MEMORY_TAG);
> +    OvsFreeMemoryWithTag(fragStorage, OVS_MEMORY_TAG);
> +    return status;
> +}
> +
> +
> +/*
> +*----------------------------------------------------------
> ------------------
> +* OvsIpFragmentEntryCleaner
> +*     Runs periodically and cleans up the Ip Fragment table
> +*     Interval is selected as twice the entry timeout
> +*----------------------------------------------------------
> ------------------
> +*/
> +static VOID
> +OvsIpFragmentEntryCleaner(PVOID data)
> +{
> +
> +    POVS_IPFRAG_THREAD_CTX context = (POVS_IPFRAG_THREAD_CTX)data;
> +    PLIST_ENTRY link, next;
> +    POVS_IPFRAG_ENTRY entry;
> +    BOOLEAN success = TRUE;
> +
> +    while (success) {
> +        LOCK_STATE_EX lockState;
> +        NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &lockState, 0);
> +        if (context->exit) {
> +            NdisReleaseRWLock(ovsIpFragmentHashLockObj, &lockState);
> +            break;
> +        }
> +
> +        /* Set the timeout for the thread and cleanup. */
> +        UINT64 currentTime, threadSleepTimeout;
> +        NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
> +        threadSleepTimeout = currentTime + IPFRAG_CLEANUP_INTERVAL;
> +        for (int i = 0; i < IP_FRAG_HASH_TABLE_SIZE && ipTotalEntries;
> i++) {
> +            LIST_FORALL_SAFE(&OvsIpFragTable[i], link, next) {
> +                entry = CONTAINING_RECORD(link, OVS_IPFRAG_ENTRY, link);
> +                if (entry->expiration < currentTime) {
> +                    OvsIpFragmentEntryDelete(entry);
> +                }
> +            }
> +        }
> +
> +        NdisReleaseRWLock(ovsIpFragmentHashLockObj, &lockState);
> +        KeWaitForSingleObject(&context->event, Executive, KernelMode,
> +                              FALSE, (LARGE_INTEGER
> *)&threadSleepTimeout);
> +    }
> +
> +    PsTerminateSystemThread(STATUS_SUCCESS);
> +}
> +
> +static VOID
> +OvsIpFragmentEntryDelete(POVS_IPFRAG_ENTRY entry)
> +{
> +    LOCK_STATE_EX lockState;
> +    NdisAcquireRWLockWrite(entry->lockObj, &lockState, 0);
> +    POVS_FRAGMENT_LIST head = entry->head;
> +    POVS_FRAGMENT_LIST temp = NULL;
> +    while (head) {
> +        temp = head;
> +        head = head->next;
> +        OvsFreeMemoryWithTag(temp->pbuff, OVS_MEMORY_TAG);
> +        OvsFreeMemoryWithTag(temp, OVS_MEMORY_TAG);
> +    }
> +    RemoveEntryList(&entry->link);
> +    ipTotalEntries--;
> +    NdisReleaseRWLock(entry->lockObj, &lockState);
> +    NdisFreeRWLock(entry->lockObj);
> +    OvsFreeMemoryWithTag(entry, OVS_MEMORY_TAG);
> +}
> +
> +VOID
> +OvsCleanupIpFragment(VOID)
> +{
> +    PLIST_ENTRY link, next;
> +    POVS_IPFRAG_ENTRY entry;
> +    LOCK_STATE_EX lockState;
> +    NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &lockState, 0);
> +    ipFragThreadCtx.exit = 1;
> +    KeSetEvent(&ipFragThreadCtx.event, 0, FALSE);
> +    NdisReleaseRWLock(ovsIpFragmentHashLockObj, &lockState);
> +    KeWaitForSingleObject(ipFragThreadCtx.threadObject, Executive,
> +                          KernelMode, FALSE, NULL);
> +    ObDereferenceObject(ipFragThreadCtx.threadObject);
> +
> +    if (OvsIpFragTable) {
> +        for (int i = 0; i < IP_FRAG_HASH_TABLE_SIZE && ipTotalEntries;
> i++) {
> +            LIST_FORALL_SAFE(&OvsIpFragTable[i], link, next) {
> +                entry = CONTAINING_RECORD(link, OVS_IPFRAG_ENTRY, link);
> +                OvsIpFragmentEntryDelete(entry);
> +                }
> +            }
> +        OvsFreeMemoryWithTag(OvsIpFragTable, OVS_MEMORY_TAG);
> +        OvsIpFragTable = NULL;
> +    }
> +    NdisFreeRWLock(ovsIpFragmentHashLockObj);
> +    ovsIpFragmentHashLockObj = NULL;
> + }
> diff --git a/datapath-windows/ovsext/IpFragment.h
> b/datapath-windows/ovsext/IpFragment.h
> new file mode 100644
> index 0000000..8d87451
> --- /dev/null
> +++ b/datapath-windows/ovsext/IpFragment.h
> @@ -0,0 +1,74 @@
> +/*
> +* Copyright (c) 2017 VMware, Inc.
> +*
> +* Licensed under the Apache License, Version 2.0 (the "License");
> +* you may not use this file except in compliance with the License.
> +* You may obtain a copy of the License at:
> +*
> +*     http://www.apache.org/licenses/LICENSE-2.0
> +*
> +* Unless required by applicable law or agreed to in writing, software
> +* distributed under the License is distributed on an "AS IS" BASIS,
> +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> +* See the License for the specific language governing permissions and
> +* limitations under the License.
> +*/
> +
> +#ifndef __IPFRAGMENT_H_
> +#define __IPFRAGMENT_H_ 1
> +#include "PacketIO.h"
> +
> +typedef struct _OVS_FRAGMENT_LIST {
> +    CHAR *pbuff;
> +    UINT16 len;
> +    UINT16 offset;
> +    struct _OVS_FRAGMENT_LIST *next;
> +} OVS_FRAGMENT_LIST, *POVS_FRAGMENT_LIST;
> +
> +typedef struct _OVS_IPFRAG_KEY {
> +    UINT8 protocol;
> +    UINT8 pad_1[3];             /* Align the structure to address
> boundaries.*/
> +    UINT16 id;
> +    UINT16 pad_2;               /* Align the structure to address
> boundaries.*/
> +    UINT32 sAddr;
> +    UINT32 dAddr;
> +    ovs_be64 tunnelId;
> +} OVS_IPFRAG_KEY, *POVS_IPFRAG_KEY;
> +
> +typedef struct _OVS_IPFRAG_ENTRY {
> +    PNDIS_RW_LOCK_EX lockObj;       /* To access the entry. */
> +    UINT16 totalLen;
> +    UINT16 recvdLen;
> +    UINT16 mru;
> +    UINT64 expiration;
> +    OVS_IPFRAG_KEY fragKey;
> +    POVS_FRAGMENT_LIST head;
> +    POVS_FRAGMENT_LIST tail;
> +    LIST_ENTRY link;
> +} OVS_IPFRAG_ENTRY, *POVS_IPFRAG_ENTRY;
> +
> +typedef struct _OVS_IPFRAG_THREAD_CTX {
> +    KEVENT event;
> +    PVOID threadObject;
> +    UINT32 exit;
> +} OVS_IPFRAG_THREAD_CTX, *POVS_IPFRAG_THREAD_CTX;
> +
> +#define IP_FRAG_HASH_TABLE_SIZE ((UINT32)1 << 10)
> +#define IP_FRAG_HASH_TABLE_MASK (IP_FRAG_HASH_TABLE_SIZE - 1)
> +/* 30s - Sufficient time to receive all fragments. */
> +#define IPFRAG_ENTRY_TIMEOUT 300000000LL
> +#define IPFRAG_CLEANUP_INTERVAL IPFRAG_ENTRY_TIMEOUT * 2 /*1m.*/
> +PNET_BUFFER_LIST OvsIpv4FragmentNBL(PVOID ovsContext,
> +                                    PNET_BUFFER_LIST nbl,
> +                                    UINT16 mru);
> +
> +NDIS_STATUS OvsProcessIpv4Fragment(POVS_SWITCH_CONTEXT switchContext,
> +                                   PNET_BUFFER_LIST *curNbl,
> +                                   OvsCompletionList *completionList,
> +                                   NDIS_SWITCH_PORT_ID sourcePort,
> +                                   UINT16 *mru,
> +                                   ovs_be64 tunnelId,
> +                                   PNET_BUFFER_LIST *newNbl);
> +NDIS_STATUS OvsInitIpFragment(POVS_SWITCH_CONTEXT context);
> +VOID OvsCleanupIpFragment(VOID);
> +#endif /* __IPFRAGMENT_H_ */
> diff --git a/datapath-windows/ovsext/Switch.c b/datapath-windows/ovsext/
> Switch.c
> index 138a656..558e3af 100644
> --- a/datapath-windows/ovsext/Switch.c
> +++ b/datapath-windows/ovsext/Switch.c
> @@ -27,6 +27,7 @@
>  #include "Flow.h"
>  #include "IpHelper.h"
>  #include "Oid.h"
> +#include "IpFragment.h"
>
>  #ifdef OVS_DBG_MOD
>  #undef OVS_DBG_MOD
> @@ -229,6 +230,12 @@ OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
>      if (status != STATUS_SUCCESS) {
>          OvsUninitSwitchContext(switchContext);
>          OVS_LOG_ERROR("Exit: Failed to initialize Connection tracking");
> +    }
> +
> +    status = OvsInitIpFragment(switchContext);
> +    if (status != STATUS_SUCCESS) {
> +        OvsUninitSwitchContext(switchContext);
> +        OVS_LOG_ERROR("Exit: Failed to initialize Ip Fragment");
>          goto create_switch_done;
>      }
>
> @@ -265,6 +272,8 @@ OvsExtDetach(NDIS_HANDLE filterModuleContext)
>      OvsCleanupSttDefragmentation();
>      OvsCleanupConntrack();
>      OvsCleanupCtRelated();
> +    OvsCleanupIpFragment();
> +
>      /* This completes the cleanup, and a new attach can be handled now. */
>
>      OVS_LOG_TRACE("Exit: OvsDetach Successfully");
> diff --git a/datapath-windows/ovsext/ovsext.vcxproj
> b/datapath-windows/ovsext/ovsext.vcxproj
> index 44aea19..ecfc0b8 100644
> --- a/datapath-windows/ovsext/ovsext.vcxproj
> +++ b/datapath-windows/ovsext/ovsext.vcxproj
> @@ -112,6 +112,7 @@
>      <ClInclude Include="Flow.h" />
>      <ClInclude Include="Geneve.h" />
>      <ClInclude Include="Gre.h" />
> +    <ClInclude Include="IpFragment.h" />
>      <ClInclude Include="IpHelper.h" />
>      <ClInclude Include="Jhash.h" />
>      <ClInclude Include="Mpls.h" />
> @@ -268,6 +269,7 @@
>      <ClCompile Include="Flow.c" />
>      <ClCompile Include="Geneve.c" />
>      <ClCompile Include="Gre.c" />
> +    <ClCompile Include="IpFragment.c" />
>      <ClCompile Include="IpHelper.c" />
>      <ClCompile Include="Jhash.c" />
>      <ClCompile Include="Netlink/Netlink.c" />
> --
> 2.9.3.windows.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list