[ovs-dev] [PATCH v1 1/5] datapath-windows: Added a new file to support Ipv4 fragments.

Anand Kumar kumaranand at vmware.com
Tue Jan 10 00:59:20 UTC 2017


This patch adds functionalities to handle IPv4 fragments, which will be
used by Conntrack module.

Added a new structure to hold the Ipv4 fragments and a hash table to
hold Ipv4 datagram entries. Also added a clean up thread that runs
every minute to delete the expired IPv4 datagram entries.

The individual fragments are ignored by the conntrack. Once all the
fragments are received, a new NBL is created out of the reassembled
fragments and conntrack executes actions on the new NBL.

Created new APIs OvsProcessIpv4Fragment() to process individual fragments,
OvsIpv4Reassemble() to reassemble Ipv4 fragments.
---
 datapath-windows/automake.mk           |   2 +
 datapath-windows/ovsext/Debug.h        |   3 +-
 datapath-windows/ovsext/IpFragment.c   | 506 +++++++++++++++++++++++++++++++++
 datapath-windows/ovsext/IpFragment.h   |  74 +++++
 datapath-windows/ovsext/Switch.c       |   9 +
 datapath-windows/ovsext/ovsext.vcxproj |   2 +
 6 files changed, 595 insertions(+), 1 deletion(-)
 create mode 100644 datapath-windows/ovsext/IpFragment.c
 create mode 100644 datapath-windows/ovsext/IpFragment.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
index 53983ae..4f7b55a 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -32,6 +32,8 @@ EXTRA_DIST += \
 	datapath-windows/ovsext/Flow.h \
 	datapath-windows/ovsext/Gre.h \
 	datapath-windows/ovsext/Gre.c \
+	datapath-windows/ovsext/IpFragment.c \
+	datapath-windows/ovsext/IpFragment.h \
 	datapath-windows/ovsext/IpHelper.c \
 	datapath-windows/ovsext/IpHelper.h \
 	datapath-windows/ovsext/Jhash.c \
diff --git a/datapath-windows/ovsext/Debug.h b/datapath-windows/ovsext/Debug.h
index cae6ac9..6de1812 100644
--- a/datapath-windows/ovsext/Debug.h
+++ b/datapath-windows/ovsext/Debug.h
@@ -42,8 +42,9 @@
 #define OVS_DBG_STT      BIT32(22)
 #define OVS_DBG_CONTRK   BIT32(23)
 #define OVS_DBG_GENEVE   BIT32(24)
+#define OVS_DBG_IPFRAG   BIT32(25)
 
-#define OVS_DBG_LAST     24  /* Set this to the last defined module number. */
+#define OVS_DBG_LAST     25  /* Set this to the last defined module number. */
 /* Please add above OVS_DBG_LAST. */
 
 #define OVS_DBG_ERROR    DPFLTR_ERROR_LEVEL
diff --git a/datapath-windows/ovsext/IpFragment.c b/datapath-windows/ovsext/IpFragment.c
new file mode 100644
index 0000000..2ce3932
--- /dev/null
+++ b/datapath-windows/ovsext/IpFragment.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright (c) 2017 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conntrack.h"
+#include "Debug.h"
+#include "IpFragment.h"
+#include "Jhash.h"
+#include "Offload.h"
+#include "PacketParser.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_IPFRAG
+
+/* Function declarations */
+/* Body of the system thread created by OvsInitIpFragment(). */
+static VOID OvsIpFragmentEntryCleaner(PVOID data);
+/* Unlinks one datagram entry and frees it with its stored fragments. */
+static VOID OvsIpFragmentEntryDelete(POVS_IPFRAG_ENTRY entry);
+
+/* Global and static variables */
+/* Event, thread object and exit flag shared with the cleaner thread. */
+static OVS_IPFRAG_THREAD_CTX ipFragThreadCtx;
+/* RW lock held for table lookups, inserts and the cleaner's sweep. */
+static PNDIS_RW_LOCK_EX ovsIpFragmentHashLockObj;
+/* Count of entries currently linked into OvsIpFragTable. */
+static UINT64 ipTotalEntries;
+/* Array of IP_FRAG_HASH_TABLE_SIZE list heads, one per hash bucket. */
+static PLIST_ENTRY OvsIpFragTable;
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsInitIpFragment
+ *     Allocates the hash-table lock and the bucket array, then starts the
+ *     cleaner thread and keeps a reference to its thread object so that
+ *     OvsCleanupIpFragment() can wait for it.
+ *
+ *     Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES when the lock
+ *     or the table cannot be allocated, or the failing status of the
+ *     thread creation / object reference. On failure everything acquired
+ *     here is released and the globals are reset to NULL.
+ *----------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsInitIpFragment(POVS_SWITCH_CONTEXT context)
+{
+    NDIS_STATUS status;
+    HANDLE threadHandle = NULL;
+    LOCK_STATE_EX lockState;
+
+    /* Init the sync-lock */
+    ovsIpFragmentHashLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
+    if (ovsIpFragmentHashLockObj == NULL) {
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    /* Init the Hash Buffer */
+    OvsIpFragTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
+                                              * IP_FRAG_HASH_TABLE_SIZE,
+                                              OVS_MEMORY_TAG);
+    if (OvsIpFragTable == NULL) {
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        goto free_lock;
+    }
+
+    for (UINT32 i = 0; i < IP_FRAG_HASH_TABLE_SIZE; i++) {
+        InitializeListHead(&OvsIpFragTable[i]);
+    }
+
+    /*
+     * The thread context is static and OvsCleanupIpFragment() leaves 'exit'
+     * set. Clear it before the thread starts, otherwise the cleaner would
+     * terminate immediately when the extension is attached a second time.
+     */
+    ipFragThreadCtx.exit = 0;
+    ipFragThreadCtx.threadObject = NULL;
+    ipTotalEntries = 0;
+
+    /* Init Cleaner Thread */
+    KeInitializeEvent(&ipFragThreadCtx.event, NotificationEvent, FALSE);
+    status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
+                                  NULL, OvsIpFragmentEntryCleaner,
+                                  &ipFragThreadCtx);
+    if (status != STATUS_SUCCESS) {
+        goto free_table;
+    }
+
+    status = ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL,
+                                       KernelMode,
+                                       &ipFragThreadCtx.threadObject, NULL);
+    if (status != STATUS_SUCCESS) {
+        /*
+         * Without the thread object the cleanup path cannot wait for the
+         * cleaner, so stop it now and wait through the handle before the
+         * table and the lock it uses are released.
+         */
+        ipFragThreadCtx.threadObject = NULL;
+        NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &lockState, 0);
+        ipFragThreadCtx.exit = 1;
+        KeSetEvent(&ipFragThreadCtx.event, 0, FALSE);
+        NdisReleaseRWLock(ovsIpFragmentHashLockObj, &lockState);
+        ZwWaitForSingleObject(threadHandle, FALSE, NULL);
+        ZwClose(threadHandle);
+        goto free_table;
+    }
+
+    ZwClose(threadHandle);
+    return STATUS_SUCCESS;
+
+free_table:
+    OvsFreeMemoryWithTag(OvsIpFragTable, OVS_MEMORY_TAG);
+    OvsIpFragTable = NULL;
+free_lock:
+    NdisFreeRWLock(ovsIpFragmentHashLockObj);
+    ovsIpFragmentHashLockObj = NULL;
+    return status;
+}
+
+/*
+ * Hashes the datagram key. Protocol, id, source and destination address
+ * each occupy one 32-bit word; the tunnel id is split into its upper and
+ * lower halves. Padding bytes of the key are not part of the hash.
+ */
+static __inline UINT32
+OvsGetIPFragmentHash(POVS_IPFRAG_KEY fragKey)
+{
+    UINT32 words[6] = {
+        (UINT32)fragKey->protocol,
+        (UINT32)fragKey->id,
+        (UINT32)fragKey->sAddr,
+        (UINT32)fragKey->dAddr,
+        (UINT32)((fragKey->tunnelId & 0xFFFFFFFF00000000LL) >> 32),
+        (UINT32)(fragKey->tunnelId & 0xFFFFFFFFLL)
+    };
+
+    return OvsJhashWords(words, 6, OVS_HASH_BASIS);
+}
+
+/*
+ * Walks the bucket selected by 'hash' looking for an entry whose key fields
+ * all equal those of 'fragKey'. The hash-table lock is held only while the
+ * bucket is walked and is released before returning, so the returned entry
+ * is not protected by that lock. Returns NULL when nothing matches.
+ */
+static __inline POVS_IPFRAG_ENTRY
+OvsLookupIPFrag(POVS_IPFRAG_KEY fragKey, UINT32 hash)
+{
+    PLIST_ENTRY cursor;
+    LOCK_STATE_EX tableLock;
+
+    NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &tableLock, 0);
+    LIST_FORALL(&OvsIpFragTable[hash & IP_FRAG_HASH_TABLE_MASK], cursor) {
+        POVS_IPFRAG_ENTRY candidate =
+            CONTAINING_RECORD(cursor, OVS_IPFRAG_ENTRY, link);
+        BOOLEAN sameKey =
+            candidate->fragKey.dAddr == fragKey->dAddr &&
+            candidate->fragKey.sAddr == fragKey->sAddr &&
+            candidate->fragKey.id == fragKey->id &&
+            candidate->fragKey.protocol == fragKey->protocol &&
+            candidate->fragKey.tunnelId == fragKey->tunnelId;
+
+        if (sameKey) {
+            NdisReleaseRWLock(ovsIpFragmentHashLockObj, &tableLock);
+            return candidate;
+        }
+    }
+    NdisReleaseRWLock(ovsIpFragmentHashLockObj, &tableLock);
+    return NULL;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsIpv4Reassemble
+ *     Reassemble the ipv4 fragments and return newNbl on success.
+ *     Should be called after acquiring the lockObj for the entry.
+ *
+ *     The Ethernet and IP headers of the new packet are copied from
+ *     '*curNbl'; the payload is rebuilt from the fragments stored in 'entry'.
+ *
+ *     Returns NDIS_STATUS_INVALID_PACKET, leaving '*curNbl' and 'entry'
+ *     untouched, when the headers cannot be read or when the stored
+ *     fragments do not fit inside the datagram. Otherwise the entry is
+ *     marked expired, '*curNbl' is completed and replaced by '*newNbl';
+ *     '*newNbl' is NULL and NDIS_STATUS_RESOURCES is returned when the new
+ *     NBL could not be allocated.
+ *----------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsIpv4Reassemble(POVS_SWITCH_CONTEXT switchContext,
+                  PNET_BUFFER_LIST *curNbl,
+                  OvsCompletionList *completionList,
+                  NDIS_SWITCH_PORT_ID sourcePort,
+                  POVS_IPFRAG_ENTRY entry,
+                  PNET_BUFFER_LIST *newNbl)
+{
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    NDIS_STRING filterReason;
+    POVS_BUFFER_CONTEXT ctx;
+    PNET_BUFFER curNb;
+    IPHdr *ipHdr, *newIpHdr;
+    /* Ethernet header plus the largest IPv4 header (ihl = 15 -> 60 bytes). */
+    UINT8 hdrBuf[ETH_HEADER_LENGTH + 60];
+    UINT8 *hdr;
+    CHAR *packetBuf;
+    UINT16 ipHdrLen, packetHeader;
+    UINT32 packetLen;
+    POVS_FRAGMENT_LIST frag;
+
+    curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl);
+    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
+
+    /*
+     * NdisGetDataBuffer() only guarantees that the requested number of
+     * bytes is contiguous, so ask for the fixed IP header together with the
+     * Ethernet header instead of reading past a 14-byte request.
+     */
+    hdr = (UINT8 *)NdisGetDataBuffer(curNb,
+                                     (ULONG)(ETH_HEADER_LENGTH +
+                                             sizeof(IPHdr)),
+                                     (PVOID)hdrBuf, 1, 0);
+    if (hdr == NULL) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+    ipHdr = (IPHdr *)(hdr + ETH_HEADER_LENGTH);
+    ipHdrLen = (UINT16)(ipHdr->ihl * 4);
+    if (ipHdrLen < sizeof(IPHdr) ||
+        ipHdrLen > sizeof(hdrBuf) - ETH_HEADER_LENGTH) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+    if (ipHdrLen > sizeof(IPHdr)) {
+        /* IP options present: fetch again so that they are covered too. */
+        hdr = (UINT8 *)NdisGetDataBuffer(curNb,
+                                         (ULONG)(ETH_HEADER_LENGTH +
+                                                 ipHdrLen),
+                                         (PVOID)hdrBuf, 1, 0);
+        if (hdr == NULL) {
+            return NDIS_STATUS_INVALID_PACKET;
+        }
+        ipHdr = (IPHdr *)(hdr + ETH_HEADER_LENGTH);
+    }
+
+    /* tot_len is a 16-bit field; header plus payload must fit into it. */
+    if ((UINT32)ipHdrLen + entry->totalLen > 0xFFFF) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+
+    /*
+     * Every stored fragment must lie inside the datagram. This has to be a
+     * real check and not only an ASSERT: the copy below would otherwise
+     * write past the end of packetBuf in free builds.
+     */
+    for (frag = entry->head; frag != NULL; frag = frag->next) {
+        if ((UINT32)frag->offset + frag->len > entry->totalLen) {
+            OVS_LOG_ERROR("Fragment exceeds the reassembled datagram");
+            return NDIS_STATUS_INVALID_PACKET;
+        }
+    }
+
+    /* Kept in 32 bits: 14 + 65535 does not fit in a UINT16. */
+    packetLen = (UINT32)ETH_HEADER_LENGTH + ipHdrLen + entry->totalLen;
+    packetBuf = (CHAR*)OvsAllocateMemoryWithTag(packetLen,
+                                                OVS_MEMORY_TAG);
+    if (packetBuf == NULL) {
+        OVS_LOG_ERROR("Insufficient resources, failed to allocate packetBuf");
+        return NDIS_STATUS_RESOURCES;
+    }
+
+    /* copy Ethernet header */
+    NdisMoveMemory(packetBuf, hdr, ETH_HEADER_LENGTH);
+    /* copy ipv4 header to packet buff */
+    NdisMoveMemory(packetBuf + ETH_HEADER_LENGTH, ipHdr, ipHdrLen);
+
+    /* update new ip header */
+    newIpHdr = (IPHdr *)(packetBuf + ETH_HEADER_LENGTH);
+    newIpHdr->frag_off = 0;
+    newIpHdr->tot_len = htons((UINT16)(packetLen - ETH_HEADER_LENGTH));
+    newIpHdr->check = 0;
+    newIpHdr->check = IPChecksum((UINT8 *)packetBuf + ETH_HEADER_LENGTH,
+                                 ipHdrLen, 0);
+    packetHeader = (UINT16)(ETH_HEADER_LENGTH + ipHdrLen);
+    for (frag = entry->head; frag != NULL; frag = frag->next) {
+        NdisMoveMemory(packetBuf + packetHeader + frag->offset,
+                       frag->pbuff, frag->len);
+    }
+    /* Create new nbl from the flat buffer */
+    *newNbl = OvsAllocateNBLFromBuffer(switchContext, packetBuf, packetLen);
+    if (*newNbl == NULL) {
+        OVS_LOG_ERROR("Insufficient resources, failed to allocate newNbl");
+        status = NDIS_STATUS_RESOURCES;
+    }
+
+    OvsFreeMemoryWithTag(packetBuf, OVS_MEMORY_TAG);
+    /* Timeout the entry so that clean up thread deletes it. */
+    entry->expiration -= IPFRAG_ENTRY_TIMEOUT;
+
+    /* Complete the fragment NBL */
+    ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(*curNbl);
+    if (ctx->flags & OVS_BUFFER_NEED_COMPLETE) {
+        RtlInitUnicodeString(&filterReason, L"Complete last fragment");
+        OvsAddPktCompletionList(completionList, TRUE, sourcePort, *curNbl, 1,
+                                &filterReason);
+    } else {
+        OvsCompleteNBL(switchContext, *curNbl, TRUE);
+    }
+    *curNbl = *newNbl;
+    return status;
+}
+/*
+ *----------------------------------------------------------------------------
+ * OvsProcessIpv4Fragment
+ *     Stores a copy of the fragment payload carried by '*curNbl' in the
+ *     datagram entry identified by protocol, id, source address,
+ *     destination address and 'tunnelId', creating the entry when no
+ *     matching one exists. Once all the fragments are received the datagram
+ *     is reassembled through OvsIpv4Reassemble(), '*mru' is set to the
+ *     largest fragment size plus the Ethernet header, and the status of the
+ *     reassembly is returned.
+ *
+ *     Returns NDIS_STATUS_PENDING while fragments are still missing,
+ *     NDIS_STATUS_INVALID_PACKET for a malformed, overlapping, oversized or
+ *     expired fragment, and NDIS_STATUS_RESOURCES on allocation failure.
+ *
+ *     NOTE(review): the entry returned by the lookup is used after the
+ *     hash-table lock has been dropped, and two first fragments arriving
+ *     concurrently can each create an entry - confirm callers serialize.
+ *     XXX - Instead of copying NBLs, keep the NBLs in limbo state.
+ *----------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsProcessIpv4Fragment(POVS_SWITCH_CONTEXT switchContext,
+                       PNET_BUFFER_LIST *curNbl,
+                       OvsCompletionList *completionList,
+                       NDIS_SWITCH_PORT_ID sourcePort,
+                       UINT16 *mru,
+                       ovs_be64 tunnelId,
+                       PNET_BUFFER_LIST *newNbl)
+{
+    NDIS_STATUS status = NDIS_STATUS_PENDING;
+    PNET_BUFFER curNb;
+    UINT8 hdrBuf[ETH_HEADER_LENGTH + sizeof(IPHdr)];
+    UINT8 *hdr;
+    UINT16 offset, flags;
+    UINT16 totLen, payloadLen, ipHdrLen;
+    UINT32 hash;
+    UINT64 currentTime;
+    IPHdr *ipHdr;
+    OVS_IPFRAG_KEY fragKey;
+    POVS_IPFRAG_ENTRY entry;
+    POVS_FRAGMENT_LIST fragStorage;
+    POVS_FRAGMENT_LIST prev, next;
+    LOCK_STATE_EX htLockState, entryLockState;
+
+    curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl);
+    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
+
+    /*
+     * Request the fixed IP header together with the Ethernet header:
+     * NdisGetDataBuffer() only guarantees the requested bytes are
+     * contiguous (or copied into the supplied storage).
+     */
+    hdr = (UINT8 *)NdisGetDataBuffer(curNb, (ULONG)sizeof(hdrBuf),
+                                     (PVOID)hdrBuf, 1, 0);
+    if (hdr == NULL) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+
+    ipHdr = (IPHdr *)(hdr + ETH_HEADER_LENGTH);
+    ipHdrLen = (UINT16)(ipHdr->ihl * 4);
+    totLen = ntohs(ipHdr->tot_len);
+    /* Reject headers that would make the payload length wrap or be zero. */
+    if (ipHdrLen < sizeof(IPHdr) || totLen <= ipHdrLen) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+    payloadLen = (UINT16)(totLen - ipHdrLen);
+    offset = ntohs(ipHdr->frag_off) & IP_OFFSET;
+    offset <<= 3;
+    flags = ntohs(ipHdr->frag_off) & IP_MF;
+
+    /*
+     * A reassembled datagram cannot be longer than the 16-bit tot_len
+     * field allows; this also keeps totalLen below from wrapping.
+     */
+    if ((UINT32)offset + payloadLen + ipHdrLen > 0xFFFF) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+
+    /* Copy fragment specific fields. */
+    fragKey.protocol = ipHdr->protocol;
+    fragKey.id = ipHdr->id;
+    fragKey.sAddr = ipHdr->saddr;
+    fragKey.dAddr = ipHdr->daddr;
+    fragKey.tunnelId = tunnelId;
+    /* Padding. */
+    NdisZeroMemory(&fragKey.pad_1, 3);
+    fragKey.pad_2 = 0;
+
+    fragStorage = (POVS_FRAGMENT_LIST )
+        OvsAllocateMemoryWithTag(sizeof(OVS_FRAGMENT_LIST), OVS_MEMORY_TAG);
+    if (fragStorage == NULL) {
+        OVS_LOG_ERROR("Insufficient resources, failed to allocate fragStorage");
+        return NDIS_STATUS_RESOURCES;
+    }
+
+    fragStorage->pbuff = (CHAR *)OvsAllocateMemoryWithTag(payloadLen,
+                                                          OVS_MEMORY_TAG);
+    if (fragStorage->pbuff == NULL) {
+        OVS_LOG_ERROR("Insufficient resources, failed to allocate fragStorage");
+        OvsFreeMemoryWithTag(fragStorage, OVS_MEMORY_TAG);
+        return NDIS_STATUS_RESOURCES;
+    }
+
+    /* Copy payload from nbl to fragment storage. */
+    if (OvsGetPacketBytes(*curNbl, payloadLen, ETH_HEADER_LENGTH + ipHdrLen,
+                          fragStorage->pbuff) == NULL) {
+        status = NDIS_STATUS_RESOURCES;
+        goto payload_copy_error;
+    }
+    fragStorage->len = payloadLen;
+    fragStorage->offset = offset;
+    fragStorage->next = NULL;
+    hash = OvsGetIPFragmentHash(&fragKey);
+    entry = OvsLookupIPFrag(&fragKey, hash);
+    if (entry == NULL) {
+        entry = (POVS_IPFRAG_ENTRY)
+            OvsAllocateMemoryWithTag(sizeof(OVS_IPFRAG_ENTRY),
+                                     OVS_MEMORY_TAG);
+        if (entry == NULL) {
+            status = NDIS_STATUS_RESOURCES;
+            goto payload_copy_error;
+        }
+        /* Copy the fragment key. */
+        NdisZeroMemory(entry, sizeof(OVS_IPFRAG_ENTRY));
+        NdisMoveMemory(&(entry->fragKey), &fragKey,
+                       sizeof(OVS_IPFRAG_KEY));
+        /* Update maximum receiving unit. */
+        entry->mru = (UINT16)(payloadLen + ipHdrLen);
+        entry->recvdLen = fragStorage->len;
+        entry->head = entry->tail = fragStorage;
+        if (!flags) {
+            /* Last fragment: it fixes the payload size of the datagram. */
+            entry->totalLen = (UINT16)(offset + payloadLen);
+        }
+        NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
+        entry->expiration = currentTime + IPFRAG_ENTRY_TIMEOUT;
+
+        /* Init the sync-lock. */
+        entry->lockObj = NdisAllocateRWLock(switchContext->NdisFilterHandle);
+        if (entry->lockObj == NULL) {
+            OvsFreeMemoryWithTag(entry, OVS_MEMORY_TAG);
+            status = NDIS_STATUS_RESOURCES;
+            goto payload_copy_error;
+        }
+
+        NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &htLockState, 0);
+        InsertHeadList(&OvsIpFragTable[hash & IP_FRAG_HASH_TABLE_MASK],
+                       &entry->link);
+
+        ipTotalEntries++;
+        NdisReleaseRWLock(ovsIpFragmentHashLockObj, &htLockState);
+        return NDIS_STATUS_PENDING;
+    }
+
+    /* Acquire the entry lock. */
+    NdisAcquireRWLockWrite(entry->lockObj, &entryLockState, 0);
+    NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
+    if (currentTime > entry->expiration) {
+        /* Expired entry. */
+        goto fragment_error;
+    }
+
+    /*
+     * recvdLen is 16 bits wide. Do the sum in 32 bits so that a wrap is
+     * really detected, and do it before the fragment is linked so that a
+     * rejected fragment is never left dangling in the list.
+     */
+    if ((UINT32)entry->recvdLen + fragStorage->len > 0xFFFF) {
+        /* Overflow, ignore the fragment. */
+        goto fragment_error;
+    }
+
+    /* Find the neighbours of the new fragment; the list is offset-sorted. */
+    prev = entry->tail;
+    if (prev != NULL && prev->offset < offset) {
+        /* Common case: the fragment belongs after the current tail. */
+        next = NULL;
+    } else {
+        prev = NULL;
+        for (next = entry->head; next != NULL; next = next->next) {
+            if (next->offset > fragStorage->offset) {
+                break;
+            }
+            prev = next;
+        }
+    }
+
+    /* Check for overlap with the preceding and the following fragment. */
+    if (prev != NULL &&
+        (UINT32)prev->offset + prev->len > fragStorage->offset) {
+        goto fragment_error;
+    }
+    if (next != NULL &&
+        (UINT32)fragStorage->offset + fragStorage->len > next->offset) {
+        goto fragment_error;
+    }
+
+    /* Insert. */
+    fragStorage->next = next;
+    if (prev != NULL) {
+        prev->next = fragStorage;
+    } else {
+        entry->head = fragStorage;
+    }
+    if (next == NULL) {
+        entry->tail = fragStorage;
+    }
+
+    if (entry->mru < (UINT16)(payloadLen + ipHdrLen)) {
+        entry->mru = (UINT16)(payloadLen + ipHdrLen);
+    }
+    entry->recvdLen = (UINT16)(entry->recvdLen + fragStorage->len);
+    if (!flags) {
+        entry->totalLen = (UINT16)(offset + payloadLen);
+    }
+    if (entry->recvdLen == entry->totalLen) {
+        /* Update mru of the forwarding context. */
+        *mru = entry->mru + ETH_HEADER_LENGTH;
+        status = OvsIpv4Reassemble(switchContext, curNbl, completionList,
+                                   sourcePort, entry, newNbl);
+    }
+    NdisReleaseRWLock(entry->lockObj, &entryLockState);
+    return status;
+
+fragment_error:
+    /* The fragment was rejected; do not report it as pending. */
+    status = NDIS_STATUS_INVALID_PACKET;
+    /* Release the entry lock. */
+    NdisReleaseRWLock(entry->lockObj, &entryLockState);
+payload_copy_error:
+    OvsFreeMemoryWithTag(fragStorage->pbuff, OVS_MEMORY_TAG);
+    OvsFreeMemoryWithTag(fragStorage, OVS_MEMORY_TAG);
+    return status;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsIpFragmentEntryCleaner
+ *     Thread routine. On every pass it takes the hash-table lock, leaves
+ *     the loop if the exit flag of the context is set, and otherwise
+ *     deletes each entry whose expiration lies before the current system
+ *     time. It then waits on the context event with a deadline of the
+ *     current time plus IPFRAG_CLEANUP_INTERVAL (twice the entry timeout).
+ *     'data' is the OVS_IPFRAG_THREAD_CTX handed to the thread at creation.
+ *----------------------------------------------------------------------------
+ */
+static VOID
+OvsIpFragmentEntryCleaner(PVOID data)
+{
+    POVS_IPFRAG_THREAD_CTX threadCtx = (POVS_IPFRAG_THREAD_CTX)data;
+
+    for (;;) {
+        LOCK_STATE_EX tableLock;
+        UINT64 now, wakeUpAt;
+        PLIST_ENTRY cursor, following;
+
+        NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &tableLock, 0);
+        if (threadCtx->exit) {
+            NdisReleaseRWLock(ovsIpFragmentHashLockObj, &tableLock);
+            break;
+        }
+
+        /* Fix the wake-up deadline first, then sweep the buckets. */
+        NdisGetCurrentSystemTime((LARGE_INTEGER *)&now);
+        wakeUpAt = now + IPFRAG_CLEANUP_INTERVAL;
+        for (int bucket = 0;
+             bucket < IP_FRAG_HASH_TABLE_SIZE && ipTotalEntries;
+             bucket++) {
+            LIST_FORALL_SAFE(&OvsIpFragTable[bucket], cursor, following) {
+                POVS_IPFRAG_ENTRY stale =
+                    CONTAINING_RECORD(cursor, OVS_IPFRAG_ENTRY, link);
+                if (stale->expiration < now) {
+                    OvsIpFragmentEntryDelete(stale);
+                }
+            }
+        }
+
+        NdisReleaseRWLock(ovsIpFragmentHashLockObj, &tableLock);
+        KeWaitForSingleObject(&threadCtx->event, Executive, KernelMode,
+                              FALSE, (LARGE_INTEGER *)&wakeUpAt);
+    }
+
+    PsTerminateSystemThread(STATUS_SUCCESS);
+}
+
+/*
+ * Frees every stored fragment of 'entry', unlinks the entry from its hash
+ * bucket, decrements ipTotalEntries and finally frees the entry lock and
+ * the entry itself. The entry lock is held while the fragments are freed
+ * and the entry is unlinked.
+ */
+static VOID
+OvsIpFragmentEntryDelete(POVS_IPFRAG_ENTRY entry)
+{
+    LOCK_STATE_EX entryLock;
+    POVS_FRAGMENT_LIST frag, following;
+
+    NdisAcquireRWLockWrite(entry->lockObj, &entryLock, 0);
+    for (frag = entry->head; frag != NULL; frag = following) {
+        /* Remember the successor before the node is released. */
+        following = frag->next;
+        OvsFreeMemoryWithTag(frag->pbuff, OVS_MEMORY_TAG);
+        OvsFreeMemoryWithTag(frag, OVS_MEMORY_TAG);
+    }
+    RemoveEntryList(&entry->link);
+    ipTotalEntries--;
+    NdisReleaseRWLock(entry->lockObj, &entryLock);
+
+    NdisFreeRWLock(entry->lockObj);
+    OvsFreeMemoryWithTag(entry, OVS_MEMORY_TAG);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsCleanupIpFragment
+ *     Asks the cleaner thread to exit, waits for it, then deletes every
+ *     remaining entry and frees the table and the hash-table lock.
+ *     Does nothing when the module is not initialized, so it is safe to
+ *     call after a failed OvsInitIpFragment() or more than once.
+ *----------------------------------------------------------------------------
+ */
+VOID
+OvsCleanupIpFragment(VOID)
+{
+    PLIST_ENTRY link, next;
+    POVS_IPFRAG_ENTRY entry;
+    LOCK_STATE_EX lockState;
+
+    /* The lock is NULL until init succeeds and again after cleanup. */
+    if (ovsIpFragmentHashLockObj == NULL) {
+        return;
+    }
+
+    /* Set the flag under the lock the cleaner holds while it checks it. */
+    NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &lockState, 0);
+    ipFragThreadCtx.exit = 1;
+    KeSetEvent(&ipFragThreadCtx.event, 0, FALSE);
+    NdisReleaseRWLock(ovsIpFragmentHashLockObj, &lockState);
+
+    if (ipFragThreadCtx.threadObject != NULL) {
+        KeWaitForSingleObject(ipFragThreadCtx.threadObject, Executive,
+                              KernelMode, FALSE, NULL);
+        ObDereferenceObject(ipFragThreadCtx.threadObject);
+        ipFragThreadCtx.threadObject = NULL;
+    }
+
+    if (OvsIpFragTable) {
+        for (int i = 0; i < IP_FRAG_HASH_TABLE_SIZE && ipTotalEntries; i++) {
+            LIST_FORALL_SAFE(&OvsIpFragTable[i], link, next) {
+                entry = CONTAINING_RECORD(link, OVS_IPFRAG_ENTRY, link);
+                OvsIpFragmentEntryDelete(entry);
+            }
+        }
+        OvsFreeMemoryWithTag(OvsIpFragTable, OVS_MEMORY_TAG);
+        OvsIpFragTable = NULL;
+    }
+    NdisFreeRWLock(ovsIpFragmentHashLockObj);
+    ovsIpFragmentHashLockObj = NULL;
+}
diff --git a/datapath-windows/ovsext/IpFragment.h b/datapath-windows/ovsext/IpFragment.h
new file mode 100644
index 0000000..8d87451
--- /dev/null
+++ b/datapath-windows/ovsext/IpFragment.h
@@ -0,0 +1,74 @@
+/*
+* Copyright (c) 2017 VMware, Inc.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at:
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#ifndef __IPFRAGMENT_H_
+#define __IPFRAGMENT_H_ 1
+#include "PacketIO.h"
+
+/* One stored fragment payload; nodes form a singly linked list. */
+typedef struct _OVS_FRAGMENT_LIST {
+    CHAR *pbuff;                        /* Copy of the fragment payload. */
+    UINT16 len;                         /* Payload length in bytes. */
+    UINT16 offset;                      /* Byte offset of the payload inside
+                                         * the datagram payload. */
+    struct _OVS_FRAGMENT_LIST *next;    /* Next fragment, NULL at the end. */
+} OVS_FRAGMENT_LIST, *POVS_FRAGMENT_LIST;
+
+/*
+ * Identifies the datagram a fragment belongs to. protocol, id, sAddr and
+ * dAddr are copied unchanged from the IP header of the fragment; tunnelId
+ * is supplied by the caller of OvsProcessIpv4Fragment().
+ */
+typedef struct _OVS_IPFRAG_KEY {
+    UINT8 protocol;
+    UINT8 pad_1[3];             /* Align the structure to address boundaries.*/
+    UINT16 id;
+    UINT16 pad_2;               /* Align the structure to address boundaries.*/
+    UINT32 sAddr;
+    UINT32 dAddr;
+    ovs_be64 tunnelId;
+} OVS_IPFRAG_KEY, *POVS_IPFRAG_KEY;
+
+/* Reassembly state of one datagram, linked into a hash bucket. */
+typedef struct _OVS_IPFRAG_ENTRY {
+    PNDIS_RW_LOCK_EX lockObj;       /* To access the entry. */
+    UINT16 totalLen;                /* Datagram payload length; set when the
+                                     * fragment without the MF flag is seen. */
+    UINT16 recvdLen;                /* Sum of the payload bytes stored. */
+    UINT16 mru;                     /* Largest payload + IP header length
+                                     * seen among the fragments. */
+    UINT64 expiration;              /* System time after which the entry is
+                                     * treated as expired. */
+    OVS_IPFRAG_KEY fragKey;         /* Key the entry is looked up by. */
+    POVS_FRAGMENT_LIST head;        /* First stored fragment. */
+    POVS_FRAGMENT_LIST tail;        /* Last stored fragment. */
+    LIST_ENTRY link;                /* Linkage in the hash bucket. */
+} OVS_IPFRAG_ENTRY, *POVS_IPFRAG_ENTRY;
+
+/* State shared between the cleaner thread and init/cleanup code. */
+typedef struct _OVS_IPFRAG_THREAD_CTX {
+    KEVENT event;           /* Signaled at cleanup to wake the cleaner. */
+    PVOID threadObject;     /* Referenced thread object; waited on and
+                             * dereferenced at cleanup. */
+    UINT32 exit;            /* Non-zero asks the cleaner to terminate. */
+} OVS_IPFRAG_THREAD_CTX, *POVS_IPFRAG_THREAD_CTX;
+
+/* Number of hash buckets; a power of two so that the mask below works. */
+#define IP_FRAG_HASH_TABLE_SIZE ((UINT32)1 << 10)
+#define IP_FRAG_HASH_TABLE_MASK (IP_FRAG_HASH_TABLE_SIZE - 1)
+/* 30s - Sufficient time to receive all fragments. */
+#define IPFRAG_ENTRY_TIMEOUT 300000000LL
+/* 1m. Parenthesized so that it expands safely inside larger expressions. */
+#define IPFRAG_CLEANUP_INTERVAL (IPFRAG_ENTRY_TIMEOUT * 2)
+/*
+ * NOTE(review): no definition of this function is visible in IpFragment.c
+ * of this patch; presumably it splits 'nbl' into fragments that fit 'mru'
+ * - confirm against the definition.
+ */
+PNET_BUFFER_LIST OvsIpv4FragmentNBL(PVOID ovsContext,
+                                    PNET_BUFFER_LIST nbl,
+                                    UINT16 mru);
+
+/*
+ * Stores the fragment carried by '*curNbl' and reassembles the datagram
+ * once all of its fragments have arrived; returns NDIS_STATUS_PENDING
+ * while fragments are still outstanding.
+ */
+NDIS_STATUS OvsProcessIpv4Fragment(POVS_SWITCH_CONTEXT switchContext,
+                                   PNET_BUFFER_LIST *curNbl,
+                                   OvsCompletionList *completionList,
+                                   NDIS_SWITCH_PORT_ID sourcePort,
+                                   UINT16 *mru,
+                                   ovs_be64 tunnelId,
+                                   PNET_BUFFER_LIST *newNbl);
+/* Allocates the fragment hash table and starts the cleaner thread. */
+NDIS_STATUS OvsInitIpFragment(POVS_SWITCH_CONTEXT context);
+/* Stops the cleaner thread and frees the table and all of its entries. */
+VOID OvsCleanupIpFragment(VOID);
+#endif /* __IPFRAGMENT_H_ */
diff --git a/datapath-windows/ovsext/Switch.c b/datapath-windows/ovsext/Switch.c
index 138a656..558e3af 100644
--- a/datapath-windows/ovsext/Switch.c
+++ b/datapath-windows/ovsext/Switch.c
@@ -27,6 +27,7 @@
 #include "Flow.h"
 #include "IpHelper.h"
 #include "Oid.h"
+#include "IpFragment.h"
 
 #ifdef OVS_DBG_MOD
 #undef OVS_DBG_MOD
@@ -229,6 +230,12 @@ OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
     if (status != STATUS_SUCCESS) {
         OvsUninitSwitchContext(switchContext);
         OVS_LOG_ERROR("Exit: Failed to initialize Connection tracking");
+    }
+
+    status = OvsInitIpFragment(switchContext);
+    if (status != STATUS_SUCCESS) {
+        OvsUninitSwitchContext(switchContext);
+        OVS_LOG_ERROR("Exit: Failed to initialize Ip Fragment");
         goto create_switch_done;
     }
 
@@ -265,6 +272,8 @@ OvsExtDetach(NDIS_HANDLE filterModuleContext)
     OvsCleanupSttDefragmentation();
     OvsCleanupConntrack();
     OvsCleanupCtRelated();
+    OvsCleanupIpFragment();
+
     /* This completes the cleanup, and a new attach can be handled now. */
 
     OVS_LOG_TRACE("Exit: OvsDetach Successfully");
diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
index 44aea19..ecfc0b8 100644
--- a/datapath-windows/ovsext/ovsext.vcxproj
+++ b/datapath-windows/ovsext/ovsext.vcxproj
@@ -112,6 +112,7 @@
     <ClInclude Include="Flow.h" />
     <ClInclude Include="Geneve.h" />
     <ClInclude Include="Gre.h" />
+    <ClInclude Include="IpFragment.h" />
     <ClInclude Include="IpHelper.h" />
     <ClInclude Include="Jhash.h" />
     <ClInclude Include="Mpls.h" />
@@ -268,6 +269,7 @@
     <ClCompile Include="Flow.c" />
     <ClCompile Include="Geneve.c" />
     <ClCompile Include="Gre.c" />
+    <ClCompile Include="IpFragment.c" />
     <ClCompile Include="IpHelper.c" />
     <ClCompile Include="Jhash.c" />
     <ClCompile Include="Netlink/Netlink.c" />
-- 
2.9.3.windows.1



More information about the dev mailing list