[ovs-dev] [PATCH v3 1/5] datapath-windows: Added a new file to support Ipv4 fragments.

Anand Kumar kumaranand at vmware.com
Fri Jan 27 22:13:26 UTC 2017


This patch adds functionalities to support IPv4 fragments, which will be
used by Conntrack module.

Added a new structure to hold the Ipv4 fragments and a hash table to
hold Ipv4 datagram entries. Also added a clean up thread that runs
every minute to delete the expired IPv4 datagram entries.

The individual fragments are ignored by conntrack. Once all the
fragments are received, a new NBL is created out of the reassembled
fragments and conntrack executes actions on the new NBL.

Created new APIs OvsProcessIpv4Fragment() to process individual fragments,
OvsIpv4Reassemble() to reassemble Ipv4 fragments.

v2->v3:
	- Use spinlock instead of RW lock
	- Trigger cleanup event after reassembling the fragments.
v1->v2: No change

Signed-off-by: Anand Kumar <kumaranand at vmware.com>
---
 datapath-windows/automake.mk           |   2 +
 datapath-windows/ovsext/Debug.h        |   3 +-
 datapath-windows/ovsext/IpFragment.c   | 503 +++++++++++++++++++++++++++++++++
 datapath-windows/ovsext/IpFragment.h   |  74 +++++
 datapath-windows/ovsext/Switch.c       |   9 +
 datapath-windows/ovsext/ovsext.vcxproj |   2 +
 6 files changed, 592 insertions(+), 1 deletion(-)
 create mode 100644 datapath-windows/ovsext/IpFragment.c
 create mode 100644 datapath-windows/ovsext/IpFragment.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
index 53983ae..4f7b55a 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -32,6 +32,8 @@ EXTRA_DIST += \
 	datapath-windows/ovsext/Flow.h \
 	datapath-windows/ovsext/Gre.h \
 	datapath-windows/ovsext/Gre.c \
+	datapath-windows/ovsext/IpFragment.c \
+	datapath-windows/ovsext/IpFragment.h \
 	datapath-windows/ovsext/IpHelper.c \
 	datapath-windows/ovsext/IpHelper.h \
 	datapath-windows/ovsext/Jhash.c \
diff --git a/datapath-windows/ovsext/Debug.h b/datapath-windows/ovsext/Debug.h
index cae6ac9..6de1812 100644
--- a/datapath-windows/ovsext/Debug.h
+++ b/datapath-windows/ovsext/Debug.h
@@ -42,8 +42,9 @@
 #define OVS_DBG_STT      BIT32(22)
 #define OVS_DBG_CONTRK   BIT32(23)
 #define OVS_DBG_GENEVE   BIT32(24)
+#define OVS_DBG_IPFRAG   BIT32(25)
 
-#define OVS_DBG_LAST     24  /* Set this to the last defined module number. */
+#define OVS_DBG_LAST     25  /* Set this to the last defined module number. */
 /* Please add above OVS_DBG_LAST. */
 
 #define OVS_DBG_ERROR    DPFLTR_ERROR_LEVEL
diff --git a/datapath-windows/ovsext/IpFragment.c b/datapath-windows/ovsext/IpFragment.c
new file mode 100644
index 0000000..95fc754
--- /dev/null
+++ b/datapath-windows/ovsext/IpFragment.c
@@ -0,0 +1,503 @@
+/*
+ * Copyright (c) 2017 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conntrack.h"
+#include "Debug.h"
+#include "IpFragment.h"
+#include "Jhash.h"
+#include "Offload.h"
+#include "PacketParser.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_IPFRAG
+
+/* Forward declarations of the file-local helpers defined further below. */
+static VOID OvsIpFragmentEntryCleaner(PVOID data);
+static VOID OvsIpFragmentEntryDelete(POVS_IPFRAG_ENTRY entry);
+
+/* File-scope state shared by the functions in this file. */
+static OVS_IPFRAG_THREAD_CTX ipFragThreadCtx;  /* Cleaner thread context. */
+static PNDIS_RW_LOCK_EX ovsIpFragmentHashLockObj; /* OvsIpFragTable lock. */
+static UINT64 ipTotalEntries;   /* Entries currently linked in the table. */
+static PLIST_ENTRY OvsIpFragTable; /* IP_FRAG_HASH_TABLE_SIZE bucket heads. */
+
+NDIS_STATUS
+OvsInitIpFragment(POVS_SWITCH_CONTEXT context)
+{
+    /* Sets up the table lock, the hash buckets and the cleaner thread. */
+    NDIS_STATUS status;
+    HANDLE threadHandle = NULL;
+
+    /* Init the sync-lock */
+    ovsIpFragmentHashLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
+    if (ovsIpFragmentHashLockObj == NULL) {
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    /* Init the Hash Buffer */
+    OvsIpFragTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
+                                              * IP_FRAG_HASH_TABLE_SIZE,
+                                              OVS_MEMORY_TAG);
+    if (OvsIpFragTable == NULL) {
+        NdisFreeRWLock(ovsIpFragmentHashLockObj);
+        ovsIpFragmentHashLockObj = NULL;
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    for (int i = 0; i < IP_FRAG_HASH_TABLE_SIZE; i++) {
+        InitializeListHead(&OvsIpFragTable[i]);
+    }
+    /* Init Cleaner Thread. NOTE(review): ipFragThreadCtx.exit is not reset
+     * here; confirm init is never re-run after OvsCleanupIpFragment(). */
+    KeInitializeEvent(&ipFragThreadCtx.event, NotificationEvent, FALSE);
+    status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
+                                  NULL, OvsIpFragmentEntryCleaner,
+                                  &ipFragThreadCtx);
+    /* If the thread cannot be created, release the table and the lock. */
+    if (status != STATUS_SUCCESS) {
+        OvsFreeMemoryWithTag(OvsIpFragTable, OVS_MEMORY_TAG);
+        OvsIpFragTable = NULL;
+        NdisFreeRWLock(ovsIpFragmentHashLockObj);
+        ovsIpFragmentHashLockObj = NULL;
+        return status;
+    }
+    /* Keep a reference to the thread object; its result is not checked. */
+    ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode,
+                              &ipFragThreadCtx.threadObject, NULL);
+    ZwClose(threadHandle);
+    threadHandle = NULL;
+    return STATUS_SUCCESS;
+}
+
+static __inline UINT32
+OvsGetIPFragmentHash(POVS_IPFRAG_KEY fragKey)
+{
+    UINT32 words[6] = {   /* One word per key field; tunnelId spans two. */
+        (UINT32)fragKey->protocol,
+        (UINT32)fragKey->id,
+        (UINT32)fragKey->sAddr,
+        (UINT32)fragKey->dAddr,
+        (UINT32)((fragKey->tunnelId & 0xFFFFFFFF00000000LL) >> 32),
+        (UINT32)(fragKey->tunnelId & 0xFFFFFFFFLL) };
+    return OvsJhashWords(words, 6, OVS_HASH_BASIS);
+}
+
+static __inline POVS_IPFRAG_ENTRY
+OvsLookupIPFrag(POVS_IPFRAG_KEY fragKey, UINT32 hash)
+{
+    POVS_IPFRAG_ENTRY candidate;
+    PLIST_ENTRY node;
+    LOCK_STATE_EX tableLockState;
+    /* Scan only the bucket selected by the hash, under the table lock. */
+    NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &tableLockState, 0);
+    LIST_FORALL(&OvsIpFragTable[hash & IP_FRAG_HASH_TABLE_MASK], node) {
+        candidate = CONTAINING_RECORD(node, OVS_IPFRAG_ENTRY, link);
+        if (candidate->fragKey.id == fragKey->id &&
+            candidate->fragKey.sAddr == fragKey->sAddr &&
+            candidate->fragKey.dAddr == fragKey->dAddr &&
+            candidate->fragKey.protocol == fragKey->protocol &&
+            candidate->fragKey.tunnelId == fragKey->tunnelId) {
+            NdisReleaseRWLock(ovsIpFragmentHashLockObj, &tableLockState);
+            return candidate;   /* Table lock is already released here. */
+        }
+    }
+    NdisReleaseRWLock(ovsIpFragmentHashLockObj, &tableLockState);
+    return NULL;
+}
+
+/*
+*----------------------------------------------------------------------------
+* OvsIpv4Reassemble
+*     Reassemble the ipv4 fragments and return newNbl on success.
+*     Should be called after acquiring the lockObj for the entry.
+*     Returns NDIS_STATUS_INVALID_PACKET if the fragments exceed totalLen.
+*----------------------------------------------------------------------------
+*/
+NDIS_STATUS
+OvsIpv4Reassemble(POVS_SWITCH_CONTEXT switchContext,
+                  PNET_BUFFER_LIST *curNbl,
+                  OvsCompletionList *completionList,
+                  NDIS_SWITCH_PORT_ID sourcePort,
+                  POVS_IPFRAG_ENTRY entry,
+                  PNET_BUFFER_LIST *newNbl)
+{
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    NDIS_STRING filterReason;
+    POVS_BUFFER_CONTEXT ctx;
+    PNET_BUFFER curNb;
+    EthHdr *eth;
+    IPHdr *ipHdr, *newIpHdr;
+    CHAR *ethBuf[sizeof(EthHdr)];
+    CHAR *packetBuf;
+    UINT16 ipHdrLen, packetLen, packetHeader;
+    POVS_FRAGMENT_LIST head = NULL;
+
+    curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl);
+    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
+
+    eth = (EthHdr*)NdisGetDataBuffer(curNb, ETH_HEADER_LENGTH,
+                                     (PVOID)&ethBuf, 1, 0);
+    if (eth == NULL) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+    ipHdr = (IPHdr *)((PCHAR)eth + ETH_HEADER_LENGTH);
+    ipHdrLen = (UINT16)(ipHdr->ihl * 4);
+    /* packetLen is 16 bits wide: reject sizes that would wrap it. */
+    if ((UINT32)ETH_HEADER_LENGTH + ipHdrLen + entry->totalLen > 0xFFFF) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+    packetLen = ETH_HEADER_LENGTH + ipHdrLen + entry->totalLen;
+    packetBuf = (CHAR*)OvsAllocateMemoryWithTag(packetLen, OVS_MEMORY_TAG);
+    if (packetBuf == NULL) {
+        OVS_LOG_ERROR("Insufficient resources, failed to allocate packetBuf");
+        return NDIS_STATUS_RESOURCES;
+    }
+
+    /* Copy the Ethernet and IPv4 headers of the current fragment. */
+    NdisMoveMemory(packetBuf, eth, ETH_HEADER_LENGTH);
+    NdisMoveMemory(packetBuf + ETH_HEADER_LENGTH, ipHdr, ipHdrLen);
+    /* Turn the copied header into that of an unfragmented datagram. */
+    newIpHdr = (IPHdr *)(packetBuf + ETH_HEADER_LENGTH);
+    newIpHdr->frag_off = 0;
+    newIpHdr->tot_len = htons(packetLen - ETH_HEADER_LENGTH);
+    newIpHdr->check = 0;
+    newIpHdr->check = IPChecksum((UINT8 *)newIpHdr, ipHdrLen, 0);
+    packetHeader = ETH_HEADER_LENGTH + ipHdrLen;
+    head = entry->head;
+    while (head) {
+        /* Never copy a fragment that ends beyond the allocated payload. */
+        if ((UINT32)head->offset + head->len > entry->totalLen) {
+            OvsFreeMemoryWithTag(packetBuf, OVS_MEMORY_TAG);
+            return NDIS_STATUS_INVALID_PACKET;
+        }
+        NdisMoveMemory(packetBuf + packetHeader + head->offset,
+                       head->pbuff, head->len);
+        head = head->next;
+    }
+    /* Create new nbl from the flat buffer */
+    *newNbl = OvsAllocateNBLFromBuffer(switchContext, packetBuf, packetLen);
+    if (*newNbl == NULL) {
+        OVS_LOG_ERROR("Insufficient resources, failed to allocate newNbl");
+        status = NDIS_STATUS_RESOURCES;
+    }
+    OvsFreeMemoryWithTag(packetBuf, OVS_MEMORY_TAG);
+    /* Timeout the entry so that clean up thread deletes it .*/
+    entry->expiration -= IPFRAG_ENTRY_TIMEOUT;
+    /* Complete the fragment NBL */
+    ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(*curNbl);
+    if (ctx->flags & OVS_BUFFER_NEED_COMPLETE) {
+        RtlInitUnicodeString(&filterReason, L"Complete last fragment");
+        OvsAddPktCompletionList(completionList, TRUE, sourcePort, *curNbl, 1,
+                                &filterReason);
+    } else {
+        OvsCompleteNBL(switchContext, *curNbl, TRUE);
+    }
+    *curNbl = *newNbl;
+    return status;
+}
+/*
+*----------------------------------------------------------------------------
+* OvsProcessIpv4Fragment
+*     Copies the payload of the fragment in *curNbl into the entry matching
+*     its key (creating the entry if needed). Reassembles once all the
+*     fragments are received, updating *mru; otherwise returns
+*     NDIS_STATUS_PENDING. Expired, overlapping or overflowing fragments
+*     are discarded and also yield NDIS_STATUS_PENDING.
+*     XXX - Instead of copying NBls, Keep the NBLs in limbo state.
+*----------------------------------------------------------------------------
+*/
+NDIS_STATUS
+OvsProcessIpv4Fragment(POVS_SWITCH_CONTEXT switchContext,
+                       PNET_BUFFER_LIST *curNbl,
+                       OvsCompletionList *completionList,
+                       NDIS_SWITCH_PORT_ID sourcePort,
+                       UINT16 *mru,
+                       ovs_be64 tunnelId,
+                       PNET_BUFFER_LIST *newNbl)
+{
+    NDIS_STATUS status = NDIS_STATUS_PENDING;
+    PNET_BUFFER curNb;
+    CHAR *ethBuf[sizeof(EthHdr)];
+    UINT16 offset, flags;
+    UINT16 payloadLen, ipHdrLen;
+    UINT32 hash;
+    UINT64 currentTime;
+    EthHdr *eth;
+    IPHdr *ipHdr;
+    OVS_IPFRAG_KEY fragKey;
+    POVS_IPFRAG_ENTRY entry;
+    POVS_FRAGMENT_LIST fragStorage;
+    LOCK_STATE_EX htLockState;
+
+    curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl);
+    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
+
+    eth = (EthHdr*)NdisGetDataBuffer(curNb, ETH_HEADER_LENGTH,
+                                     (PVOID)&ethBuf, 1, 0);
+    if (eth == NULL) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+    ipHdr = (IPHdr *)((PCHAR)eth + ETH_HEADER_LENGTH);
+    ipHdrLen = (UINT16)(ipHdr->ihl * 4);
+    /* A tot_len shorter than the header would make payloadLen wrap around. */
+    if (ntohs(ipHdr->tot_len) < ipHdrLen) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+    payloadLen = ntohs(ipHdr->tot_len) - ipHdrLen;
+    offset = ntohs(ipHdr->frag_off) & IP_OFFSET;
+    offset <<= 3;
+    flags = ntohs(ipHdr->frag_off) & IP_MF;
+
+    /*Copy fragment specific fields. */
+    fragKey.protocol = ipHdr->protocol;
+    fragKey.id = ipHdr->id;
+    fragKey.sAddr = ipHdr->saddr;
+    fragKey.dAddr = ipHdr->daddr;
+    fragKey.tunnelId = tunnelId;
+    /* Padding. */
+    NdisZeroMemory(&fragKey.pad_1, 3);
+    fragKey.pad_2 = 0;
+
+    fragStorage = (POVS_FRAGMENT_LIST )
+        OvsAllocateMemoryWithTag(sizeof(OVS_FRAGMENT_LIST), OVS_MEMORY_TAG);
+    if (fragStorage == NULL) {
+        OVS_LOG_ERROR("Insufficient resources, failed to allocate fragStorage");
+        return NDIS_STATUS_RESOURCES;
+    }
+    fragStorage->pbuff = (CHAR *)OvsAllocateMemoryWithTag(payloadLen,
+                                                          OVS_MEMORY_TAG);
+    if (fragStorage->pbuff == NULL) {
+        OVS_LOG_ERROR("Insufficient resources, failed to allocate fragStorage");
+        OvsFreeMemoryWithTag(fragStorage, OVS_MEMORY_TAG);
+        return NDIS_STATUS_RESOURCES;
+    }
+
+    /* Copy payload from nbl to fragment storage. */
+    if (OvsGetPacketBytes(*curNbl, payloadLen, ETH_HEADER_LENGTH + ipHdrLen,
+                          fragStorage->pbuff) == NULL) {
+        status = NDIS_STATUS_RESOURCES;
+        goto payload_copy_error;
+    }
+    fragStorage->len = payloadLen;
+    fragStorage->offset = offset;
+    fragStorage->next = NULL;
+    hash = OvsGetIPFragmentHash(&fragKey);
+    entry = OvsLookupIPFrag(&fragKey, hash);
+    if (entry == NULL) {
+        entry = (POVS_IPFRAG_ENTRY)
+            OvsAllocateMemoryWithTag(sizeof(OVS_IPFRAG_ENTRY),
+                                     OVS_MEMORY_TAG);
+        if (entry == NULL) {
+            status = NDIS_STATUS_RESOURCES;
+            goto payload_copy_error;
+        }
+        /* Copy the fragment key. */
+        NdisZeroMemory(entry, sizeof(OVS_IPFRAG_ENTRY));
+        NdisMoveMemory(&(entry->fragKey), &fragKey,
+                       sizeof(OVS_IPFRAG_KEY));
+        /* Init MRU. */
+        entry->mru = payloadLen + ipHdrLen;
+        entry->recvdLen += fragStorage->len;
+        entry->head = entry->tail = fragStorage;
+        if (!flags) {
+            entry->totalLen = offset + payloadLen;
+        }
+        NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
+        entry->expiration = currentTime + IPFRAG_ENTRY_TIMEOUT;
+        /* Init the sync-lock. */
+        NdisAllocateSpinLock(&(entry->lockObj));
+        NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &htLockState, 0);
+        InsertHeadList(&OvsIpFragTable[hash & IP_FRAG_HASH_TABLE_MASK],
+                       &entry->link);
+        ipTotalEntries++;
+        NdisReleaseRWLock(ovsIpFragmentHashLockObj, &htLockState);
+        return NDIS_STATUS_PENDING;
+    } else {
+        /* Acquire the entry lock. */
+        NdisAcquireSpinLock(&(entry->lockObj));
+        NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
+        if (currentTime > entry->expiration) {
+            /* Expired entry. */
+            goto fragment_error;
+        }
+        POVS_FRAGMENT_LIST next = entry->head;
+        POVS_FRAGMENT_LIST prev = entry->tail;
+        /* Fast path: the new fragment starts after the current tail. */
+        if (prev != NULL && prev->offset < offset) {
+            next = NULL;
+            goto found;
+        }
+        prev = NULL;
+        for (next = entry->head; next != NULL; next = next->next) {
+            if (next->offset > fragStorage->offset) {
+                break;
+            }
+            prev = next;
+        }
+found:
+        /*Check for overlap. */
+        if (prev) {
+            /* i bytes overlap. */
+            int i = (prev->offset + prev->len) - fragStorage->offset;
+            if (i > 0) {
+                goto fragment_error;
+            }
+        }
+        if (next) {
+            /* i bytes overlap. */
+            int i = (fragStorage->offset + fragStorage->len) - next->offset;
+            if (i > 0) {
+                goto fragment_error;
+            }
+        }
+        /* Cast back to 16 bits so that a wrapped-around sum is detected. */
+        if ((UINT16)(entry->recvdLen + fragStorage->len) > entry->recvdLen) {
+            entry->recvdLen += fragStorage->len;
+        } else {
+            /* Overflow, ignore the fragment.*/
+            goto fragment_error;
+        }
+        /*Insert. */
+        if (prev) {
+            prev->next = fragStorage;
+            fragStorage->next = next;
+        } else {
+            fragStorage->next = next;
+            entry->head = fragStorage;
+        }
+        if (!next) {
+            entry->tail = fragStorage;
+        }
+
+        /* Update Maximum Received Unit. */
+        entry->mru = entry->mru > (payloadLen + ipHdrLen) ?
+                         entry->mru : (payloadLen + ipHdrLen);
+        if (!flags) {
+            entry->totalLen = offset + payloadLen;
+        }
+        if (entry->recvdLen == entry->totalLen) {
+            /* Update mru of the forwarding context. */
+            *mru = entry->mru + ETH_HEADER_LENGTH;
+            status = OvsIpv4Reassemble(switchContext, curNbl, completionList,
+                                       sourcePort, entry, newNbl);
+        }
+        NdisReleaseSpinLock(&(entry->lockObj));
+        KeSetEvent(&ipFragThreadCtx.event, 0, FALSE);
+        return status;
+    }
+fragment_error:
+    /* Release the entry lock. */
+    NdisReleaseSpinLock(&(entry->lockObj));
+payload_copy_error:
+    OvsFreeMemoryWithTag(fragStorage->pbuff, OVS_MEMORY_TAG);
+    OvsFreeMemoryWithTag(fragStorage, OVS_MEMORY_TAG);
+    return status;
+}
+
+
+/*
+*----------------------------------------------------------------------------
+* OvsIpFragmentEntryCleaner
+*     Thread routine: deletes expired entries, then sleeps until the event
+*     is set or IPFRAG_CLEANUP_INTERVAL elapses; exits when context->exit.
+*----------------------------------------------------------------------------
+*/
+static VOID
+OvsIpFragmentEntryCleaner(PVOID data)
+{
+    POVS_IPFRAG_THREAD_CTX context = (POVS_IPFRAG_THREAD_CTX)data;
+    PLIST_ENTRY link, next;
+    POVS_IPFRAG_ENTRY entry;
+    BOOLEAN success = TRUE;
+
+    while (success) {
+        LOCK_STATE_EX lockState;
+        /* A NotificationEvent stays signaled until cleared; without this
+         * every wait below would return at once after the first KeSetEvent. */
+        KeClearEvent(&context->event);
+        NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &lockState, 0);
+        if (context->exit) {
+            NdisReleaseRWLock(ovsIpFragmentHashLockObj, &lockState);
+            break;
+        }
+
+        /* Set the timeout for the thread and cleanup. */
+        UINT64 currentTime, threadSleepTimeout;
+        NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
+        threadSleepTimeout = currentTime + IPFRAG_CLEANUP_INTERVAL;
+        for (int i = 0; i < IP_FRAG_HASH_TABLE_SIZE && ipTotalEntries; i++) {
+            LIST_FORALL_SAFE(&OvsIpFragTable[i], link, next) {
+                entry = CONTAINING_RECORD(link, OVS_IPFRAG_ENTRY, link);
+                if (entry->expiration < currentTime) {
+                    OvsIpFragmentEntryDelete(entry);
+                }
+            }
+        }
+        NdisReleaseRWLock(ovsIpFragmentHashLockObj, &lockState);
+        KeWaitForSingleObject(&context->event, Executive, KernelMode,
+                              FALSE, (LARGE_INTEGER *)&threadSleepTimeout);
+    }
+    PsTerminateSystemThread(STATUS_SUCCESS);
+}
+
+static VOID
+OvsIpFragmentEntryDelete(POVS_IPFRAG_ENTRY entry)
+{
+    POVS_FRAGMENT_LIST frag, nextFrag;
+
+    NdisAcquireSpinLock(&(entry->lockObj));
+    /* Free each fragment's payload and node, then unlink the entry. */
+    for (frag = entry->head; frag != NULL; frag = nextFrag) {
+        nextFrag = frag->next;
+        OvsFreeMemoryWithTag(frag->pbuff, OVS_MEMORY_TAG);
+        OvsFreeMemoryWithTag(frag, OVS_MEMORY_TAG);
+    }
+    RemoveEntryList(&entry->link);
+    ipTotalEntries--;
+    NdisReleaseSpinLock(&(entry->lockObj));
+    NdisFreeSpinLock(&(entry->lockObj));
+    OvsFreeMemoryWithTag(entry, OVS_MEMORY_TAG);
+}
+
+VOID
+OvsCleanupIpFragment(VOID)
+{
+    PLIST_ENTRY node, following;
+    LOCK_STATE_EX tableLockState;
+    /* Ask the cleaner thread to exit, then wait until it has terminated. */
+    NdisAcquireRWLockWrite(ovsIpFragmentHashLockObj, &tableLockState, 0);
+    ipFragThreadCtx.exit = 1;
+    KeSetEvent(&ipFragThreadCtx.event, 0, FALSE);
+    NdisReleaseRWLock(ovsIpFragmentHashLockObj, &tableLockState);
+    KeWaitForSingleObject(ipFragThreadCtx.threadObject, Executive,
+                          KernelMode, FALSE, NULL);
+    ObDereferenceObject(ipFragThreadCtx.threadObject);
+
+    if (OvsIpFragTable != NULL) {
+        /* Delete whatever entries are still linked, bucket by bucket. */
+        for (int b = 0; b < IP_FRAG_HASH_TABLE_SIZE && ipTotalEntries; b++) {
+            LIST_FORALL_SAFE(&OvsIpFragTable[b], node, following) {
+                OvsIpFragmentEntryDelete(
+                    CONTAINING_RECORD(node, OVS_IPFRAG_ENTRY, link));
+            }
+        }
+        OvsFreeMemoryWithTag(OvsIpFragTable, OVS_MEMORY_TAG);
+        OvsIpFragTable = NULL;
+    }
+    NdisFreeRWLock(ovsIpFragmentHashLockObj);
+    ovsIpFragmentHashLockObj = NULL;
+}
diff --git a/datapath-windows/ovsext/IpFragment.h b/datapath-windows/ovsext/IpFragment.h
new file mode 100644
index 0000000..9e3e607
--- /dev/null
+++ b/datapath-windows/ovsext/IpFragment.h
@@ -0,0 +1,74 @@
+/*
+* Copyright (c) 2017 VMware, Inc.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at:
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#ifndef __IPFRAGMENT_H_
+#define __IPFRAGMENT_H_ 1
+#include "PacketIO.h"
+
+typedef struct _OVS_FRAGMENT_LIST {
+    CHAR *pbuff;                     /* Copy of this fragment's payload. */
+    UINT16 len;                      /* Payload length in bytes. */
+    UINT16 offset;                   /* Byte offset within the datagram. */
+    struct _OVS_FRAGMENT_LIST *next; /* Next fragment; NULL at the tail. */
+} OVS_FRAGMENT_LIST, *POVS_FRAGMENT_LIST;
+
+typedef struct _OVS_IPFRAG_KEY {
+    UINT8 protocol;             /* IPv4 header protocol field. */
+    UINT8 pad_1[3];             /* Align the structure to address boundaries.*/
+    UINT16 id;                  /* IPv4 header identification field. */
+    UINT16 pad_2;               /* Align the structure to address boundaries.*/
+    UINT32 sAddr;               /* IPv4 source address from the header. */
+    UINT32 dAddr;               /* IPv4 destination address from the header. */
+    ovs_be64 tunnelId;          /* Tunnel id passed in by the caller. */
+} OVS_IPFRAG_KEY, *POVS_IPFRAG_KEY;
+
+typedef struct _OVS_IPFRAG_ENTRY {
+    NDIS_SPIN_LOCK lockObj;       /* To access the entry. */
+    UINT16 totalLen;              /* Set from the fragment without MF. */
+    UINT16 recvdLen;              /* Sum of the stored fragment lengths. */
+    UINT16 mru;                   /* Largest header + payload size seen. */
+    UINT64 expiration;            /* System time at which the entry expires. */
+    OVS_IPFRAG_KEY fragKey;       /* Key identifying the datagram. */
+    POVS_FRAGMENT_LIST head;      /* First stored fragment. */
+    POVS_FRAGMENT_LIST tail;      /* Last stored fragment. */
+    LIST_ENTRY link;              /* Linkage in an OvsIpFragTable bucket. */
+} OVS_IPFRAG_ENTRY, *POVS_IPFRAG_ENTRY;
+
+typedef struct _OVS_IPFRAG_THREAD_CTX {
+    KEVENT event;          /* Wakes the cleaner thread before its timeout. */
+    PVOID threadObject;    /* Referenced thread object, waited on at cleanup. */
+    UINT32 exit;           /* Set to 1 to make the cleaner thread terminate. */
+} OVS_IPFRAG_THREAD_CTX, *POVS_IPFRAG_THREAD_CTX;
+
+#define IP_FRAG_HASH_TABLE_SIZE ((UINT32)1 << 10)
+#define IP_FRAG_HASH_TABLE_MASK (IP_FRAG_HASH_TABLE_SIZE - 1)
+/* 30s in 100ns system-time units - sufficient to receive all fragments. */
+#define IPFRAG_ENTRY_TIMEOUT 300000000LL
+#define IPFRAG_CLEANUP_INTERVAL (IPFRAG_ENTRY_TIMEOUT * 2) /* 1 minute. */
+PNET_BUFFER_LIST OvsIpv4FragmentNBL(PVOID ovsContext,
+                                    PNET_BUFFER_LIST nbl,
+                                    UINT16 mru); /* NOTE(review): see below. */
+/* NOTE(review): OvsIpv4FragmentNBL has no definition in IpFragment.c here. */
+NDIS_STATUS OvsProcessIpv4Fragment(POVS_SWITCH_CONTEXT switchContext,
+                                   PNET_BUFFER_LIST *curNbl,
+                                   OvsCompletionList *completionList,
+                                   NDIS_SWITCH_PORT_ID sourcePort,
+                                   UINT16 *mru,
+                                   ovs_be64 tunnelId,
+                                   PNET_BUFFER_LIST *newNbl);
+NDIS_STATUS OvsInitIpFragment(POVS_SWITCH_CONTEXT context);
+VOID OvsCleanupIpFragment(VOID);
+#endif /* __IPFRAGMENT_H_ */
diff --git a/datapath-windows/ovsext/Switch.c b/datapath-windows/ovsext/Switch.c
index 138a656..558e3af 100644
--- a/datapath-windows/ovsext/Switch.c
+++ b/datapath-windows/ovsext/Switch.c
@@ -27,6 +27,7 @@
 #include "Flow.h"
 #include "IpHelper.h"
 #include "Oid.h"
+#include "IpFragment.h"
 
 #ifdef OVS_DBG_MOD
 #undef OVS_DBG_MOD
@@ -229,6 +230,12 @@ OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
     if (status != STATUS_SUCCESS) {
         OvsUninitSwitchContext(switchContext);
         OVS_LOG_ERROR("Exit: Failed to initialize Connection tracking");
+    }
+
+    status = OvsInitIpFragment(switchContext);
+    if (status != STATUS_SUCCESS) {
+        OvsUninitSwitchContext(switchContext);
+        OVS_LOG_ERROR("Exit: Failed to initialize Ip Fragment");
         goto create_switch_done;
     }
 
@@ -265,6 +272,8 @@ OvsExtDetach(NDIS_HANDLE filterModuleContext)
     OvsCleanupSttDefragmentation();
     OvsCleanupConntrack();
     OvsCleanupCtRelated();
+    OvsCleanupIpFragment();
+
     /* This completes the cleanup, and a new attach can be handled now. */
 
     OVS_LOG_TRACE("Exit: OvsDetach Successfully");
diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
index 44aea19..ecfc0b8 100644
--- a/datapath-windows/ovsext/ovsext.vcxproj
+++ b/datapath-windows/ovsext/ovsext.vcxproj
@@ -112,6 +112,7 @@
     <ClInclude Include="Flow.h" />
     <ClInclude Include="Geneve.h" />
     <ClInclude Include="Gre.h" />
+    <ClInclude Include="IpFragment.h" />
     <ClInclude Include="IpHelper.h" />
     <ClInclude Include="Jhash.h" />
     <ClInclude Include="Mpls.h" />
@@ -268,6 +269,7 @@
     <ClCompile Include="Flow.c" />
     <ClCompile Include="Geneve.c" />
     <ClCompile Include="Gre.c" />
+    <ClCompile Include="IpFragment.c" />
     <ClCompile Include="IpHelper.c" />
     <ClCompile Include="Jhash.c" />
     <ClCompile Include="Netlink/Netlink.c" />
-- 
2.9.3.windows.1



More information about the dev mailing list